xref: /qemu/tests/qtest/migration/precopy-tests.c (revision 70ce076fa6dff60585c229a4b641b13e64bf03cf)
1 /*
2  * QTest testcase for precopy migration
3  *
4  * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates
5  *   based on the vhost-user-test.c that is:
6  *      Copyright (c) 2014 Virtual Open Systems Sarl.
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  *
11  */
12 
13 #include "qemu/osdep.h"
14 #include "chardev/char.h"
15 #include "crypto/tlscredspsk.h"
16 #include "libqtest.h"
17 #include "migration/bootfile.h"
18 #include "migration/framework.h"
19 #include "migration/migration-qmp.h"
20 #include "migration/migration-util.h"
21 #include "ppc-util.h"
22 #include "qobject/qlist.h"
23 #include "qapi-types-migration.h"
24 #include "qemu/module.h"
25 #include "qemu/option.h"
26 #include "qemu/range.h"
27 #include "qemu/sockets.h"
28 
29 
30 /*
 * Dirtylimit stops working if the dirty page rate error
 * value is less than DIRTYLIMIT_TOLERANCE_RANGE
33  */
34 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
35 
36 static char *tmpfs;
37 
38 static void test_precopy_unix_plain(void)
39 {
40     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
41     MigrateCommon args = {
42         .listen_uri = uri,
43         .connect_uri = uri,
44         /*
45          * The simplest use case of precopy, covering smoke tests of
46          * get-dirty-log dirty tracking.
47          */
48         .live = true,
49     };
50 
51     test_precopy_common(&args);
52 }
53 
54 static void test_precopy_unix_suspend_live(void)
55 {
56     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
57     MigrateCommon args = {
58         .listen_uri = uri,
59         .connect_uri = uri,
60         /*
61          * despite being live, the test is fast because the src
62          * suspends immediately.
63          */
64         .live = true,
65         .start.suspend_me = true,
66     };
67 
68     test_precopy_common(&args);
69 }
70 
71 static void test_precopy_unix_suspend_notlive(void)
72 {
73     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
74     MigrateCommon args = {
75         .listen_uri = uri,
76         .connect_uri = uri,
77         .start.suspend_me = true,
78     };
79 
80     test_precopy_common(&args);
81 }
82 
83 static void test_precopy_unix_dirty_ring(void)
84 {
85     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
86     MigrateCommon args = {
87         .start = {
88             .use_dirty_ring = true,
89         },
90         .listen_uri = uri,
91         .connect_uri = uri,
92         /*
93          * Besides the precopy/unix basic test, cover dirty ring interface
94          * rather than get-dirty-log.
95          */
96         .live = true,
97     };
98 
99     test_precopy_common(&args);
100 }
101 
102 static void test_precopy_tcp_plain(void)
103 {
104     MigrateCommon args = {
105         .listen_uri = "tcp:127.0.0.1:0",
106     };
107 
108     test_precopy_common(&args);
109 }
110 
111 static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to)
112 {
113 
114     migrate_set_capability(from, "return-path", true);
115     migrate_set_capability(to, "return-path", true);
116 
117     migrate_set_capability(from, "switchover-ack", true);
118     migrate_set_capability(to, "switchover-ack", true);
119 
120     return NULL;
121 }
122 
123 static void test_precopy_tcp_switchover_ack(void)
124 {
125     MigrateCommon args = {
126         .listen_uri = "tcp:127.0.0.1:0",
127         .start_hook = migrate_hook_start_switchover_ack,
128         /*
129          * Source VM must be running in order to consider the switchover ACK
130          * when deciding to do switchover or not.
131          */
132         .live = true,
133     };
134 
135     test_precopy_common(&args);
136 }
137 
138 #ifndef _WIN32
139 static void *migrate_hook_start_fd(QTestState *from,
140                                    QTestState *to)
141 {
142     int ret;
143     int pair[2];
144 
145     /* Create two connected sockets for migration */
146     ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
147     g_assert_cmpint(ret, ==, 0);
148 
149     /* Send the 1st socket to the target */
150     qtest_qmp_fds_assert_success(to, &pair[0], 1,
151                                  "{ 'execute': 'getfd',"
152                                  "  'arguments': { 'fdname': 'fd-mig' }}");
153     close(pair[0]);
154 
155     /* Start incoming migration from the 1st socket */
156     migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}");
157 
158     /* Send the 2nd socket to the target */
159     qtest_qmp_fds_assert_success(from, &pair[1], 1,
160                                  "{ 'execute': 'getfd',"
161                                  "  'arguments': { 'fdname': 'fd-mig' }}");
162     close(pair[1]);
163 
164     return NULL;
165 }
166 
167 static void migrate_hook_end_fd(QTestState *from,
168                                 QTestState *to,
169                                 void *opaque)
170 {
171     QDict *rsp;
172     const char *error_desc;
173 
174     /* Test closing fds */
175     /*
176      * We assume, that QEMU removes named fd from its list,
177      * so this should fail.
178      */
179     rsp = qtest_qmp(from,
180                     "{ 'execute': 'closefd',"
181                     "  'arguments': { 'fdname': 'fd-mig' }}");
182     g_assert_true(qdict_haskey(rsp, "error"));
183     error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
184     g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
185     qobject_unref(rsp);
186 
187     rsp = qtest_qmp(to,
188                     "{ 'execute': 'closefd',"
189                     "  'arguments': { 'fdname': 'fd-mig' }}");
190     g_assert_true(qdict_haskey(rsp, "error"));
191     error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
192     g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
193     qobject_unref(rsp);
194 }
195 
196 static void test_precopy_fd_socket(void)
197 {
198     MigrateCommon args = {
199         .listen_uri = "defer",
200         .connect_uri = "fd:fd-mig",
201         .start_hook = migrate_hook_start_fd,
202         .end_hook = migrate_hook_end_fd,
203     };
204     test_precopy_common(&args);
205 }
206 
207 static void *migrate_hook_start_precopy_fd_file(QTestState *from,
208                                                 QTestState *to)
209 {
210     g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
211     int src_flags = O_CREAT | O_RDWR;
212     int dst_flags = O_CREAT | O_RDWR;
213     int fds[2];
214 
215     fds[0] = open(file, src_flags, 0660);
216     assert(fds[0] != -1);
217 
218     fds[1] = open(file, dst_flags, 0660);
219     assert(fds[1] != -1);
220 
221 
222     qtest_qmp_fds_assert_success(to, &fds[0], 1,
223                                  "{ 'execute': 'getfd',"
224                                  "  'arguments': { 'fdname': 'fd-mig' }}");
225 
226     qtest_qmp_fds_assert_success(from, &fds[1], 1,
227                                  "{ 'execute': 'getfd',"
228                                  "  'arguments': { 'fdname': 'fd-mig' }}");
229 
230     close(fds[0]);
231     close(fds[1]);
232 
233     return NULL;
234 }
235 
236 static void test_precopy_fd_file(void)
237 {
238     MigrateCommon args = {
239         .listen_uri = "defer",
240         .connect_uri = "fd:fd-mig",
241         .start_hook = migrate_hook_start_precopy_fd_file,
242         .end_hook = migrate_hook_end_fd,
243     };
244     test_file_common(&args, true);
245 }
246 #endif /* _WIN32 */
247 
248 /*
249  * The way auto_converge works, we need to do too many passes to
250  * run this test.  Auto_converge logic is only run once every
251  * three iterations, so:
252  *
253  * - 3 iterations without auto_converge enabled
254  * - 3 iterations with pct = 5
255  * - 3 iterations with pct = 30
256  * - 3 iterations with pct = 55
257  * - 3 iterations with pct = 80
 * - 3 iterations with pct = 95 (min(95, 80 + 25))
259  *
260  * To make things even worse, we need to run the initial stage at
261  * 3MB/s so we enter autoconverge even when host is (over)loaded.
262  */
263 static void test_auto_converge(void)
264 {
265     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
266     MigrateStart args = {};
267     QTestState *from, *to;
268     int64_t percentage;
269 
270     /*
271      * We want the test to be stable and as fast as possible.
272      * E.g., with 1Gb/s bandwidth migration may pass without throttling,
273      * so we need to decrease a bandwidth.
274      */
275     const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;
276     uint64_t prev_dirty_sync_cnt, dirty_sync_cnt;
277     int max_try_count, hit = 0;
278 
279     if (migrate_start(&from, &to, uri, &args)) {
280         return;
281     }
282 
283     migrate_set_capability(from, "auto-converge", true);
284     migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct);
285     migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct);
286     migrate_set_parameter_int(from, "max-cpu-throttle", max_pct);
287 
288     /*
289      * Set the initial parameters so that the migration could not converge
290      * without throttling.
291      */
292     migrate_ensure_non_converge(from);
293 
294     /* To check remaining size after precopy */
295     migrate_set_capability(from, "pause-before-switchover", true);
296 
297     /* Wait for the first serial output from the source */
298     wait_for_serial("src_serial");
299 
300     migrate_qmp(from, to, uri, NULL, "{}");
301 
302     /* Wait for throttling begins */
303     percentage = 0;
304     do {
305         percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
306         if (percentage != 0) {
307             break;
308         }
309         usleep(20);
310         g_assert_false(get_src()->stop_seen);
311     } while (true);
312     /* The first percentage of throttling should be at least init_pct */
313     g_assert_cmpint(percentage, >=, init_pct);
314 
315     /*
316      * End the loop when the dirty sync count greater than 1.
317      */
318     while ((dirty_sync_cnt = get_migration_pass(from)) < 2) {
319         usleep(1000 * 1000);
320     }
321 
322     prev_dirty_sync_cnt = dirty_sync_cnt;
323 
324     /*
325      * The RAMBlock dirty sync count must changes in 5 seconds, here we set
326      * the timeout to 10 seconds to ensure it changes.
327      *
328      * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s,
329      * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3)
330      * to complete; this ensures that the RAMBlock dirty sync occurs.
331      */
332     max_try_count = 10;
333     while (--max_try_count) {
334         dirty_sync_cnt = get_migration_pass(from);
335         if (dirty_sync_cnt != prev_dirty_sync_cnt) {
336             hit = 1;
337             break;
338         }
339         prev_dirty_sync_cnt = dirty_sync_cnt;
340         sleep(1);
341     }
342     g_assert_cmpint(hit, ==, 1);
343 
344     /* Now, when we tested that throttling works, let it converge */
345     migrate_ensure_converge(from);
346 
347     /*
348      * Wait for pre-switchover status to check last throttle percentage
349      * and remaining. These values will be zeroed later
350      */
351     wait_for_migration_status(from, "pre-switchover", NULL);
352 
353     /* The final percentage of throttling shouldn't be greater than max_pct */
354     percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
355     g_assert_cmpint(percentage, <=, max_pct);
356     migrate_continue(from, "pre-switchover");
357 
358     qtest_qmp_eventwait(to, "RESUME");
359 
360     wait_for_serial("dest_serial");
361     wait_for_migration_complete(from);
362 
363     migrate_end(from, to, true);
364 }
365 
366 static void *
367 migrate_hook_start_precopy_tcp_multifd(QTestState *from,
368                                        QTestState *to)
369 {
370     return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
371 }
372 
373 static void *
374 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from,
375                                                         QTestState *to)
376 {
377     migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
378     migrate_set_parameter_str(from, "zero-page-detection", "legacy");
379     return NULL;
380 }
381 
382 static void *
383 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from,
384                                                     QTestState *to)
385 {
386     migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
387     migrate_set_parameter_str(from, "zero-page-detection", "none");
388     return NULL;
389 }
390 
391 static void test_multifd_tcp_uri_none(void)
392 {
393     MigrateCommon args = {
394         .listen_uri = "defer",
395         .start_hook = migrate_hook_start_precopy_tcp_multifd,
396         /*
397          * Multifd is more complicated than most of the features, it
398          * directly takes guest page buffers when sending, make sure
399          * everything will work alright even if guest page is changing.
400          */
401         .live = true,
402     };
403     test_precopy_common(&args);
404 }
405 
406 static void test_multifd_tcp_zero_page_legacy(void)
407 {
408     MigrateCommon args = {
409         .listen_uri = "defer",
410         .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy,
411         /*
412          * Multifd is more complicated than most of the features, it
413          * directly takes guest page buffers when sending, make sure
414          * everything will work alright even if guest page is changing.
415          */
416         .live = true,
417     };
418     test_precopy_common(&args);
419 }
420 
421 static void test_multifd_tcp_no_zero_page(void)
422 {
423     MigrateCommon args = {
424         .listen_uri = "defer",
425         .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page,
426         /*
427          * Multifd is more complicated than most of the features, it
428          * directly takes guest page buffers when sending, make sure
429          * everything will work alright even if guest page is changing.
430          */
431         .live = true,
432     };
433     test_precopy_common(&args);
434 }
435 
436 static void test_multifd_tcp_channels_none(void)
437 {
438     MigrateCommon args = {
439         .listen_uri = "defer",
440         .start_hook = migrate_hook_start_precopy_tcp_multifd,
441         .live = true,
442         .connect_channels = ("[ { 'channel-type': 'main',"
443                              "    'addr': { 'transport': 'socket',"
444                              "              'type': 'inet',"
445                              "              'host': '127.0.0.1',"
446                              "              'port': '0' } } ]"),
447     };
448     test_precopy_common(&args);
449 }
450 
451 /*
452  * This test does:
453  *  source               target
454  *                       migrate_incoming
455  *     migrate
456  *     migrate_cancel
457  *                       launch another target
458  *     migrate
459  *
460  *  And see that it works
461  */
462 static void test_multifd_tcp_cancel(void)
463 {
464     MigrateStart args = {
465         .hide_stderr = true,
466     };
467     QTestState *from, *to, *to2;
468 
469     if (migrate_start(&from, &to, "defer", &args)) {
470         return;
471     }
472 
473     migrate_ensure_non_converge(from);
474     migrate_prepare_for_dirty_mem(from);
475 
476     migrate_set_parameter_int(from, "multifd-channels", 16);
477     migrate_set_parameter_int(to, "multifd-channels", 16);
478 
479     migrate_set_capability(from, "multifd", true);
480     migrate_set_capability(to, "multifd", true);
481 
482     /* Start incoming migration from the 1st socket */
483     migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");
484 
485     /* Wait for the first serial output from the source */
486     wait_for_serial("src_serial");
487 
488     migrate_qmp(from, to, NULL, NULL, "{}");
489 
490     migrate_wait_for_dirty_mem(from, to);
491 
492     migrate_cancel(from);
493 
494     /* Make sure QEMU process "to" exited */
495     qtest_set_expected_status(to, EXIT_FAILURE);
496     qtest_wait_qemu(to);
497     qtest_quit(to);
498 
499     /*
500      * Ensure the source QEMU finishes its cancellation process before we
501      * proceed with the setup of the next migration. The migrate_start()
502      * function and others might want to interact with the source in a way that
503      * is not possible while the migration is not canceled properly. For
504      * example, setting migration capabilities when the migration is still
505      * running leads to an error.
506      */
507     wait_for_migration_status(from, "cancelled", NULL);
508 
509     args = (MigrateStart){
510         .only_target = true,
511     };
512 
513     if (migrate_start(&from, &to2, "defer", &args)) {
514         return;
515     }
516 
517     migrate_set_parameter_int(to2, "multifd-channels", 16);
518 
519     migrate_set_capability(to2, "multifd", true);
520 
521     /* Start incoming migration from the 1st socket */
522     migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}");
523 
524     migrate_ensure_non_converge(from);
525 
526     migrate_qmp(from, to2, NULL, NULL, "{}");
527 
528     migrate_wait_for_dirty_mem(from, to2);
529 
530     migrate_ensure_converge(from);
531 
532     wait_for_stop(from, get_src());
533     qtest_qmp_eventwait(to2, "RESUME");
534 
535     wait_for_serial("dest_serial");
536     wait_for_migration_complete(from);
537     migrate_end(from, to2, true);
538 }
539 
540 static void test_cancel_src_after_failed(QTestState *from, QTestState *to,
541                                          const char *uri, const char *phase)
542 {
543     /*
544      * No migrate_incoming_qmp() at the start to force source into
545      * failed state during migrate_qmp().
546      */
547 
548     wait_for_serial("src_serial");
549     migrate_ensure_converge(from);
550 
551     migrate_qmp(from, to, uri, NULL, "{}");
552 
553     migration_event_wait(from, phase);
554     migrate_cancel(from);
555 
556     /* cancelling will not move the migration out of 'failed' */
557 
558     wait_for_migration_status(from, "failed",
559                               (const char * []) { "completed", NULL });
560 
561     /*
562      * Not waiting for the destination because it never started
563      * migration.
564      */
565 }
566 
567 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to,
568                                             const char *uri, const char *phase)
569 {
570     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
571 
572     wait_for_serial("src_serial");
573     migrate_ensure_converge(from);
574 
575     migrate_qmp(from, to, uri, NULL, "{}");
576 
577     /* To move to cancelled/cancelling */
578     migrate_cancel(from);
579     migration_event_wait(from, phase);
580 
581     /* The migrate_cancel under test */
582     migrate_cancel(from);
583 
584     wait_for_migration_status(from, "cancelled",
585                               (const char * []) { "completed", NULL });
586 
587     wait_for_migration_status(to, "failed",
588                               (const char * []) { "completed", NULL });
589 }
590 
591 static void test_cancel_src_after_complete(QTestState *from, QTestState *to,
592                                            const char *uri, const char *phase)
593 {
594     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
595 
596     wait_for_serial("src_serial");
597     migrate_ensure_converge(from);
598 
599     migrate_qmp(from, to, uri, NULL, "{}");
600 
601     migration_event_wait(from, phase);
602     migrate_cancel(from);
603 
604     /*
605      * qmp_migrate_cancel() exits early if migration is not running
606      * anymore, the status will not change to cancelled.
607      */
608     wait_for_migration_complete(from);
609     wait_for_migration_complete(to);
610 }
611 
612 static void test_cancel_src_after_none(QTestState *from, QTestState *to,
613                                        const char *uri, const char *phase)
614 {
615     /*
616      * Test that cancelling without a migration happening does not
617      * affect subsequent migrations
618      */
619     migrate_cancel(to);
620 
621     wait_for_serial("src_serial");
622     migrate_cancel(from);
623 
624     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
625 
626     migrate_ensure_converge(from);
627     migrate_qmp(from, to, uri, NULL, "{}");
628 
629     wait_for_migration_complete(from);
630     wait_for_migration_complete(to);
631 }
632 
633 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to,
634                                            const char *uri, const char *phase)
635 {
636     migrate_set_capability(from, "pause-before-switchover", true);
637     migrate_set_capability(to, "pause-before-switchover", true);
638 
639     migrate_set_capability(from, "multifd", true);
640     migrate_set_capability(to, "multifd", true);
641 
642     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
643 
644     wait_for_serial("src_serial");
645     migrate_ensure_converge(from);
646 
647     migrate_qmp(from, to, uri, NULL, "{}");
648 
649     migration_event_wait(from, phase);
650     migrate_cancel(from);
651     migration_event_wait(from, "cancelling");
652 
653     wait_for_migration_status(from, "cancelled",
654                               (const char * []) { "completed", NULL });
655 
656     wait_for_migration_status(to, "failed",
657                               (const char * []) { "completed", NULL });
658 }
659 
660 static void test_cancel_src_after_status(void *opaque)
661 {
662     const char *test_path = opaque;
663     g_autofree char *phase = g_path_get_basename(test_path);
664     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
665     QTestState *from, *to;
666     MigrateStart args = {
667         .hide_stderr = true,
668     };
669 
670     if (migrate_start(&from, &to, "defer", &args)) {
671         return;
672     }
673 
674     if (g_str_equal(phase, "cancelling") ||
675         g_str_equal(phase, "cancelled")) {
676         test_cancel_src_after_cancelled(from, to, uri, phase);
677 
678     } else if (g_str_equal(phase, "completed")) {
679         test_cancel_src_after_complete(from, to, uri, phase);
680 
681     } else if (g_str_equal(phase, "failed")) {
682         test_cancel_src_after_failed(from, to, uri, phase);
683 
684     } else if (g_str_equal(phase, "none")) {
685         test_cancel_src_after_none(from, to, uri, phase);
686 
687     } else {
688         /* any state that comes before pre-switchover */
689         test_cancel_src_pre_switchover(from, to, uri, phase);
690     }
691 
692     migrate_end(from, to, false);
693 }
694 
695 static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
696 {
697     qtest_qmp_assert_success(who,
698                              "{ 'execute': 'calc-dirty-rate',"
699                              "'arguments': { "
700                              "'calc-time': %" PRIu64 ","
701                              "'mode': 'dirty-ring' }}",
702                              calc_time);
703 }
704 
705 static QDict *query_dirty_rate(QTestState *who)
706 {
707     return qtest_qmp_assert_success_ref(who,
708                                         "{ 'execute': 'query-dirty-rate' }");
709 }
710 
711 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
712 {
713     qtest_qmp_assert_success(who,
714                              "{ 'execute': 'set-vcpu-dirty-limit',"
715                              "'arguments': { "
716                              "'dirty-rate': %" PRIu64 " } }",
717                              dirtyrate);
718 }
719 
720 static void cancel_vcpu_dirty_limit(QTestState *who)
721 {
722     qtest_qmp_assert_success(who,
723                              "{ 'execute': 'cancel-vcpu-dirty-limit' }");
724 }
725 
726 static QDict *query_vcpu_dirty_limit(QTestState *who)
727 {
728     QDict *rsp;
729 
730     rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
731     g_assert(!qdict_haskey(rsp, "error"));
732     g_assert(qdict_haskey(rsp, "return"));
733 
734     return rsp;
735 }
736 
737 static bool calc_dirtyrate_ready(QTestState *who)
738 {
739     QDict *rsp_return;
740     const char *status;
741     bool ready;
742 
743     rsp_return = query_dirty_rate(who);
744     g_assert(rsp_return);
745 
746     status = qdict_get_str(rsp_return, "status");
747     g_assert(status);
748     ready = g_strcmp0(status, "measuring");
749     qobject_unref(rsp_return);
750 
751     return ready;
752 }
753 
754 static void wait_for_calc_dirtyrate_complete(QTestState *who,
755                                              int64_t time_s)
756 {
757     int max_try_count = 10000;
758     usleep(time_s * 1000000);
759 
760     while (!calc_dirtyrate_ready(who) && max_try_count--) {
761         usleep(1000);
762     }
763 
764     /*
765      * Set the timeout with 10 s(max_try_count * 1000us),
766      * if dirtyrate measurement not complete, fail test.
767      */
768     g_assert_cmpint(max_try_count, !=, 0);
769 }
770 
771 static int64_t get_dirty_rate(QTestState *who)
772 {
773     QDict *rsp_return;
774     const char *status;
775     QList *rates;
776     const QListEntry *entry;
777     QDict *rate;
778     int64_t dirtyrate;
779 
780     rsp_return = query_dirty_rate(who);
781     g_assert(rsp_return);
782 
783     status = qdict_get_str(rsp_return, "status");
784     g_assert(status);
785     g_assert_cmpstr(status, ==, "measured");
786 
787     rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate");
788     g_assert(rates && !qlist_empty(rates));
789 
790     entry = qlist_first(rates);
791     g_assert(entry);
792 
793     rate = qobject_to(QDict, qlist_entry_obj(entry));
794     g_assert(rate);
795 
796     dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1);
797 
798     qobject_unref(rsp_return);
799     return dirtyrate;
800 }
801 
802 static int64_t get_limit_rate(QTestState *who)
803 {
804     QDict *rsp_return;
805     QList *rates;
806     const QListEntry *entry;
807     QDict *rate;
808     int64_t dirtyrate;
809 
810     rsp_return = query_vcpu_dirty_limit(who);
811     g_assert(rsp_return);
812 
813     rates = qdict_get_qlist(rsp_return, "return");
814     g_assert(rates && !qlist_empty(rates));
815 
816     entry = qlist_first(rates);
817     g_assert(entry);
818 
819     rate = qobject_to(QDict, qlist_entry_obj(entry));
820     g_assert(rate);
821 
822     dirtyrate = qdict_get_try_int(rate, "limit-rate", -1);
823 
824     qobject_unref(rsp_return);
825     return dirtyrate;
826 }
827 
828 static QTestState *dirtylimit_start_vm(void)
829 {
830     QTestState *vm = NULL;
831     g_autofree gchar *cmd = NULL;
832     const char *bootpath;
833 
834     bootpath = bootfile_create(qtest_get_arch(), tmpfs, false);
835     cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
836                           "-name dirtylimit-test,debug-threads=on "
837                           "-m 150M -smp 1 "
838                           "-serial file:%s/vm_serial "
839                           "-drive file=%s,format=raw ",
840                           tmpfs, bootpath);
841 
842     vm = qtest_init(cmd);
843     return vm;
844 }
845 
846 static void dirtylimit_stop_vm(QTestState *vm)
847 {
848     g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial");
849 
850     qtest_quit(vm);
851     unlink(path);
852 }
853 
854 static void test_vcpu_dirty_limit(void)
855 {
856     QTestState *vm;
857     int64_t origin_rate;
858     int64_t quota_rate;
859     int64_t rate ;
860     int max_try_count = 20;
861     int hit = 0;
862 
863     /* Start vm for vcpu dirtylimit test */
864     vm = dirtylimit_start_vm();
865 
866     /* Wait for the first serial output from the vm*/
867     wait_for_serial("vm_serial");
868 
869     /* Do dirtyrate measurement with calc time equals 1s */
870     calc_dirty_rate(vm, 1);
871 
872     /* Sleep calc time and wait for calc dirtyrate complete */
873     wait_for_calc_dirtyrate_complete(vm, 1);
874 
875     /* Query original dirty page rate */
876     origin_rate = get_dirty_rate(vm);
877 
878     /* VM booted from bootsect should dirty memory steadily */
879     assert(origin_rate != 0);
880 
881     /* Setup quota dirty page rate at half of origin */
882     quota_rate = origin_rate / 2;
883 
884     /* Set dirtylimit */
885     dirtylimit_set_all(vm, quota_rate);
886 
887     /*
888      * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit
889      * works literally
890      */
891     g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
892 
893     /* Sleep a bit to check if it take effect */
894     usleep(2000000);
895 
896     /*
897      * Check if dirtylimit take effect realistically, set the
898      * timeout with 20 s(max_try_count * 1s), if dirtylimit
899      * doesn't take effect, fail test.
900      */
901     while (--max_try_count) {
902         calc_dirty_rate(vm, 1);
903         wait_for_calc_dirtyrate_complete(vm, 1);
904         rate = get_dirty_rate(vm);
905 
906         /*
907          * Assume hitting if current rate is less
908          * than quota rate (within accepting error)
909          */
910         if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
911             hit = 1;
912             break;
913         }
914     }
915 
916     g_assert_cmpint(hit, ==, 1);
917 
918     hit = 0;
919     max_try_count = 20;
920 
921     /* Check if dirtylimit cancellation take effect */
922     cancel_vcpu_dirty_limit(vm);
923     while (--max_try_count) {
924         calc_dirty_rate(vm, 1);
925         wait_for_calc_dirtyrate_complete(vm, 1);
926         rate = get_dirty_rate(vm);
927 
928         /*
929          * Assume dirtylimit be canceled if current rate is
930          * greater than quota rate (within accepting error)
931          */
932         if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
933             hit = 1;
934             break;
935         }
936     }
937 
938     g_assert_cmpint(hit, ==, 1);
939     dirtylimit_stop_vm(vm);
940 }
941 
942 static void migrate_dirty_limit_wait_showup(QTestState *from,
943                                             const int64_t period,
944                                             const int64_t value)
945 {
946     /* Enable dirty limit capability */
947     migrate_set_capability(from, "dirty-limit", true);
948 
949     /* Set dirty limit parameters */
950     migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
951     migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
952 
953     /* Make sure migrate can't converge */
954     migrate_ensure_non_converge(from);
955 
956     /* To check limit rate after precopy */
957     migrate_set_capability(from, "pause-before-switchover", true);
958 
959     /* Wait for the serial output from the source */
960     wait_for_serial("src_serial");
961 }
962 
963 /*
964  * This test does:
965  *  source                          destination
966  *  start vm
967  *                                  start incoming vm
968  *  migrate
969  *  wait dirty limit to begin
970  *  cancel migrate
971  *  cancellation check
972  *                                  restart incoming vm
973  *  migrate
974  *  wait dirty limit to begin
975  *  wait pre-switchover event
976  *  convergence condition check
977  *
978  * And see if dirty limit migration works correctly.
979  * This test case involves many passes, so it runs in slow mode only.
980  */
981 static void test_dirty_limit(void)
982 {
983     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
984     QTestState *from, *to;
985     int64_t remaining;
986     uint64_t throttle_us_per_full;
987     /*
988      * We want the test to be stable and as fast as possible.
989      * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
990      * so we need to decrease a bandwidth.
991      */
992     const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
993     const int64_t max_bandwidth = 400000000; /* ~400Mb/s */
994     const int64_t downtime_limit = 250; /* 250ms */
995     /*
996      * We migrate through unix-socket (> 500Mb/s).
997      * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
998      * So, we can predict expected_threshold
999      */
1000     const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
1001     int max_try_count = 10;
1002     MigrateCommon args = {
1003         .start = {
1004             .hide_stderr = true,
1005             .use_dirty_ring = true,
1006         },
1007         .listen_uri = uri,
1008         .connect_uri = uri,
1009     };
1010 
1011     /* Start src, dst vm */
1012     if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1013         return;
1014     }
1015 
1016     /* Prepare for dirty limit migration and wait src vm show up */
1017     migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
1018 
1019     /* Start migrate */
1020     migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1021 
1022     /* Wait for dirty limit throttle begin */
1023     throttle_us_per_full = 0;
1024     while (throttle_us_per_full == 0) {
1025         throttle_us_per_full =
1026             read_migrate_property_int(from,
1027                                       "dirty-limit-throttle-time-per-round");
1028         usleep(100);
1029         g_assert_false(get_src()->stop_seen);
1030     }
1031 
1032     /* Now cancel migrate and wait for dirty limit throttle switch off */
1033     migrate_cancel(from);
1034     wait_for_migration_status(from, "cancelled", NULL);
1035 
1036     /* destination always fails after cancel */
1037     migration_event_wait(to, "failed");
1038     qtest_set_expected_status(to, EXIT_FAILURE);
1039     qtest_quit(to);
1040 
1041     /* Check if dirty limit throttle switched off, set timeout 1ms */
1042     do {
1043         throttle_us_per_full =
1044             read_migrate_property_int(from,
1045                                       "dirty-limit-throttle-time-per-round");
1046         usleep(100);
1047         g_assert_false(get_src()->stop_seen);
1048     } while (throttle_us_per_full != 0 && --max_try_count);
1049 
1050     /* Assert dirty limit is not in service */
1051     g_assert_cmpint(throttle_us_per_full, ==, 0);
1052 
1053     args = (MigrateCommon) {
1054         .start = {
1055             .only_target = true,
1056             .use_dirty_ring = true,
1057         },
1058         .listen_uri = uri,
1059         .connect_uri = uri,
1060     };
1061 
1062     /* Restart dst vm, src vm already show up so we needn't wait anymore */
1063     if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1064         return;
1065     }
1066 
1067     /* Start migrate */
1068     migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1069 
1070     /* Wait for dirty limit throttle begin */
1071     throttle_us_per_full = 0;
1072     while (throttle_us_per_full == 0) {
1073         throttle_us_per_full =
1074             read_migrate_property_int(from,
1075                                       "dirty-limit-throttle-time-per-round");
1076         usleep(100);
1077         g_assert_false(get_src()->stop_seen);
1078     }
1079 
1080     /*
1081      * The dirty limit rate should equals the return value of
1082      * query-vcpu-dirty-limit if dirty limit cap set
1083      */
1084     g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from));
1085 
1086     /* Now, we have tested if dirty limit works, let it converge */
1087     migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
1088     migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
1089 
1090     /*
1091      * Wait for pre-switchover status to check if migration
1092      * satisfy the convergence condition
1093      */
1094     wait_for_migration_status(from, "pre-switchover", NULL);
1095 
1096     remaining = read_ram_property_int(from, "remaining");
1097     g_assert_cmpint(remaining, <,
1098                     (expected_threshold + expected_threshold / 100));
1099 
1100     migrate_continue(from, "pre-switchover");
1101 
1102     qtest_qmp_eventwait(to, "RESUME");
1103 
1104     wait_for_serial("dest_serial");
1105     wait_for_migration_complete(from);
1106 
1107     migrate_end(from, to, true);
1108 }
1109 
1110 static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
1111 {
1112     if (env->is_x86) {
1113         migration_test_add("/migration/precopy/unix/suspend/live",
1114                            test_precopy_unix_suspend_live);
1115         migration_test_add("/migration/precopy/unix/suspend/notlive",
1116                            test_precopy_unix_suspend_notlive);
1117     }
1118 
1119     migration_test_add("/migration/precopy/unix/plain",
1120                        test_precopy_unix_plain);
1121 
1122     migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain);
1123     migration_test_add("/migration/multifd/tcp/uri/plain/none",
1124                        test_multifd_tcp_uri_none);
1125     migration_test_add("/migration/multifd/tcp/plain/cancel",
1126                        test_multifd_tcp_cancel);
1127 }
1128 
1129 void migration_test_add_precopy(MigrationTestEnv *env)
1130 {
1131     tmpfs = env->tmpfs;
1132 
1133     migration_test_add_precopy_smoke(env);
1134 
1135     if (!env->full_set) {
1136         return;
1137     }
1138 
1139     migration_test_add("/migration/precopy/tcp/plain/switchover-ack",
1140                        test_precopy_tcp_switchover_ack);
1141 
1142 #ifndef _WIN32
1143     migration_test_add("/migration/precopy/fd/tcp",
1144                        test_precopy_fd_socket);
1145     migration_test_add("/migration/precopy/fd/file",
1146                        test_precopy_fd_file);
1147 #endif
1148 
1149     /*
1150      * See explanation why this test is slow on function definition
1151      */
1152     if (g_test_slow()) {
1153         migration_test_add("/migration/auto_converge",
1154                            test_auto_converge);
1155         if (g_str_equal(env->arch, "x86_64") &&
1156             env->has_kvm && env->has_dirty_ring) {
1157             migration_test_add("/dirty_limit",
1158                                test_dirty_limit);
1159         }
1160     }
1161     migration_test_add("/migration/multifd/tcp/channels/plain/none",
1162                        test_multifd_tcp_channels_none);
1163     migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
1164                        test_multifd_tcp_zero_page_legacy);
1165     migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
1166                        test_multifd_tcp_no_zero_page);
1167     if (g_str_equal(env->arch, "x86_64")
1168         && env->has_kvm && env->has_dirty_ring) {
1169 
1170         migration_test_add("/migration/dirty_ring",
1171                            test_precopy_unix_dirty_ring);
1172         if (qtest_has_machine("pc") && g_test_slow()) {
1173             migration_test_add("/migration/vcpu_dirty_limit",
1174                                test_vcpu_dirty_limit);
1175         }
1176     }
1177 
1178     /* ensure new status don't go unnoticed */
1179     assert(MIGRATION_STATUS__MAX == 15);
1180 
1181     for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
1182         switch (i) {
1183         case MIGRATION_STATUS_DEVICE: /* happens too fast */
1184         case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */
1185         case MIGRATION_STATUS_COLO: /* no support in tests */
1186         case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
1187         case MIGRATION_STATUS_POSTCOPY_PAUSED:
1188         case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1189         case MIGRATION_STATUS_POSTCOPY_RECOVER:
1190             continue;
1191         default:
1192             migration_test_add_suffix("/migration/cancel/src/after/",
1193                                       MigrationStatus_str(i),
1194                                       test_cancel_src_after_status);
1195         }
1196     }
1197 }
1198