xref: /qemu/tests/qtest/migration/precopy-tests.c (revision 7d9849c3c41463ab9ba40348a8606927dc0fb85d)
1 /*
2  * QTest testcase for precopy migration
3  *
4  * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates
5  *   based on the vhost-user-test.c that is:
6  *      Copyright (c) 2014 Virtual Open Systems Sarl.
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  *
11  */
12 
13 #include "qemu/osdep.h"
14 #include "chardev/char.h"
15 #include "crypto/tlscredspsk.h"
16 #include "libqtest.h"
17 #include "migration/bootfile.h"
18 #include "migration/framework.h"
19 #include "migration/migration-qmp.h"
20 #include "migration/migration-util.h"
21 #include "ppc-util.h"
22 #include "qobject/qlist.h"
23 #include "qapi-types-migration.h"
24 #include "qemu/module.h"
25 #include "qemu/option.h"
26 #include "qemu/range.h"
27 #include "qemu/sockets.h"
28 
29 
30 /*
 * Dirtylimit stops working if the dirty page rate error
 * value is less than DIRTYLIMIT_TOLERANCE_RANGE
33  */
34 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
35 
36 static char *tmpfs;
37 
38 static void test_precopy_unix_plain(void)
39 {
40     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
41     MigrateCommon args = {
42         .listen_uri = uri,
43         .connect_uri = uri,
44         /*
45          * The simplest use case of precopy, covering smoke tests of
46          * get-dirty-log dirty tracking.
47          */
48         .live = true,
49     };
50 
51     test_precopy_common(&args);
52 }
53 
54 static void test_precopy_unix_suspend_live(void)
55 {
56     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
57     MigrateCommon args = {
58         .listen_uri = uri,
59         .connect_uri = uri,
60         /*
61          * despite being live, the test is fast because the src
62          * suspends immediately.
63          */
64         .live = true,
65         .start.suspend_me = true,
66     };
67 
68     test_precopy_common(&args);
69 }
70 
71 static void test_precopy_unix_suspend_notlive(void)
72 {
73     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
74     MigrateCommon args = {
75         .listen_uri = uri,
76         .connect_uri = uri,
77         .start.suspend_me = true,
78     };
79 
80     test_precopy_common(&args);
81 }
82 
83 static void test_precopy_unix_dirty_ring(void)
84 {
85     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
86     MigrateCommon args = {
87         .start = {
88             .use_dirty_ring = true,
89         },
90         .listen_uri = uri,
91         .connect_uri = uri,
92         /*
93          * Besides the precopy/unix basic test, cover dirty ring interface
94          * rather than get-dirty-log.
95          */
96         .live = true,
97     };
98 
99     test_precopy_common(&args);
100 }
101 
102 #ifdef CONFIG_RDMA
103 
104 #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
105 static int new_rdma_link(char *buffer)
106 {
107     char cmd[256];
108     bool verbose = g_getenv("QTEST_LOG");
109 
110     snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER,
111              verbose ? "" : "2>/dev/null");
112 
113     FILE *pipe = popen(cmd, "r");
114     if (pipe == NULL) {
115         perror("Failed to run script");
116         return -1;
117     }
118 
119     int idx = 0;
120     while (fgets(buffer + idx, 128 - idx, pipe) != NULL) {
121         idx += strlen(buffer);
122     }
123 
124     int status = pclose(pipe);
125     if (status == -1) {
126         perror("Error reported by pclose()");
127         return -1;
128     } else if (WIFEXITED(status)) {
129         return WEXITSTATUS(status);
130     }
131 
132     return -1;
133 }
134 
135 static void test_precopy_rdma_plain(void)
136 {
137     char buffer[128] = {};
138 
139     if (new_rdma_link(buffer)) {
140         g_test_skip("No rdma link available\n"
141                     "# To enable the test:\n"
142                     "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to "
143                     "setup a new rdma/rxe link and rerun the test\n"
144                     "# Optional: run 'scripts/rdma-migration-helper.sh clean' "
145                     "to revert the 'setup'");
146         return;
147     }
148 
149     /*
150      * TODO: query a free port instead of hard code.
151      * 29200=('R'+'D'+'M'+'A')*100
152      **/
153     g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
154 
155     MigrateCommon args = {
156         .listen_uri = uri,
157         .connect_uri = uri,
158     };
159 
160     test_precopy_common(&args);
161 }
162 #endif
163 
164 static void test_precopy_tcp_plain(void)
165 {
166     MigrateCommon args = {
167         .listen_uri = "tcp:127.0.0.1:0",
168     };
169 
170     test_precopy_common(&args);
171 }
172 
173 static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to)
174 {
175 
176     migrate_set_capability(from, "return-path", true);
177     migrate_set_capability(to, "return-path", true);
178 
179     migrate_set_capability(from, "switchover-ack", true);
180     migrate_set_capability(to, "switchover-ack", true);
181 
182     return NULL;
183 }
184 
185 static void test_precopy_tcp_switchover_ack(void)
186 {
187     MigrateCommon args = {
188         .listen_uri = "tcp:127.0.0.1:0",
189         .start_hook = migrate_hook_start_switchover_ack,
190         /*
191          * Source VM must be running in order to consider the switchover ACK
192          * when deciding to do switchover or not.
193          */
194         .live = true,
195     };
196 
197     test_precopy_common(&args);
198 }
199 
200 #ifndef _WIN32
201 static void *migrate_hook_start_fd(QTestState *from,
202                                    QTestState *to)
203 {
204     int ret;
205     int pair[2];
206 
207     /* Create two connected sockets for migration */
208     ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair);
209     g_assert_cmpint(ret, ==, 0);
210 
211     /* Send the 1st socket to the target */
212     qtest_qmp_fds_assert_success(to, &pair[0], 1,
213                                  "{ 'execute': 'getfd',"
214                                  "  'arguments': { 'fdname': 'fd-mig' }}");
215     close(pair[0]);
216 
217     /* Start incoming migration from the 1st socket */
218     migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}");
219 
220     /* Send the 2nd socket to the target */
221     qtest_qmp_fds_assert_success(from, &pair[1], 1,
222                                  "{ 'execute': 'getfd',"
223                                  "  'arguments': { 'fdname': 'fd-mig' }}");
224     close(pair[1]);
225 
226     return NULL;
227 }
228 
229 static void migrate_hook_end_fd(QTestState *from,
230                                 QTestState *to,
231                                 void *opaque)
232 {
233     QDict *rsp;
234     const char *error_desc;
235 
236     /* Test closing fds */
237     /*
238      * We assume, that QEMU removes named fd from its list,
239      * so this should fail.
240      */
241     rsp = qtest_qmp(from,
242                     "{ 'execute': 'closefd',"
243                     "  'arguments': { 'fdname': 'fd-mig' }}");
244     g_assert_true(qdict_haskey(rsp, "error"));
245     error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
246     g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
247     qobject_unref(rsp);
248 
249     rsp = qtest_qmp(to,
250                     "{ 'execute': 'closefd',"
251                     "  'arguments': { 'fdname': 'fd-mig' }}");
252     g_assert_true(qdict_haskey(rsp, "error"));
253     error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc");
254     g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found");
255     qobject_unref(rsp);
256 }
257 
258 static void test_precopy_fd_socket(void)
259 {
260     MigrateCommon args = {
261         .listen_uri = "defer",
262         .connect_uri = "fd:fd-mig",
263         .start_hook = migrate_hook_start_fd,
264         .end_hook = migrate_hook_end_fd,
265     };
266     test_precopy_common(&args);
267 }
268 
269 static void *migrate_hook_start_precopy_fd_file(QTestState *from,
270                                                 QTestState *to)
271 {
272     g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME);
273     int src_flags = O_CREAT | O_RDWR;
274     int dst_flags = O_CREAT | O_RDWR;
275     int fds[2];
276 
277     fds[0] = open(file, src_flags, 0660);
278     assert(fds[0] != -1);
279 
280     fds[1] = open(file, dst_flags, 0660);
281     assert(fds[1] != -1);
282 
283 
284     qtest_qmp_fds_assert_success(to, &fds[0], 1,
285                                  "{ 'execute': 'getfd',"
286                                  "  'arguments': { 'fdname': 'fd-mig' }}");
287 
288     qtest_qmp_fds_assert_success(from, &fds[1], 1,
289                                  "{ 'execute': 'getfd',"
290                                  "  'arguments': { 'fdname': 'fd-mig' }}");
291 
292     close(fds[0]);
293     close(fds[1]);
294 
295     return NULL;
296 }
297 
298 static void test_precopy_fd_file(void)
299 {
300     MigrateCommon args = {
301         .listen_uri = "defer",
302         .connect_uri = "fd:fd-mig",
303         .start_hook = migrate_hook_start_precopy_fd_file,
304         .end_hook = migrate_hook_end_fd,
305     };
306     test_file_common(&args, true);
307 }
308 #endif /* _WIN32 */
309 
/*
 * The way auto_converge works, we need to do too many passes to
 * run this test.  Auto_converge logic is only run once every
 * three iterations, so:
 *
 * - 3 iterations without auto_converge enabled
 * - 3 iterations with pct = 5
 * - 3 iterations with pct = 30
 * - 3 iterations with pct = 55
 * - 3 iterations with pct = 80
 * - 3 iterations with pct = 95 (min(95, 80 + 25))
 *
 * To make things even worse, we need to run the initial stage at
 * 3MB/s so we enter autoconverge even when host is (over)loaded.
 *
 * Test flow: start a non-converging migration with auto-converge enabled,
 * wait until cpu-throttle-percentage becomes non-zero, check that the
 * dirty sync count keeps advancing, then let the migration converge and
 * verify the throttle percentage at pre-switchover is within max_pct.
 */
static void test_auto_converge(void)
{
    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
    MigrateStart args = {};
    QTestState *from, *to;
    int64_t percentage;

    /*
     * We want the test to be stable and as fast as possible.
     * E.g., with 1Gb/s bandwidth migration may pass without throttling,
     * so we need to decrease the bandwidth.
     *
     * Throttle settings used below: initial percentage, increment per
     * step, and upper bound.
     */
    const int64_t init_pct = 5, inc_pct = 25, max_pct = 95;
    uint64_t prev_dirty_sync_cnt, dirty_sync_cnt;
    int max_try_count, hit = 0;

    /* A non-zero return means the VMs were not started; nothing to test */
    if (migrate_start(&from, &to, uri, &args)) {
        return;
    }

    migrate_set_capability(from, "auto-converge", true);
    migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct);
    migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct);
    migrate_set_parameter_int(from, "max-cpu-throttle", max_pct);

    /*
     * Set the initial parameters so that the migration could not converge
     * without throttling.
     */
    migrate_ensure_non_converge(from);

    /* To check remaining size after precopy */
    migrate_set_capability(from, "pause-before-switchover", true);

    /* Wait for the first serial output from the source */
    wait_for_serial("src_serial");

    migrate_qmp(from, to, uri, NULL, "{}");

    /*
     * Wait until throttling begins: poll every 20us until the reported
     * percentage is non-zero.  The source must not have stopped meanwhile.
     */
    percentage = 0;
    do {
        percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
        if (percentage != 0) {
            break;
        }
        usleep(20);
        g_assert_false(get_src()->stop_seen);
    } while (true);
    /* The first percentage of throttling should be at least init_pct */
    g_assert_cmpint(percentage, >=, init_pct);

    /*
     * End the loop when the dirty sync count is greater than 1,
     * polling once per second.
     */
    while ((dirty_sync_cnt = get_migration_pass(from)) < 2) {
        usleep(1000 * 1000);
    }

    prev_dirty_sync_cnt = dirty_sync_cnt;

    /*
     * The RAMBlock dirty sync count must change in 5 seconds, here we set
     * the timeout to 10 seconds to ensure it changes.
     *
     * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s,
     * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3)
     * to complete; this ensures that the RAMBlock dirty sync occurs.
     *
     * NOTE(review): because of the pre-decrement, the loop below polls at
     * most 9 times (about 9 seconds), not 10.
     */
    max_try_count = 10;
    while (--max_try_count) {
        dirty_sync_cnt = get_migration_pass(from);
        if (dirty_sync_cnt != prev_dirty_sync_cnt) {
            hit = 1;
            break;
        }
        /* Both counters are equal here, so this assignment is a no-op */
        prev_dirty_sync_cnt = dirty_sync_cnt;
        sleep(1);
    }
    g_assert_cmpint(hit, ==, 1);

    /* Now, when we tested that throttling works, let it converge */
    migrate_ensure_converge(from);

    /*
     * Wait for pre-switchover status to check last throttle percentage
     * and remaining. These values will be zeroed later
     */
    wait_for_migration_status(from, "pre-switchover", NULL);

    /* The final percentage of throttling shouldn't be greater than max_pct */
    percentage = read_migrate_property_int(from, "cpu-throttle-percentage");
    g_assert_cmpint(percentage, <=, max_pct);
    migrate_continue(from, "pre-switchover");

    /* The destination emits RESUME once it takes over */
    qtest_qmp_eventwait(to, "RESUME");

    wait_for_serial("dest_serial");
    wait_for_migration_complete(from);

    migrate_end(from, to, true);
}
427 
428 static void *
429 migrate_hook_start_precopy_tcp_multifd(QTestState *from,
430                                        QTestState *to)
431 {
432     return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
433 }
434 
435 static void *
436 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from,
437                                                         QTestState *to)
438 {
439     migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
440     migrate_set_parameter_str(from, "zero-page-detection", "legacy");
441     return NULL;
442 }
443 
444 static void *
445 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from,
446                                                     QTestState *to)
447 {
448     migrate_hook_start_precopy_tcp_multifd_common(from, to, "none");
449     migrate_set_parameter_str(from, "zero-page-detection", "none");
450     return NULL;
451 }
452 
453 static void test_multifd_tcp_uri_none(void)
454 {
455     MigrateCommon args = {
456         .listen_uri = "defer",
457         .start_hook = migrate_hook_start_precopy_tcp_multifd,
458         /*
459          * Multifd is more complicated than most of the features, it
460          * directly takes guest page buffers when sending, make sure
461          * everything will work alright even if guest page is changing.
462          */
463         .live = true,
464     };
465     test_precopy_common(&args);
466 }
467 
468 static void test_multifd_tcp_zero_page_legacy(void)
469 {
470     MigrateCommon args = {
471         .listen_uri = "defer",
472         .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy,
473         /*
474          * Multifd is more complicated than most of the features, it
475          * directly takes guest page buffers when sending, make sure
476          * everything will work alright even if guest page is changing.
477          */
478         .live = true,
479     };
480     test_precopy_common(&args);
481 }
482 
483 static void test_multifd_tcp_no_zero_page(void)
484 {
485     MigrateCommon args = {
486         .listen_uri = "defer",
487         .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page,
488         /*
489          * Multifd is more complicated than most of the features, it
490          * directly takes guest page buffers when sending, make sure
491          * everything will work alright even if guest page is changing.
492          */
493         .live = true,
494     };
495     test_precopy_common(&args);
496 }
497 
498 static void test_multifd_tcp_channels_none(void)
499 {
500     MigrateCommon args = {
501         .listen_uri = "defer",
502         .start_hook = migrate_hook_start_precopy_tcp_multifd,
503         .live = true,
504         .connect_channels = ("[ { 'channel-type': 'main',"
505                              "    'addr': { 'transport': 'socket',"
506                              "              'type': 'inet',"
507                              "              'host': '127.0.0.1',"
508                              "              'port': '0' } } ]"),
509     };
510     test_precopy_common(&args);
511 }
512 
/*
 * This test does:
 *  source               target
 *                       migrate_incoming
 *     migrate
 *     migrate_cancel
 *                       launch another target
 *     migrate
 *
 *  And see that it works
 */
static void test_multifd_tcp_cancel(void)
{
    MigrateStart args = {
        .hide_stderr = true,
    };
    QTestState *from, *to, *to2;

    /* A non-zero return means the VMs were not started; nothing to test */
    if (migrate_start(&from, &to, "defer", &args)) {
        return;
    }

    /* Keep the first migration running until it gets cancelled */
    migrate_ensure_non_converge(from);
    migrate_prepare_for_dirty_mem(from);

    migrate_set_parameter_int(from, "multifd-channels", 16);
    migrate_set_parameter_int(to, "multifd-channels", 16);

    migrate_set_capability(from, "multifd", true);
    migrate_set_capability(to, "multifd", true);

    /* Start incoming migration on the first target (TCP, port 0) */
    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");

    /* Wait for the first serial output from the source */
    wait_for_serial("src_serial");

    /*
     * No URI is passed here.
     * NOTE(review): presumably migrate_qmp() then takes the address from
     * the target - confirm in the migration QMP helpers.
     */
    migrate_qmp(from, to, NULL, NULL, "{}");

    migrate_wait_for_dirty_mem(from, to);

    migrate_cancel(from);

    /*
     * Make sure QEMU process "to" exited; EXIT_FAILURE is the exit status
     * expected from it after the cancellation.
     */
    qtest_set_expected_status(to, EXIT_FAILURE);
    qtest_wait_qemu(to);
    qtest_quit(to);

    /*
     * Ensure the source QEMU finishes its cancellation process before we
     * proceed with the setup of the next migration. The migrate_start()
     * function and others might want to interact with the source in a way that
     * is not possible while the migration is not canceled properly. For
     * example, setting migration capabilities when the migration is still
     * running leads to an error.
     */
    wait_for_migration_status(from, "cancelled", NULL);

    /* Second round: request only a new target via .only_target */
    args = (MigrateStart){
        .only_target = true,
    };

    if (migrate_start(&from, &to2, "defer", &args)) {
        return;
    }

    migrate_set_parameter_int(to2, "multifd-channels", 16);

    migrate_set_capability(to2, "multifd", true);

    /* Start incoming migration on the second target (TCP, port 0) */
    migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}");

    migrate_ensure_non_converge(from);

    migrate_qmp(from, to2, NULL, NULL, "{}");

    migrate_wait_for_dirty_mem(from, to2);

    /* This time let the migration finish */
    migrate_ensure_converge(from);

    wait_for_stop(from, get_src());
    qtest_qmp_eventwait(to2, "RESUME");

    wait_for_serial("dest_serial");
    wait_for_migration_complete(from);
    migrate_end(from, to2, true);
}
601 
602 static void test_cancel_src_after_failed(QTestState *from, QTestState *to,
603                                          const char *uri, const char *phase)
604 {
605     /*
606      * No migrate_incoming_qmp() at the start to force source into
607      * failed state during migrate_qmp().
608      */
609 
610     wait_for_serial("src_serial");
611     migrate_ensure_converge(from);
612 
613     migrate_qmp(from, to, uri, NULL, "{}");
614 
615     migration_event_wait(from, phase);
616     migrate_cancel(from);
617 
618     /* cancelling will not move the migration out of 'failed' */
619 
620     wait_for_migration_status(from, "failed",
621                               (const char * []) { "completed", NULL });
622 
623     /*
624      * Not waiting for the destination because it never started
625      * migration.
626      */
627 }
628 
629 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to,
630                                             const char *uri, const char *phase)
631 {
632     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
633 
634     wait_for_serial("src_serial");
635     migrate_ensure_converge(from);
636 
637     migrate_qmp(from, to, uri, NULL, "{}");
638 
639     /* To move to cancelled/cancelling */
640     migrate_cancel(from);
641     migration_event_wait(from, phase);
642 
643     /* The migrate_cancel under test */
644     migrate_cancel(from);
645 
646     wait_for_migration_status(from, "cancelled",
647                               (const char * []) { "completed", NULL });
648 
649     wait_for_migration_status(to, "failed",
650                               (const char * []) { "completed", NULL });
651 }
652 
653 static void test_cancel_src_after_complete(QTestState *from, QTestState *to,
654                                            const char *uri, const char *phase)
655 {
656     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
657 
658     wait_for_serial("src_serial");
659     migrate_ensure_converge(from);
660 
661     migrate_qmp(from, to, uri, NULL, "{}");
662 
663     migration_event_wait(from, phase);
664     migrate_cancel(from);
665 
666     /*
667      * qmp_migrate_cancel() exits early if migration is not running
668      * anymore, the status will not change to cancelled.
669      */
670     wait_for_migration_complete(from);
671     wait_for_migration_complete(to);
672 }
673 
674 static void test_cancel_src_after_none(QTestState *from, QTestState *to,
675                                        const char *uri, const char *phase)
676 {
677     /*
678      * Test that cancelling without a migration happening does not
679      * affect subsequent migrations
680      */
681     migrate_cancel(to);
682 
683     wait_for_serial("src_serial");
684     migrate_cancel(from);
685 
686     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
687 
688     migrate_ensure_converge(from);
689     migrate_qmp(from, to, uri, NULL, "{}");
690 
691     wait_for_migration_complete(from);
692     wait_for_migration_complete(to);
693 }
694 
695 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to,
696                                            const char *uri, const char *phase)
697 {
698     migrate_set_capability(from, "pause-before-switchover", true);
699     migrate_set_capability(to, "pause-before-switchover", true);
700 
701     migrate_set_capability(from, "multifd", true);
702     migrate_set_capability(to, "multifd", true);
703 
704     migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
705 
706     wait_for_serial("src_serial");
707     migrate_ensure_converge(from);
708 
709     migrate_qmp(from, to, uri, NULL, "{}");
710 
711     migration_event_wait(from, phase);
712     migrate_cancel(from);
713     migration_event_wait(from, "cancelling");
714 
715     wait_for_migration_status(from, "cancelled",
716                               (const char * []) { "completed", NULL });
717 
718     wait_for_migration_status(to, "failed",
719                               (const char * []) { "completed", NULL });
720 }
721 
722 static void test_cancel_src_after_status(void *opaque)
723 {
724     const char *test_path = opaque;
725     g_autofree char *phase = g_path_get_basename(test_path);
726     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
727     QTestState *from, *to;
728     MigrateStart args = {
729         .hide_stderr = true,
730     };
731 
732     if (migrate_start(&from, &to, "defer", &args)) {
733         return;
734     }
735 
736     if (g_str_equal(phase, "cancelling") ||
737         g_str_equal(phase, "cancelled")) {
738         test_cancel_src_after_cancelled(from, to, uri, phase);
739 
740     } else if (g_str_equal(phase, "completed")) {
741         test_cancel_src_after_complete(from, to, uri, phase);
742 
743     } else if (g_str_equal(phase, "failed")) {
744         test_cancel_src_after_failed(from, to, uri, phase);
745 
746     } else if (g_str_equal(phase, "none")) {
747         test_cancel_src_after_none(from, to, uri, phase);
748 
749     } else {
750         /* any state that comes before pre-switchover */
751         test_cancel_src_pre_switchover(from, to, uri, phase);
752     }
753 
754     migrate_end(from, to, false);
755 }
756 
757 static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
758 {
759     qtest_qmp_assert_success(who,
760                              "{ 'execute': 'calc-dirty-rate',"
761                              "'arguments': { "
762                              "'calc-time': %" PRIu64 ","
763                              "'mode': 'dirty-ring' }}",
764                              calc_time);
765 }
766 
767 static QDict *query_dirty_rate(QTestState *who)
768 {
769     return qtest_qmp_assert_success_ref(who,
770                                         "{ 'execute': 'query-dirty-rate' }");
771 }
772 
773 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
774 {
775     qtest_qmp_assert_success(who,
776                              "{ 'execute': 'set-vcpu-dirty-limit',"
777                              "'arguments': { "
778                              "'dirty-rate': %" PRIu64 " } }",
779                              dirtyrate);
780 }
781 
782 static void cancel_vcpu_dirty_limit(QTestState *who)
783 {
784     qtest_qmp_assert_success(who,
785                              "{ 'execute': 'cancel-vcpu-dirty-limit' }");
786 }
787 
788 static QDict *query_vcpu_dirty_limit(QTestState *who)
789 {
790     QDict *rsp;
791 
792     rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
793     g_assert(!qdict_haskey(rsp, "error"));
794     g_assert(qdict_haskey(rsp, "return"));
795 
796     return rsp;
797 }
798 
799 static bool calc_dirtyrate_ready(QTestState *who)
800 {
801     QDict *rsp_return;
802     const char *status;
803     bool ready;
804 
805     rsp_return = query_dirty_rate(who);
806     g_assert(rsp_return);
807 
808     status = qdict_get_str(rsp_return, "status");
809     g_assert(status);
810     ready = g_strcmp0(status, "measuring");
811     qobject_unref(rsp_return);
812 
813     return ready;
814 }
815 
816 static void wait_for_calc_dirtyrate_complete(QTestState *who,
817                                              int64_t time_s)
818 {
819     int max_try_count = 10000;
820     usleep(time_s * 1000000);
821 
822     while (!calc_dirtyrate_ready(who) && max_try_count--) {
823         usleep(1000);
824     }
825 
826     /*
827      * Set the timeout with 10 s(max_try_count * 1000us),
828      * if dirtyrate measurement not complete, fail test.
829      */
830     g_assert_cmpint(max_try_count, !=, 0);
831 }
832 
833 static int64_t get_dirty_rate(QTestState *who)
834 {
835     QDict *rsp_return;
836     const char *status;
837     QList *rates;
838     const QListEntry *entry;
839     QDict *rate;
840     int64_t dirtyrate;
841 
842     rsp_return = query_dirty_rate(who);
843     g_assert(rsp_return);
844 
845     status = qdict_get_str(rsp_return, "status");
846     g_assert(status);
847     g_assert_cmpstr(status, ==, "measured");
848 
849     rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate");
850     g_assert(rates && !qlist_empty(rates));
851 
852     entry = qlist_first(rates);
853     g_assert(entry);
854 
855     rate = qobject_to(QDict, qlist_entry_obj(entry));
856     g_assert(rate);
857 
858     dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1);
859 
860     qobject_unref(rsp_return);
861     return dirtyrate;
862 }
863 
864 static int64_t get_limit_rate(QTestState *who)
865 {
866     QDict *rsp_return;
867     QList *rates;
868     const QListEntry *entry;
869     QDict *rate;
870     int64_t dirtyrate;
871 
872     rsp_return = query_vcpu_dirty_limit(who);
873     g_assert(rsp_return);
874 
875     rates = qdict_get_qlist(rsp_return, "return");
876     g_assert(rates && !qlist_empty(rates));
877 
878     entry = qlist_first(rates);
879     g_assert(entry);
880 
881     rate = qobject_to(QDict, qlist_entry_obj(entry));
882     g_assert(rate);
883 
884     dirtyrate = qdict_get_try_int(rate, "limit-rate", -1);
885 
886     qobject_unref(rsp_return);
887     return dirtyrate;
888 }
889 
890 static QTestState *dirtylimit_start_vm(void)
891 {
892     QTestState *vm = NULL;
893     g_autofree gchar *cmd = NULL;
894     const char *bootpath;
895 
896     bootpath = bootfile_create(qtest_get_arch(), tmpfs, false);
897     cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
898                           "-name dirtylimit-test,debug-threads=on "
899                           "-m 150M -smp 1 "
900                           "-serial file:%s/vm_serial "
901                           "-drive file=%s,format=raw ",
902                           tmpfs, bootpath);
903 
904     vm = qtest_init(cmd);
905     return vm;
906 }
907 
908 static void dirtylimit_stop_vm(QTestState *vm)
909 {
910     g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial");
911 
912     qtest_quit(vm);
913     unlink(path);
914 }
915 
916 static void test_vcpu_dirty_limit(void)
917 {
918     QTestState *vm;
919     int64_t origin_rate;
920     int64_t quota_rate;
921     int64_t rate ;
922     int max_try_count = 20;
923     int hit = 0;
924 
925     /* Start vm for vcpu dirtylimit test */
926     vm = dirtylimit_start_vm();
927 
928     /* Wait for the first serial output from the vm*/
929     wait_for_serial("vm_serial");
930 
931     /* Do dirtyrate measurement with calc time equals 1s */
932     calc_dirty_rate(vm, 1);
933 
934     /* Sleep calc time and wait for calc dirtyrate complete */
935     wait_for_calc_dirtyrate_complete(vm, 1);
936 
937     /* Query original dirty page rate */
938     origin_rate = get_dirty_rate(vm);
939 
940     /* VM booted from bootsect should dirty memory steadily */
941     assert(origin_rate != 0);
942 
943     /* Setup quota dirty page rate at half of origin */
944     quota_rate = origin_rate / 2;
945 
946     /* Set dirtylimit */
947     dirtylimit_set_all(vm, quota_rate);
948 
949     /*
950      * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit
951      * works literally
952      */
953     g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
954 
955     /* Sleep a bit to check if it take effect */
956     usleep(2000000);
957 
958     /*
959      * Check if dirtylimit take effect realistically, set the
960      * timeout with 20 s(max_try_count * 1s), if dirtylimit
961      * doesn't take effect, fail test.
962      */
963     while (--max_try_count) {
964         calc_dirty_rate(vm, 1);
965         wait_for_calc_dirtyrate_complete(vm, 1);
966         rate = get_dirty_rate(vm);
967 
968         /*
969          * Assume hitting if current rate is less
970          * than quota rate (within accepting error)
971          */
972         if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
973             hit = 1;
974             break;
975         }
976     }
977 
978     g_assert_cmpint(hit, ==, 1);
979 
980     hit = 0;
981     max_try_count = 20;
982 
983     /* Check if dirtylimit cancellation take effect */
984     cancel_vcpu_dirty_limit(vm);
985     while (--max_try_count) {
986         calc_dirty_rate(vm, 1);
987         wait_for_calc_dirtyrate_complete(vm, 1);
988         rate = get_dirty_rate(vm);
989 
990         /*
991          * Assume dirtylimit be canceled if current rate is
992          * greater than quota rate (within accepting error)
993          */
994         if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
995             hit = 1;
996             break;
997         }
998     }
999 
1000     g_assert_cmpint(hit, ==, 1);
1001     dirtylimit_stop_vm(vm);
1002 }
1003 
1004 static void migrate_dirty_limit_wait_showup(QTestState *from,
1005                                             const int64_t period,
1006                                             const int64_t value)
1007 {
1008     /* Enable dirty limit capability */
1009     migrate_set_capability(from, "dirty-limit", true);
1010 
1011     /* Set dirty limit parameters */
1012     migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period);
1013     migrate_set_parameter_int(from, "vcpu-dirty-limit", value);
1014 
1015     /* Make sure migrate can't converge */
1016     migrate_ensure_non_converge(from);
1017 
1018     /* To check limit rate after precopy */
1019     migrate_set_capability(from, "pause-before-switchover", true);
1020 
1021     /* Wait for the serial output from the source */
1022     wait_for_serial("src_serial");
1023 }
1024 
1025 /*
1026  * This test does:
1027  *  source                          destination
1028  *  start vm
1029  *                                  start incoming vm
1030  *  migrate
1031  *  wait dirty limit to begin
1032  *  cancel migrate
1033  *  cancellation check
1034  *                                  restart incoming vm
1035  *  migrate
1036  *  wait dirty limit to begin
1037  *  wait pre-switchover event
1038  *  convergence condition check
1039  *
1040  * And see if dirty limit migration works correctly.
1041  * This test case involves many passes, so it runs in slow mode only.
1042  */
1043 static void test_dirty_limit(void)
1044 {
1045     g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
1046     QTestState *from, *to;
1047     int64_t remaining;
1048     uint64_t throttle_us_per_full;
1049     /*
1050      * We want the test to be stable and as fast as possible.
1051      * E.g., with 1Gb/s bandwidth migration may pass without dirty limit,
1052      * so we need to decrease a bandwidth.
1053      */
1054     const int64_t dirtylimit_period = 1000, dirtylimit_value = 50;
1055     const int64_t max_bandwidth = 400000000; /* ~400Mb/s */
1056     const int64_t downtime_limit = 250; /* 250ms */
1057     /*
1058      * We migrate through unix-socket (> 500Mb/s).
1059      * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s).
1060      * So, we can predict expected_threshold
1061      */
1062     const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000;
1063     int max_try_count = 10;
1064     MigrateCommon args = {
1065         .start = {
1066             .hide_stderr = true,
1067             .use_dirty_ring = true,
1068         },
1069         .listen_uri = uri,
1070         .connect_uri = uri,
1071     };
1072 
1073     /* Start src, dst vm */
1074     if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1075         return;
1076     }
1077 
1078     /* Prepare for dirty limit migration and wait src vm show up */
1079     migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value);
1080 
1081     /* Start migrate */
1082     migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1083 
1084     /* Wait for dirty limit throttle begin */
1085     throttle_us_per_full = 0;
1086     while (throttle_us_per_full == 0) {
1087         throttle_us_per_full =
1088             read_migrate_property_int(from,
1089                                       "dirty-limit-throttle-time-per-round");
1090         usleep(100);
1091         g_assert_false(get_src()->stop_seen);
1092     }
1093 
1094     /* Now cancel migrate and wait for dirty limit throttle switch off */
1095     migrate_cancel(from);
1096     wait_for_migration_status(from, "cancelled", NULL);
1097 
1098     /* destination always fails after cancel */
1099     migration_event_wait(to, "failed");
1100     qtest_set_expected_status(to, EXIT_FAILURE);
1101     qtest_quit(to);
1102 
1103     /* Check if dirty limit throttle switched off, set timeout 1ms */
1104     do {
1105         throttle_us_per_full =
1106             read_migrate_property_int(from,
1107                                       "dirty-limit-throttle-time-per-round");
1108         usleep(100);
1109         g_assert_false(get_src()->stop_seen);
1110     } while (throttle_us_per_full != 0 && --max_try_count);
1111 
1112     /* Assert dirty limit is not in service */
1113     g_assert_cmpint(throttle_us_per_full, ==, 0);
1114 
1115     args = (MigrateCommon) {
1116         .start = {
1117             .only_target = true,
1118             .use_dirty_ring = true,
1119         },
1120         .listen_uri = uri,
1121         .connect_uri = uri,
1122     };
1123 
1124     /* Restart dst vm, src vm already show up so we needn't wait anymore */
1125     if (migrate_start(&from, &to, args.listen_uri, &args.start)) {
1126         return;
1127     }
1128 
1129     /* Start migrate */
1130     migrate_qmp(from, to, args.connect_uri, NULL, "{}");
1131 
1132     /* Wait for dirty limit throttle begin */
1133     throttle_us_per_full = 0;
1134     while (throttle_us_per_full == 0) {
1135         throttle_us_per_full =
1136             read_migrate_property_int(from,
1137                                       "dirty-limit-throttle-time-per-round");
1138         usleep(100);
1139         g_assert_false(get_src()->stop_seen);
1140     }
1141 
1142     /*
1143      * The dirty limit rate should equals the return value of
1144      * query-vcpu-dirty-limit if dirty limit cap set
1145      */
1146     g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from));
1147 
1148     /* Now, we have tested if dirty limit works, let it converge */
1149     migrate_set_parameter_int(from, "downtime-limit", downtime_limit);
1150     migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth);
1151 
1152     /*
1153      * Wait for pre-switchover status to check if migration
1154      * satisfy the convergence condition
1155      */
1156     wait_for_migration_status(from, "pre-switchover", NULL);
1157 
1158     remaining = read_ram_property_int(from, "remaining");
1159     g_assert_cmpint(remaining, <,
1160                     (expected_threshold + expected_threshold / 100));
1161 
1162     migrate_continue(from, "pre-switchover");
1163 
1164     qtest_qmp_eventwait(to, "RESUME");
1165 
1166     wait_for_serial("dest_serial");
1167     wait_for_migration_complete(from);
1168 
1169     migrate_end(from, to, true);
1170 }
1171 
1172 static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
1173 {
1174     if (env->is_x86) {
1175         migration_test_add("/migration/precopy/unix/suspend/live",
1176                            test_precopy_unix_suspend_live);
1177         migration_test_add("/migration/precopy/unix/suspend/notlive",
1178                            test_precopy_unix_suspend_notlive);
1179     }
1180 
1181     migration_test_add("/migration/precopy/unix/plain",
1182                        test_precopy_unix_plain);
1183 
1184     migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain);
1185     migration_test_add("/migration/multifd/tcp/uri/plain/none",
1186                        test_multifd_tcp_uri_none);
1187     migration_test_add("/migration/multifd/tcp/plain/cancel",
1188                        test_multifd_tcp_cancel);
1189 #ifdef CONFIG_RDMA
1190     migration_test_add("/migration/precopy/rdma/plain",
1191                        test_precopy_rdma_plain);
1192 #endif
1193 }
1194 
1195 void migration_test_add_precopy(MigrationTestEnv *env)
1196 {
1197     tmpfs = env->tmpfs;
1198 
1199     migration_test_add_precopy_smoke(env);
1200 
1201     if (!env->full_set) {
1202         return;
1203     }
1204 
1205     migration_test_add("/migration/precopy/tcp/plain/switchover-ack",
1206                        test_precopy_tcp_switchover_ack);
1207 
1208 #ifndef _WIN32
1209     migration_test_add("/migration/precopy/fd/tcp",
1210                        test_precopy_fd_socket);
1211     migration_test_add("/migration/precopy/fd/file",
1212                        test_precopy_fd_file);
1213 #endif
1214 
1215     /*
1216      * See explanation why this test is slow on function definition
1217      */
1218     if (g_test_slow()) {
1219         migration_test_add("/migration/auto_converge",
1220                            test_auto_converge);
1221         if (g_str_equal(env->arch, "x86_64") &&
1222             env->has_kvm && env->has_dirty_ring) {
1223             migration_test_add("/dirty_limit",
1224                                test_dirty_limit);
1225         }
1226     }
1227     migration_test_add("/migration/multifd/tcp/channels/plain/none",
1228                        test_multifd_tcp_channels_none);
1229     migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy",
1230                        test_multifd_tcp_zero_page_legacy);
1231     migration_test_add("/migration/multifd/tcp/plain/zero-page/none",
1232                        test_multifd_tcp_no_zero_page);
1233     if (g_str_equal(env->arch, "x86_64")
1234         && env->has_kvm && env->has_dirty_ring) {
1235 
1236         migration_test_add("/migration/dirty_ring",
1237                            test_precopy_unix_dirty_ring);
1238         if (qtest_has_machine("pc") && g_test_slow()) {
1239             migration_test_add("/migration/vcpu_dirty_limit",
1240                                test_vcpu_dirty_limit);
1241         }
1242     }
1243 
1244     /* ensure new status don't go unnoticed */
1245     assert(MIGRATION_STATUS__MAX == 15);
1246 
1247     for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
1248         switch (i) {
1249         case MIGRATION_STATUS_DEVICE: /* happens too fast */
1250         case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */
1251         case MIGRATION_STATUS_COLO: /* no support in tests */
1252         case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
1253         case MIGRATION_STATUS_POSTCOPY_PAUSED:
1254         case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1255         case MIGRATION_STATUS_POSTCOPY_RECOVER:
1256             continue;
1257         default:
1258             migration_test_add_suffix("/migration/cancel/src/after/",
1259                                       MigrationStatus_str(i),
1260                                       test_cancel_src_after_status);
1261         }
1262     }
1263 }
1264