1 /* 2 * QTest testcase for precopy migration 3 * 4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 5 * based on the vhost-user-test.c that is: 6 * Copyright (c) 2014 Virtual Open Systems Sarl. 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "chardev/char.h" 15 #include "crypto/tlscredspsk.h" 16 #include "libqtest.h" 17 #include "migration/bootfile.h" 18 #include "migration/framework.h" 19 #include "migration/migration-qmp.h" 20 #include "migration/migration-util.h" 21 #include "ppc-util.h" 22 #include "qobject/qlist.h" 23 #include "qapi-types-migration.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 36 static char *tmpfs; 37 38 static void test_precopy_unix_plain(void) 39 { 40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 41 MigrateCommon args = { 42 .listen_uri = uri, 43 .connect_uri = uri, 44 /* 45 * The simplest use case of precopy, covering smoke tests of 46 * get-dirty-log dirty tracking. 47 */ 48 .live = true, 49 }; 50 51 test_precopy_common(&args); 52 } 53 54 static void test_precopy_unix_suspend_live(void) 55 { 56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 57 MigrateCommon args = { 58 .listen_uri = uri, 59 .connect_uri = uri, 60 /* 61 * despite being live, the test is fast because the src 62 * suspends immediately. 63 */ 64 .live = true, 65 .start.suspend_me = true, 66 }; 67 68 test_precopy_common(&args); 69 } 70 71 static void test_precopy_unix_suspend_notlive(void) 72 { 73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 74 MigrateCommon args = { 75 .listen_uri = uri, 76 .connect_uri = uri, 77 .start.suspend_me = true, 78 }; 79 80 test_precopy_common(&args); 81 } 82 83 static void test_precopy_unix_dirty_ring(void) 84 { 85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 86 MigrateCommon args = { 87 .start = { 88 .use_dirty_ring = true, 89 }, 90 .listen_uri = uri, 91 .connect_uri = uri, 92 /* 93 * Besides the precopy/unix basic test, cover dirty ring interface 94 * rather than get-dirty-log. 95 */ 96 .live = true, 97 }; 98 99 test_precopy_common(&args); 100 } 101 102 #ifdef CONFIG_RDMA 103 104 #include <sys/resource.h> 105 106 /* 107 * During migration over RDMA, it will try to pin portions of guest memory, 108 * typically exceeding 100MB in this test, while the remainder will be 109 * transmitted as compressed zero pages. 110 * 111 * REQUIRED_MEMLOCK_SZ indicates the minimal mlock size in the current context. 112 */ 113 #define REQUIRED_MEMLOCK_SZ (128 << 20) /* 128MB */ 114 115 /* check 'ulimit -l' */ 116 static bool mlock_check(void) 117 { 118 uid_t uid; 119 struct rlimit rlim; 120 121 uid = getuid(); 122 if (uid == 0) { 123 return true; 124 } 125 126 if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0) { 127 return false; 128 } 129 130 return rlim.rlim_cur >= REQUIRED_MEMLOCK_SZ; 131 } 132 133 #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" 134 static int new_rdma_link(char *buffer, bool ipv6) 135 { 136 char cmd[256]; 137 bool verbose = g_getenv("QTEST_LOG"); 138 139 snprintf(cmd, sizeof(cmd), "IP_FAMILY=%s %s detect %s", 140 ipv6 ? "ipv6" : "ipv4", RDMA_MIGRATION_HELPER, 141 verbose ? "" : "2>/dev/null"); 142 143 FILE *pipe = popen(cmd, "r"); 144 if (pipe == NULL) { 145 perror("Failed to run script"); 146 return -1; 147 } 148 149 int idx = 0; 150 while (fgets(buffer + idx, 128 - idx, pipe) != NULL) { 151 idx += strlen(buffer); 152 } 153 154 int status = pclose(pipe); 155 if (status == -1) { 156 perror("Error reported by pclose()"); 157 return -1; 158 } else if (WIFEXITED(status)) { 159 return WEXITSTATUS(status); 160 } 161 162 return -1; 163 } 164 165 static void __test_precopy_rdma_plain(bool ipv6) 166 { 167 char buffer[128] = {}; 168 169 if (!mlock_check()) { 170 g_test_skip("'ulimit -l' is too small, require >=128M"); 171 return; 172 } 173 174 if (new_rdma_link(buffer, ipv6)) { 175 g_test_skip("No rdma link available\n" 176 "# To enable the test:\n" 177 "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " 178 "setup a new rdma/rxe link and rerun the test\n" 179 "# Optional: run 'scripts/rdma-migration-helper.sh clean' " 180 "to revert the 'setup'"); 181 return; 182 } 183 184 /* 185 * TODO: query a free port instead of hard code. 186 * 29200=('R'+'D'+'M'+'A')*100 187 **/ 188 g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer); 189 190 MigrateCommon args = { 191 .listen_uri = uri, 192 .connect_uri = uri, 193 }; 194 195 test_precopy_common(&args); 196 } 197 198 static void test_precopy_rdma_plain(void) 199 { 200 __test_precopy_rdma_plain(false); 201 } 202 203 static void test_precopy_rdma_plain_ipv6(void) 204 { 205 __test_precopy_rdma_plain(true); 206 } 207 #endif 208 209 static void test_precopy_tcp_plain(void) 210 { 211 MigrateCommon args = { 212 .listen_uri = "tcp:127.0.0.1:0", 213 }; 214 215 test_precopy_common(&args); 216 } 217 218 static void test_precopy_tcp_switchover_ack(void) 219 { 220 MigrateCommon args = { 221 .listen_uri = "tcp:127.0.0.1:0", 222 .start = { 223 .caps[MIGRATION_CAPABILITY_RETURN_PATH] = true, 224 .caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK] = true, 225 }, 226 /* 227 * Source VM must be running in order to consider the switchover ACK 228 * when deciding to do switchover or not. 229 */ 230 .live = true, 231 }; 232 233 test_precopy_common(&args); 234 } 235 236 #ifndef _WIN32 237 static void *migrate_hook_start_fd(QTestState *from, 238 QTestState *to) 239 { 240 int ret; 241 int pair[2]; 242 243 /* Create two connected sockets for migration */ 244 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); 245 g_assert_cmpint(ret, ==, 0); 246 247 /* Send the 1st socket to the target */ 248 qtest_qmp_fds_assert_success(to, &pair[0], 1, 249 "{ 'execute': 'getfd'," 250 " 'arguments': { 'fdname': 'fd-mig' }}"); 251 close(pair[0]); 252 253 /* Start incoming migration from the 1st socket */ 254 migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}"); 255 256 /* Send the 2nd socket to the target */ 257 qtest_qmp_fds_assert_success(from, &pair[1], 1, 258 "{ 'execute': 'getfd'," 259 " 'arguments': { 'fdname': 'fd-mig' }}"); 260 close(pair[1]); 261 262 return NULL; 263 } 264 265 static void migrate_hook_end_fd(QTestState *from, 266 QTestState *to, 267 void *opaque) 268 { 269 QDict *rsp; 270 const char *error_desc; 271 272 /* Test closing fds */ 273 /* 274 * We assume, that QEMU removes named fd from its list, 275 * so this should fail. 276 */ 277 rsp = qtest_qmp(from, 278 "{ 'execute': 'closefd'," 279 " 'arguments': { 'fdname': 'fd-mig' }}"); 280 g_assert_true(qdict_haskey(rsp, "error")); 281 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 282 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 283 qobject_unref(rsp); 284 285 rsp = qtest_qmp(to, 286 "{ 'execute': 'closefd'," 287 " 'arguments': { 'fdname': 'fd-mig' }}"); 288 g_assert_true(qdict_haskey(rsp, "error")); 289 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 290 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 291 qobject_unref(rsp); 292 } 293 294 static void test_precopy_fd_socket(void) 295 { 296 MigrateCommon args = { 297 .listen_uri = "defer", 298 .connect_uri = "fd:fd-mig", 299 .start_hook = migrate_hook_start_fd, 300 .end_hook = migrate_hook_end_fd, 301 }; 302 test_precopy_common(&args); 303 } 304 305 static void *migrate_hook_start_precopy_fd_file(QTestState *from, 306 QTestState *to) 307 { 308 g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 309 int src_flags = O_CREAT | O_RDWR; 310 int dst_flags = O_CREAT | O_RDWR; 311 int fds[2]; 312 313 fds[0] = open(file, src_flags, 0660); 314 assert(fds[0] != -1); 315 316 fds[1] = open(file, dst_flags, 0660); 317 assert(fds[1] != -1); 318 319 320 qtest_qmp_fds_assert_success(to, &fds[0], 1, 321 "{ 'execute': 'getfd'," 322 " 'arguments': { 'fdname': 'fd-mig' }}"); 323 324 qtest_qmp_fds_assert_success(from, &fds[1], 1, 325 "{ 'execute': 'getfd'," 326 " 'arguments': { 'fdname': 'fd-mig' }}"); 327 328 close(fds[0]); 329 close(fds[1]); 330 331 return NULL; 332 } 333 334 static void test_precopy_fd_file(void) 335 { 336 MigrateCommon args = { 337 .listen_uri = "defer", 338 .connect_uri = "fd:fd-mig", 339 .start_hook = migrate_hook_start_precopy_fd_file, 340 .end_hook = migrate_hook_end_fd, 341 }; 342 test_file_common(&args, true); 343 } 344 #endif /* _WIN32 */ 345 346 /* 347 * The way auto_converge works, we need to do too many passes to 348 * run this test. Auto_converge logic is only run once every 349 * three iterations, so: 350 * 351 * - 3 iterations without auto_converge enabled 352 * - 3 iterations with pct = 5 353 * - 3 iterations with pct = 30 354 * - 3 iterations with pct = 55 355 * - 3 iterations with pct = 80 356 * - 3 iterations with pct = 95 (max(95, 80 + 25)) 357 * 358 * To make things even worse, we need to run the initial stage at 359 * 3MB/s so we enter autoconverge even when host is (over)loaded. 360 */ 361 static void test_auto_converge(void) 362 { 363 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 364 MigrateStart args = {}; 365 QTestState *from, *to; 366 int64_t percentage; 367 368 /* 369 * We want the test to be stable and as fast as possible. 370 * E.g., with 1Gb/s bandwidth migration may pass without throttling, 371 * so we need to decrease a bandwidth. 372 */ 373 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95; 374 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt; 375 int max_try_count, hit = 0; 376 377 if (migrate_start(&from, &to, uri, &args)) { 378 return; 379 } 380 381 migrate_set_capability(from, "auto-converge", true); 382 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); 383 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); 384 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); 385 386 /* 387 * Set the initial parameters so that the migration could not converge 388 * without throttling. 389 */ 390 migrate_ensure_non_converge(from); 391 392 /* To check remaining size after precopy */ 393 migrate_set_capability(from, "pause-before-switchover", true); 394 395 /* Wait for the first serial output from the source */ 396 wait_for_serial("src_serial"); 397 398 migrate_qmp(from, to, uri, NULL, "{}"); 399 400 /* Wait for throttling begins */ 401 percentage = 0; 402 do { 403 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 404 if (percentage != 0) { 405 break; 406 } 407 usleep(20); 408 g_assert_false(get_src()->stop_seen); 409 } while (true); 410 /* The first percentage of throttling should be at least init_pct */ 411 g_assert_cmpint(percentage, >=, init_pct); 412 413 /* 414 * End the loop when the dirty sync count greater than 1. 415 */ 416 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) { 417 usleep(1000 * 1000); 418 } 419 420 prev_dirty_sync_cnt = dirty_sync_cnt; 421 422 /* 423 * The RAMBlock dirty sync count must changes in 5 seconds, here we set 424 * the timeout to 10 seconds to ensure it changes. 425 * 426 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s, 427 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3) 428 * to complete; this ensures that the RAMBlock dirty sync occurs. 429 */ 430 max_try_count = 10; 431 while (--max_try_count) { 432 dirty_sync_cnt = get_migration_pass(from); 433 if (dirty_sync_cnt != prev_dirty_sync_cnt) { 434 hit = 1; 435 break; 436 } 437 prev_dirty_sync_cnt = dirty_sync_cnt; 438 sleep(1); 439 } 440 g_assert_cmpint(hit, ==, 1); 441 442 /* Now, when we tested that throttling works, let it converge */ 443 migrate_ensure_converge(from); 444 445 /* 446 * Wait for pre-switchover status to check last throttle percentage 447 * and remaining. These values will be zeroed later 448 */ 449 wait_for_migration_status(from, "pre-switchover", NULL); 450 451 /* The final percentage of throttling shouldn't be greater than max_pct */ 452 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 453 g_assert_cmpint(percentage, <=, max_pct); 454 migrate_continue(from, "pre-switchover"); 455 456 qtest_qmp_eventwait(to, "RESUME"); 457 458 wait_for_serial("dest_serial"); 459 wait_for_migration_complete(from); 460 461 migrate_end(from, to, true); 462 } 463 464 static void * 465 migrate_hook_start_precopy_tcp_multifd(QTestState *from, 466 QTestState *to) 467 { 468 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 469 } 470 471 static void * 472 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from, 473 QTestState *to) 474 { 475 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 476 migrate_set_parameter_str(from, "zero-page-detection", "legacy"); 477 return NULL; 478 } 479 480 static void * 481 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from, 482 QTestState *to) 483 { 484 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 485 migrate_set_parameter_str(from, "zero-page-detection", "none"); 486 return NULL; 487 } 488 489 static void test_multifd_tcp_uri_none(void) 490 { 491 MigrateCommon args = { 492 .listen_uri = "defer", 493 .start_hook = migrate_hook_start_precopy_tcp_multifd, 494 .start = { 495 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 496 }, 497 /* 498 * Multifd is more complicated than most of the features, it 499 * directly takes guest page buffers when sending, make sure 500 * everything will work alright even if guest page is changing. 501 */ 502 .live = true, 503 }; 504 test_precopy_common(&args); 505 } 506 507 static void test_multifd_tcp_zero_page_legacy(void) 508 { 509 MigrateCommon args = { 510 .listen_uri = "defer", 511 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, 512 .start = { 513 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 514 }, 515 /* 516 * Multifd is more complicated than most of the features, it 517 * directly takes guest page buffers when sending, make sure 518 * everything will work alright even if guest page is changing. 519 */ 520 .live = true, 521 }; 522 test_precopy_common(&args); 523 } 524 525 static void test_multifd_tcp_no_zero_page(void) 526 { 527 MigrateCommon args = { 528 .listen_uri = "defer", 529 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, 530 .start = { 531 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 532 }, 533 /* 534 * Multifd is more complicated than most of the features, it 535 * directly takes guest page buffers when sending, make sure 536 * everything will work alright even if guest page is changing. 537 */ 538 .live = true, 539 }; 540 test_precopy_common(&args); 541 } 542 543 static void test_multifd_tcp_channels_none(void) 544 { 545 MigrateCommon args = { 546 .listen_uri = "defer", 547 .start_hook = migrate_hook_start_precopy_tcp_multifd, 548 .live = true, 549 .start = { 550 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 551 }, 552 .connect_channels = ("[ { 'channel-type': 'main'," 553 " 'addr': { 'transport': 'socket'," 554 " 'type': 'inet'," 555 " 'host': '127.0.0.1'," 556 " 'port': '0' } } ]"), 557 }; 558 test_precopy_common(&args); 559 } 560 561 /* 562 * This test does: 563 * source target 564 * migrate_incoming 565 * migrate 566 * migrate_cancel 567 * launch another target 568 * migrate 569 * 570 * And see that it works 571 */ 572 static void test_multifd_tcp_cancel(bool postcopy_ram) 573 { 574 MigrateStart args = { 575 .hide_stderr = true, 576 }; 577 QTestState *from, *to, *to2; 578 579 if (migrate_start(&from, &to, "defer", &args)) { 580 return; 581 } 582 583 migrate_ensure_non_converge(from); 584 migrate_prepare_for_dirty_mem(from); 585 586 if (postcopy_ram) { 587 migrate_set_capability(from, "postcopy-ram", true); 588 migrate_set_capability(to, "postcopy-ram", true); 589 } 590 591 migrate_set_parameter_int(from, "multifd-channels", 16); 592 migrate_set_parameter_int(to, "multifd-channels", 16); 593 594 migrate_set_capability(from, "multifd", true); 595 migrate_set_capability(to, "multifd", true); 596 597 /* Start incoming migration from the 1st socket */ 598 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 599 600 /* Wait for the first serial output from the source */ 601 wait_for_serial("src_serial"); 602 603 migrate_qmp(from, to, NULL, NULL, "{}"); 604 605 migrate_wait_for_dirty_mem(from, to); 606 607 migrate_cancel(from); 608 609 /* Make sure QEMU process "to" exited */ 610 qtest_set_expected_status(to, EXIT_FAILURE); 611 qtest_wait_qemu(to); 612 qtest_quit(to); 613 614 /* 615 * Ensure the source QEMU finishes its cancellation process before we 616 * proceed with the setup of the next migration. The migrate_start() 617 * function and others might want to interact with the source in a way that 618 * is not possible while the migration is not canceled properly. For 619 * example, setting migration capabilities when the migration is still 620 * running leads to an error. 621 */ 622 wait_for_migration_status(from, "cancelled", NULL); 623 624 args = (MigrateStart){ 625 .only_target = true, 626 }; 627 628 if (migrate_start(&from, &to2, "defer", &args)) { 629 return; 630 } 631 632 if (postcopy_ram) { 633 migrate_set_capability(to2, "postcopy-ram", true); 634 } 635 636 migrate_set_parameter_int(to2, "multifd-channels", 16); 637 638 migrate_set_capability(to2, "multifd", true); 639 640 /* Start incoming migration from the 1st socket */ 641 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}"); 642 643 migrate_ensure_non_converge(from); 644 645 migrate_qmp(from, to2, NULL, NULL, "{}"); 646 647 migrate_wait_for_dirty_mem(from, to2); 648 649 migrate_ensure_converge(from); 650 651 wait_for_stop(from, get_src()); 652 qtest_qmp_eventwait(to2, "RESUME"); 653 654 wait_for_serial("dest_serial"); 655 wait_for_migration_complete(from); 656 migrate_end(from, to2, true); 657 } 658 659 static void test_multifd_precopy_tcp_cancel(void) 660 { 661 test_multifd_tcp_cancel(false); 662 } 663 664 static void test_multifd_postcopy_tcp_cancel(void) 665 { 666 test_multifd_tcp_cancel(true); 667 } 668 669 static void test_cancel_src_after_failed(QTestState *from, QTestState *to, 670 const char *uri, const char *phase) 671 { 672 /* 673 * No migrate_incoming_qmp() at the start to force source into 674 * failed state during migrate_qmp(). 675 */ 676 677 wait_for_serial("src_serial"); 678 migrate_ensure_converge(from); 679 680 migrate_qmp(from, to, uri, NULL, "{}"); 681 682 migration_event_wait(from, phase); 683 migrate_cancel(from); 684 685 /* cancelling will not move the migration out of 'failed' */ 686 687 wait_for_migration_status(from, "failed", 688 (const char * []) { "completed", NULL }); 689 690 /* 691 * Not waiting for the destination because it never started 692 * migration. 693 */ 694 } 695 696 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, 697 const char *uri, const char *phase) 698 { 699 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 700 701 wait_for_serial("src_serial"); 702 migrate_ensure_converge(from); 703 704 migrate_qmp(from, to, uri, NULL, "{}"); 705 706 /* To move to cancelled/cancelling */ 707 migrate_cancel(from); 708 migration_event_wait(from, phase); 709 710 /* The migrate_cancel under test */ 711 migrate_cancel(from); 712 713 wait_for_migration_status(from, "cancelled", 714 (const char * []) { "completed", NULL }); 715 716 wait_for_migration_status(to, "failed", 717 (const char * []) { "completed", NULL }); 718 } 719 720 static void test_cancel_src_after_complete(QTestState *from, QTestState *to, 721 const char *uri, const char *phase) 722 { 723 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 724 725 wait_for_serial("src_serial"); 726 migrate_ensure_converge(from); 727 728 migrate_qmp(from, to, uri, NULL, "{}"); 729 730 migration_event_wait(from, phase); 731 migrate_cancel(from); 732 733 /* 734 * qmp_migrate_cancel() exits early if migration is not running 735 * anymore, the status will not change to cancelled. 736 */ 737 wait_for_migration_complete(from); 738 wait_for_migration_complete(to); 739 } 740 741 static void test_cancel_src_after_none(QTestState *from, QTestState *to, 742 const char *uri, const char *phase) 743 { 744 /* 745 * Test that cancelling without a migration happening does not 746 * affect subsequent migrations 747 */ 748 migrate_cancel(to); 749 750 wait_for_serial("src_serial"); 751 migrate_cancel(from); 752 753 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 754 755 migrate_ensure_converge(from); 756 migrate_qmp(from, to, uri, NULL, "{}"); 757 758 wait_for_migration_complete(from); 759 wait_for_migration_complete(to); 760 } 761 762 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, 763 const char *uri, const char *phase) 764 { 765 migrate_set_capability(from, "pause-before-switchover", true); 766 migrate_set_capability(to, "pause-before-switchover", true); 767 768 migrate_set_capability(from, "multifd", true); 769 migrate_set_capability(to, "multifd", true); 770 771 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 772 773 wait_for_serial("src_serial"); 774 migrate_ensure_converge(from); 775 776 migrate_qmp(from, to, uri, NULL, "{}"); 777 778 migration_event_wait(from, phase); 779 migrate_cancel(from); 780 migration_event_wait(from, "cancelling"); 781 782 wait_for_migration_status(from, "cancelled", 783 (const char * []) { "completed", NULL }); 784 785 wait_for_migration_status(to, "failed", 786 (const char * []) { "completed", NULL }); 787 } 788 789 static void test_cancel_src_after_status(void *opaque) 790 { 791 const char *test_path = opaque; 792 g_autofree char *phase = g_path_get_basename(test_path); 793 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 794 QTestState *from, *to; 795 MigrateStart args = { 796 .hide_stderr = true, 797 }; 798 799 if (migrate_start(&from, &to, "defer", &args)) { 800 return; 801 } 802 803 if (g_str_equal(phase, "cancelling") || 804 g_str_equal(phase, "cancelled")) { 805 test_cancel_src_after_cancelled(from, to, uri, phase); 806 807 } else if (g_str_equal(phase, "completed")) { 808 test_cancel_src_after_complete(from, to, uri, phase); 809 810 } else if (g_str_equal(phase, "failed")) { 811 test_cancel_src_after_failed(from, to, uri, phase); 812 813 } else if (g_str_equal(phase, "none")) { 814 test_cancel_src_after_none(from, to, uri, phase); 815 816 } else { 817 /* any state that comes before pre-switchover */ 818 test_cancel_src_pre_switchover(from, to, uri, phase); 819 } 820 821 migrate_end(from, to, false); 822 } 823 824 static void calc_dirty_rate(QTestState *who, uint64_t calc_time) 825 { 826 qtest_qmp_assert_success(who, 827 "{ 'execute': 'calc-dirty-rate'," 828 "'arguments': { " 829 "'calc-time': %" PRIu64 "," 830 "'mode': 'dirty-ring' }}", 831 calc_time); 832 } 833 834 static QDict *query_dirty_rate(QTestState *who) 835 { 836 return qtest_qmp_assert_success_ref(who, 837 "{ 'execute': 'query-dirty-rate' }"); 838 } 839 840 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) 841 { 842 qtest_qmp_assert_success(who, 843 "{ 'execute': 'set-vcpu-dirty-limit'," 844 "'arguments': { " 845 "'dirty-rate': %" PRIu64 " } }", 846 dirtyrate); 847 } 848 849 static void cancel_vcpu_dirty_limit(QTestState *who) 850 { 851 qtest_qmp_assert_success(who, 852 "{ 'execute': 'cancel-vcpu-dirty-limit' }"); 853 } 854 855 static QDict *query_vcpu_dirty_limit(QTestState *who) 856 { 857 QDict *rsp; 858 859 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); 860 g_assert(!qdict_haskey(rsp, "error")); 861 g_assert(qdict_haskey(rsp, "return")); 862 863 return rsp; 864 } 865 866 static bool calc_dirtyrate_ready(QTestState *who) 867 { 868 QDict *rsp_return; 869 const char *status; 870 bool ready; 871 872 rsp_return = query_dirty_rate(who); 873 g_assert(rsp_return); 874 875 status = qdict_get_str(rsp_return, "status"); 876 g_assert(status); 877 ready = g_strcmp0(status, "measuring"); 878 qobject_unref(rsp_return); 879 880 return ready; 881 } 882 883 static void wait_for_calc_dirtyrate_complete(QTestState *who, 884 int64_t time_s) 885 { 886 int max_try_count = 10000; 887 usleep(time_s * 1000000); 888 889 while (!calc_dirtyrate_ready(who) && max_try_count--) { 890 usleep(1000); 891 } 892 893 /* 894 * Set the timeout with 10 s(max_try_count * 1000us), 895 * if dirtyrate measurement not complete, fail test. 896 */ 897 g_assert_cmpint(max_try_count, !=, 0); 898 } 899 900 static int64_t get_dirty_rate(QTestState *who) 901 { 902 QDict *rsp_return; 903 const char *status; 904 QList *rates; 905 const QListEntry *entry; 906 QDict *rate; 907 int64_t dirtyrate; 908 909 rsp_return = query_dirty_rate(who); 910 g_assert(rsp_return); 911 912 status = qdict_get_str(rsp_return, "status"); 913 g_assert(status); 914 g_assert_cmpstr(status, ==, "measured"); 915 916 rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); 917 g_assert(rates && !qlist_empty(rates)); 918 919 entry = qlist_first(rates); 920 g_assert(entry); 921 922 rate = qobject_to(QDict, qlist_entry_obj(entry)); 923 g_assert(rate); 924 925 dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); 926 927 qobject_unref(rsp_return); 928 return dirtyrate; 929 } 930 931 static int64_t get_limit_rate(QTestState *who) 932 { 933 QDict *rsp_return; 934 QList *rates; 935 const QListEntry *entry; 936 QDict *rate; 937 int64_t dirtyrate; 938 939 rsp_return = query_vcpu_dirty_limit(who); 940 g_assert(rsp_return); 941 942 rates = qdict_get_qlist(rsp_return, "return"); 943 g_assert(rates && !qlist_empty(rates)); 944 945 entry = qlist_first(rates); 946 g_assert(entry); 947 948 rate = qobject_to(QDict, qlist_entry_obj(entry)); 949 g_assert(rate); 950 951 dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); 952 953 qobject_unref(rsp_return); 954 return dirtyrate; 955 } 956 957 static QTestState *dirtylimit_start_vm(void) 958 { 959 QTestState *vm = NULL; 960 g_autofree gchar *cmd = NULL; 961 const char *bootpath; 962 963 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false); 964 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " 965 "-name dirtylimit-test,debug-threads=on " 966 "-m 150M -smp 1 " 967 "-serial file:%s/vm_serial " 968 "-drive file=%s,format=raw ", 969 tmpfs, bootpath); 970 971 vm = qtest_init(cmd); 972 return vm; 973 } 974 975 static void dirtylimit_stop_vm(QTestState *vm) 976 { 977 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial"); 978 979 qtest_quit(vm); 980 unlink(path); 981 } 982 983 static void test_vcpu_dirty_limit(void) 984 { 985 QTestState *vm; 986 int64_t origin_rate; 987 int64_t quota_rate; 988 int64_t rate ; 989 int max_try_count = 20; 990 int hit = 0; 991 992 /* Start vm for vcpu dirtylimit test */ 993 vm = dirtylimit_start_vm(); 994 995 /* Wait for the first serial output from the vm*/ 996 wait_for_serial("vm_serial"); 997 998 /* Do dirtyrate measurement with calc time equals 1s */ 999 calc_dirty_rate(vm, 1); 1000 1001 /* Sleep calc time and wait for calc dirtyrate complete */ 1002 wait_for_calc_dirtyrate_complete(vm, 1); 1003 1004 /* Query original dirty page rate */ 1005 origin_rate = get_dirty_rate(vm); 1006 1007 /* VM booted from bootsect should dirty memory steadily */ 1008 assert(origin_rate != 0); 1009 1010 /* Setup quota dirty page rate at half of origin */ 1011 quota_rate = origin_rate / 2; 1012 1013 /* Set dirtylimit */ 1014 dirtylimit_set_all(vm, quota_rate); 1015 1016 /* 1017 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit 1018 * works literally 1019 */ 1020 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); 1021 1022 /* Sleep a bit to check if it take effect */ 1023 usleep(2000000); 1024 1025 /* 1026 * Check if dirtylimit take effect realistically, set the 1027 * timeout with 20 s(max_try_count * 1s), if dirtylimit 1028 * doesn't take effect, fail test. 1029 */ 1030 while (--max_try_count) { 1031 calc_dirty_rate(vm, 1); 1032 wait_for_calc_dirtyrate_complete(vm, 1); 1033 rate = get_dirty_rate(vm); 1034 1035 /* 1036 * Assume hitting if current rate is less 1037 * than quota rate (within accepting error) 1038 */ 1039 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 1040 hit = 1; 1041 break; 1042 } 1043 } 1044 1045 g_assert_cmpint(hit, ==, 1); 1046 1047 hit = 0; 1048 max_try_count = 20; 1049 1050 /* Check if dirtylimit cancellation take effect */ 1051 cancel_vcpu_dirty_limit(vm); 1052 while (--max_try_count) { 1053 calc_dirty_rate(vm, 1); 1054 wait_for_calc_dirtyrate_complete(vm, 1); 1055 rate = get_dirty_rate(vm); 1056 1057 /* 1058 * Assume dirtylimit be canceled if current rate is 1059 * greater than quota rate (within accepting error) 1060 */ 1061 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 1062 hit = 1; 1063 break; 1064 } 1065 } 1066 1067 g_assert_cmpint(hit, ==, 1); 1068 dirtylimit_stop_vm(vm); 1069 } 1070 1071 static void migrate_dirty_limit_wait_showup(QTestState *from, 1072 const int64_t period, 1073 const int64_t value) 1074 { 1075 /* Enable dirty limit capability */ 1076 migrate_set_capability(from, "dirty-limit", true); 1077 1078 /* Set dirty limit parameters */ 1079 migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); 1080 migrate_set_parameter_int(from, "vcpu-dirty-limit", value); 1081 1082 /* Make sure migrate can't converge */ 1083 migrate_ensure_non_converge(from); 1084 1085 /* To check limit rate after precopy */ 1086 migrate_set_capability(from, "pause-before-switchover", true); 1087 1088 /* Wait for the serial output from the source */ 1089 wait_for_serial("src_serial"); 1090 } 1091 1092 /* 1093 * This test does: 1094 * source destination 1095 * start vm 1096 * start incoming vm 1097 * migrate 1098 * wait dirty limit to begin 1099 * cancel migrate 1100 * cancellation check 1101 * restart incoming vm 1102 * migrate 1103 * wait dirty limit to begin 1104 * wait pre-switchover event 1105 * convergence condition check 1106 * 1107 * And see if dirty limit migration works correctly. 1108 * This test case involves many passes, so it runs in slow mode only. 1109 */ 1110 static void test_dirty_limit(void) 1111 { 1112 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 1113 QTestState *from, *to; 1114 int64_t remaining; 1115 uint64_t throttle_us_per_full; 1116 /* 1117 * We want the test to be stable and as fast as possible. 1118 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit, 1119 * so we need to decrease a bandwidth. 1120 */ 1121 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; 1122 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ 1123 const int64_t downtime_limit = 250; /* 250ms */ 1124 /* 1125 * We migrate through unix-socket (> 500Mb/s). 1126 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). 1127 * So, we can predict expected_threshold 1128 */ 1129 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; 1130 int max_try_count = 10; 1131 MigrateCommon args = { 1132 .start = { 1133 .hide_stderr = true, 1134 .use_dirty_ring = true, 1135 }, 1136 .listen_uri = uri, 1137 .connect_uri = uri, 1138 }; 1139 1140 /* Start src, dst vm */ 1141 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1142 return; 1143 } 1144 1145 /* Prepare for dirty limit migration and wait src vm show up */ 1146 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); 1147 1148 /* Start migrate */ 1149 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1150 1151 /* Wait for dirty limit throttle begin */ 1152 throttle_us_per_full = 0; 1153 while (throttle_us_per_full == 0) { 1154 throttle_us_per_full = 1155 read_migrate_property_int(from, 1156 "dirty-limit-throttle-time-per-round"); 1157 usleep(100); 1158 g_assert_false(get_src()->stop_seen); 1159 } 1160 1161 /* Now cancel migrate and wait for dirty limit throttle switch off */ 1162 migrate_cancel(from); 1163 wait_for_migration_status(from, "cancelled", NULL); 1164 1165 /* destination always fails after cancel */ 1166 migration_event_wait(to, "failed"); 1167 qtest_set_expected_status(to, EXIT_FAILURE); 1168 qtest_quit(to); 1169 1170 /* Check if dirty limit throttle switched off, set timeout 1ms */ 1171 do { 1172 throttle_us_per_full = 1173 read_migrate_property_int(from, 1174 "dirty-limit-throttle-time-per-round"); 1175 usleep(100); 1176 g_assert_false(get_src()->stop_seen); 1177 } while (throttle_us_per_full != 0 && --max_try_count); 1178 1179 /* Assert dirty limit is not in service */ 1180 g_assert_cmpint(throttle_us_per_full, ==, 0); 1181 1182 args = (MigrateCommon) { 1183 .start = { 1184 .only_target = true, 1185 .use_dirty_ring = true, 1186 }, 1187 .listen_uri = uri, 1188 .connect_uri = uri, 1189 }; 1190 1191 /* Restart dst vm, src vm already show up so we needn't wait anymore */ 1192 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1193 return; 1194 } 1195 1196 /* Start migrate */ 1197 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1198 1199 /* Wait for dirty limit throttle begin */ 1200 throttle_us_per_full = 0; 1201 while (throttle_us_per_full == 0) { 1202 throttle_us_per_full = 1203 read_migrate_property_int(from, 1204 "dirty-limit-throttle-time-per-round"); 1205 usleep(100); 1206 g_assert_false(get_src()->stop_seen); 1207 } 1208 1209 /* 1210 * The dirty limit rate should equals the return value of 1211 * query-vcpu-dirty-limit if dirty limit cap set 1212 */ 1213 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); 1214 1215 /* Now, we have tested if dirty limit works, let it converge */ 1216 migrate_set_parameter_int(from, "downtime-limit", downtime_limit); 1217 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); 1218 1219 /* 1220 * Wait for pre-switchover status to check if migration 1221 * satisfy the convergence condition 1222 */ 1223 wait_for_migration_status(from, "pre-switchover", NULL); 1224 1225 remaining = read_ram_property_int(from, "remaining"); 1226 g_assert_cmpint(remaining, <, 1227 (expected_threshold + expected_threshold / 100)); 1228 1229 migrate_continue(from, "pre-switchover"); 1230 1231 qtest_qmp_eventwait(to, "RESUME"); 1232 1233 wait_for_serial("dest_serial"); 1234 wait_for_migration_complete(from); 1235 1236 migrate_end(from, to, true); 1237 } 1238 1239 static void migration_test_add_precopy_smoke(MigrationTestEnv *env) 1240 { 1241 if (env->is_x86) { 1242 migration_test_add("/migration/precopy/unix/suspend/live", 1243 test_precopy_unix_suspend_live); 1244 migration_test_add("/migration/precopy/unix/suspend/notlive", 1245 test_precopy_unix_suspend_notlive); 1246 } 1247 1248 migration_test_add("/migration/precopy/unix/plain", 1249 test_precopy_unix_plain); 1250 1251 migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain); 1252 migration_test_add("/migration/multifd/tcp/uri/plain/none", 1253 test_multifd_tcp_uri_none); 1254 migration_test_add("/migration/multifd/tcp/plain/cancel", 1255 test_multifd_precopy_tcp_cancel); 1256 if (env->has_uffd) { 1257 migration_test_add("/migration/multifd+postcopy/tcp/plain/cancel", 1258 test_multifd_postcopy_tcp_cancel); 1259 } 1260 1261 #ifdef CONFIG_RDMA 1262 migration_test_add("/migration/precopy/rdma/plain", 1263 test_precopy_rdma_plain); 1264 migration_test_add("/migration/precopy/rdma/plain/ipv6", 1265 test_precopy_rdma_plain_ipv6); 1266 #endif 1267 } 1268 1269 void migration_test_add_precopy(MigrationTestEnv *env) 1270 { 1271 tmpfs = env->tmpfs; 1272 1273 migration_test_add_precopy_smoke(env); 1274 1275 if (!env->full_set) { 1276 return; 1277 } 1278 1279 migration_test_add("/migration/precopy/tcp/plain/switchover-ack", 1280 test_precopy_tcp_switchover_ack); 1281 1282 #ifndef _WIN32 1283 migration_test_add("/migration/precopy/fd/tcp", 1284 test_precopy_fd_socket); 1285 migration_test_add("/migration/precopy/fd/file", 1286 test_precopy_fd_file); 1287 #endif 1288 1289 /* 1290 * See explanation why this test is slow on function definition 1291 */ 1292 if (g_test_slow()) { 1293 migration_test_add("/migration/auto_converge", 1294 test_auto_converge); 1295 if (g_str_equal(env->arch, "x86_64") && 1296 env->has_kvm && env->has_dirty_ring) { 1297 migration_test_add("/dirty_limit", 1298 test_dirty_limit); 1299 } 1300 } 1301 migration_test_add("/migration/multifd/tcp/channels/plain/none", 1302 test_multifd_tcp_channels_none); 1303 migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy", 1304 test_multifd_tcp_zero_page_legacy); 1305 migration_test_add("/migration/multifd/tcp/plain/zero-page/none", 1306 test_multifd_tcp_no_zero_page); 1307 if (g_str_equal(env->arch, "x86_64") 1308 && env->has_kvm && env->has_dirty_ring) { 1309 1310 migration_test_add("/migration/dirty_ring", 1311 test_precopy_unix_dirty_ring); 1312 if (qtest_has_machine("pc") && g_test_slow()) { 1313 migration_test_add("/migration/vcpu_dirty_limit", 1314 test_vcpu_dirty_limit); 1315 } 1316 } 1317 1318 /* ensure new status don't go unnoticed */ 1319 assert(MIGRATION_STATUS__MAX == 15); 1320 1321 for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { 1322 switch (i) { 1323 case MIGRATION_STATUS_DEVICE: /* happens too fast */ 1324 case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ 1325 case MIGRATION_STATUS_COLO: /* no support in tests */ 1326 case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ 1327 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1328 case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: 1329 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1330 continue; 1331 default: 1332 migration_test_add_suffix("/migration/cancel/src/after/", 1333 MigrationStatus_str(i), 1334 test_cancel_src_after_status); 1335 } 1336 } 1337 } 1338