1 /* 2 * QTest testcase for precopy migration 3 * 4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 5 * based on the vhost-user-test.c that is: 6 * Copyright (c) 2014 Virtual Open Systems Sarl. 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "chardev/char.h" 15 #include "crypto/tlscredspsk.h" 16 #include "libqtest.h" 17 #include "migration/bootfile.h" 18 #include "migration/framework.h" 19 #include "migration/migration-qmp.h" 20 #include "migration/migration-util.h" 21 #include "ppc-util.h" 22 #include "qobject/qlist.h" 23 #include "qapi-types-migration.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 36 static char *tmpfs; 37 38 static void test_precopy_unix_plain(void) 39 { 40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 41 MigrateCommon args = { 42 .listen_uri = uri, 43 .connect_uri = uri, 44 /* 45 * The simplest use case of precopy, covering smoke tests of 46 * get-dirty-log dirty tracking. 47 */ 48 .live = true, 49 }; 50 51 test_precopy_common(&args); 52 } 53 54 static void test_precopy_unix_suspend_live(void) 55 { 56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 57 MigrateCommon args = { 58 .listen_uri = uri, 59 .connect_uri = uri, 60 /* 61 * despite being live, the test is fast because the src 62 * suspends immediately. 63 */ 64 .live = true, 65 .start.suspend_me = true, 66 }; 67 68 test_precopy_common(&args); 69 } 70 71 static void test_precopy_unix_suspend_notlive(void) 72 { 73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 74 MigrateCommon args = { 75 .listen_uri = uri, 76 .connect_uri = uri, 77 .start.suspend_me = true, 78 }; 79 80 test_precopy_common(&args); 81 } 82 83 static void test_precopy_unix_dirty_ring(void) 84 { 85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 86 MigrateCommon args = { 87 .start = { 88 .use_dirty_ring = true, 89 }, 90 .listen_uri = uri, 91 .connect_uri = uri, 92 /* 93 * Besides the precopy/unix basic test, cover dirty ring interface 94 * rather than get-dirty-log. 95 */ 96 .live = true, 97 }; 98 99 test_precopy_common(&args); 100 } 101 102 #ifdef CONFIG_RDMA 103 104 #include <sys/resource.h> 105 106 /* 107 * During migration over RDMA, it will try to pin portions of guest memory, 108 * typically exceeding 100MB in this test, while the remainder will be 109 * transmitted as compressed zero pages. 110 * 111 * REQUIRED_MEMLOCK_SZ indicates the minimal mlock size in the current context. 112 */ 113 #define REQUIRED_MEMLOCK_SZ (128 << 20) /* 128MB */ 114 115 /* check 'ulimit -l' */ 116 static bool mlock_check(void) 117 { 118 uid_t uid; 119 struct rlimit rlim; 120 121 uid = getuid(); 122 if (uid == 0) { 123 return true; 124 } 125 126 if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0) { 127 return false; 128 } 129 130 return rlim.rlim_cur >= REQUIRED_MEMLOCK_SZ; 131 } 132 133 #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" 134 static int new_rdma_link(char *buffer, bool ipv6) 135 { 136 char cmd[256]; 137 bool verbose = g_getenv("QTEST_LOG"); 138 139 snprintf(cmd, sizeof(cmd), "IP_FAMILY=%s %s detect %s", 140 ipv6 ? "ipv6" : "ipv4", RDMA_MIGRATION_HELPER, 141 verbose ? "" : "2>/dev/null"); 142 143 FILE *pipe = popen(cmd, "r"); 144 if (pipe == NULL) { 145 perror("Failed to run script"); 146 return -1; 147 } 148 149 int idx = 0; 150 while (fgets(buffer + idx, 128 - idx, pipe) != NULL) { 151 idx += strlen(buffer); 152 } 153 154 int status = pclose(pipe); 155 if (status == -1) { 156 perror("Error reported by pclose()"); 157 return -1; 158 } else if (WIFEXITED(status)) { 159 return WEXITSTATUS(status); 160 } 161 162 return -1; 163 } 164 165 static void __test_precopy_rdma_plain(bool ipv6) 166 { 167 char buffer[128] = {}; 168 169 if (!mlock_check()) { 170 g_test_skip("'ulimit -l' is too small, require >=128M"); 171 return; 172 } 173 174 if (new_rdma_link(buffer, ipv6)) { 175 g_test_skip("No rdma link available\n" 176 "# To enable the test:\n" 177 "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " 178 "setup a new rdma/rxe link and rerun the test\n" 179 "# Optional: run 'scripts/rdma-migration-helper.sh clean' " 180 "to revert the 'setup'"); 181 return; 182 } 183 184 /* 185 * TODO: query a free port instead of hard code. 186 * 29200=('R'+'D'+'M'+'A')*100 187 **/ 188 g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer); 189 190 MigrateCommon args = { 191 .listen_uri = uri, 192 .connect_uri = uri, 193 }; 194 195 test_precopy_common(&args); 196 } 197 198 static void test_precopy_rdma_plain(void) 199 { 200 __test_precopy_rdma_plain(false); 201 } 202 203 static void test_precopy_rdma_plain_ipv6(void) 204 { 205 __test_precopy_rdma_plain(true); 206 } 207 #endif 208 209 static void test_precopy_tcp_plain(void) 210 { 211 MigrateCommon args = { 212 .listen_uri = "tcp:127.0.0.1:0", 213 }; 214 215 test_precopy_common(&args); 216 } 217 218 static void test_precopy_tcp_switchover_ack(void) 219 { 220 MigrateCommon args = { 221 .listen_uri = "tcp:127.0.0.1:0", 222 .start = { 223 .caps[MIGRATION_CAPABILITY_RETURN_PATH] = true, 224 .caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK] = true, 225 }, 226 /* 227 * Source VM must be running in order to consider the switchover ACK 228 * when deciding to do switchover or not. 229 */ 230 .live = true, 231 }; 232 233 test_precopy_common(&args); 234 } 235 236 #ifndef _WIN32 237 static void *migrate_hook_start_fd(QTestState *from, 238 QTestState *to) 239 { 240 int ret; 241 int pair[2]; 242 243 /* Create two connected sockets for migration */ 244 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); 245 g_assert_cmpint(ret, ==, 0); 246 247 /* Send the 1st socket to the target */ 248 qtest_qmp_fds_assert_success(to, &pair[0], 1, 249 "{ 'execute': 'getfd'," 250 " 'arguments': { 'fdname': 'fd-mig' }}"); 251 close(pair[0]); 252 253 /* Start incoming migration from the 1st socket */ 254 migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}"); 255 256 /* Send the 2nd socket to the target */ 257 qtest_qmp_fds_assert_success(from, &pair[1], 1, 258 "{ 'execute': 'getfd'," 259 " 'arguments': { 'fdname': 'fd-mig' }}"); 260 close(pair[1]); 261 262 return NULL; 263 } 264 265 static void migrate_hook_end_fd(QTestState *from, 266 QTestState *to, 267 void *opaque) 268 { 269 QDict *rsp; 270 const char *error_desc; 271 272 /* Test closing fds */ 273 /* 274 * We assume, that QEMU removes named fd from its list, 275 * so this should fail. 276 */ 277 rsp = qtest_qmp(from, 278 "{ 'execute': 'closefd'," 279 " 'arguments': { 'fdname': 'fd-mig' }}"); 280 g_assert_true(qdict_haskey(rsp, "error")); 281 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 282 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 283 qobject_unref(rsp); 284 285 rsp = qtest_qmp(to, 286 "{ 'execute': 'closefd'," 287 " 'arguments': { 'fdname': 'fd-mig' }}"); 288 g_assert_true(qdict_haskey(rsp, "error")); 289 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 290 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 291 qobject_unref(rsp); 292 } 293 294 static void test_precopy_fd_socket(void) 295 { 296 MigrateCommon args = { 297 .listen_uri = "defer", 298 .connect_uri = "fd:fd-mig", 299 .start_hook = migrate_hook_start_fd, 300 .end_hook = migrate_hook_end_fd, 301 }; 302 test_precopy_common(&args); 303 } 304 305 static void *migrate_hook_start_precopy_fd_file(QTestState *from, 306 QTestState *to) 307 { 308 g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 309 int src_flags = O_CREAT | O_RDWR; 310 int dst_flags = O_CREAT | O_RDWR; 311 int fds[2]; 312 313 fds[0] = open(file, src_flags, 0660); 314 assert(fds[0] != -1); 315 316 fds[1] = open(file, dst_flags, 0660); 317 assert(fds[1] != -1); 318 319 320 qtest_qmp_fds_assert_success(to, &fds[0], 1, 321 "{ 'execute': 'getfd'," 322 " 'arguments': { 'fdname': 'fd-mig' }}"); 323 324 qtest_qmp_fds_assert_success(from, &fds[1], 1, 325 "{ 'execute': 'getfd'," 326 " 'arguments': { 'fdname': 'fd-mig' }}"); 327 328 close(fds[0]); 329 close(fds[1]); 330 331 return NULL; 332 } 333 334 static void test_precopy_fd_file(void) 335 { 336 MigrateCommon args = { 337 .listen_uri = "defer", 338 .connect_uri = "fd:fd-mig", 339 .start_hook = migrate_hook_start_precopy_fd_file, 340 .end_hook = migrate_hook_end_fd, 341 }; 342 test_file_common(&args, true); 343 } 344 #endif /* _WIN32 */ 345 346 /* 347 * The way auto_converge works, we need to do too many passes to 348 * run this test. Auto_converge logic is only run once every 349 * three iterations, so: 350 * 351 * - 3 iterations without auto_converge enabled 352 * - 3 iterations with pct = 5 353 * - 3 iterations with pct = 30 354 * - 3 iterations with pct = 55 355 * - 3 iterations with pct = 80 356 * - 3 iterations with pct = 95 (max(95, 80 + 25)) 357 * 358 * To make things even worse, we need to run the initial stage at 359 * 3MB/s so we enter autoconverge even when host is (over)loaded. 360 */ 361 static void test_auto_converge(void) 362 { 363 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 364 MigrateStart args = {}; 365 QTestState *from, *to; 366 int64_t percentage; 367 368 /* 369 * We want the test to be stable and as fast as possible. 370 * E.g., with 1Gb/s bandwidth migration may pass without throttling, 371 * so we need to decrease a bandwidth. 372 */ 373 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95; 374 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt; 375 int max_try_count, hit = 0; 376 377 if (migrate_start(&from, &to, uri, &args)) { 378 return; 379 } 380 381 migrate_set_capability(from, "auto-converge", true); 382 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); 383 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); 384 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); 385 386 /* 387 * Set the initial parameters so that the migration could not converge 388 * without throttling. 389 */ 390 migrate_ensure_non_converge(from); 391 392 /* To check remaining size after precopy */ 393 migrate_set_capability(from, "pause-before-switchover", true); 394 395 /* Wait for the first serial output from the source */ 396 wait_for_serial("src_serial"); 397 398 migrate_qmp(from, to, uri, NULL, "{}"); 399 400 /* Wait for throttling begins */ 401 percentage = 0; 402 do { 403 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 404 if (percentage != 0) { 405 break; 406 } 407 usleep(20); 408 g_assert_false(get_src()->stop_seen); 409 } while (true); 410 /* The first percentage of throttling should be at least init_pct */ 411 g_assert_cmpint(percentage, >=, init_pct); 412 413 /* 414 * End the loop when the dirty sync count greater than 1. 415 */ 416 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) { 417 usleep(1000 * 1000); 418 } 419 420 prev_dirty_sync_cnt = dirty_sync_cnt; 421 422 /* 423 * The RAMBlock dirty sync count must changes in 5 seconds, here we set 424 * the timeout to 10 seconds to ensure it changes. 425 * 426 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s, 427 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3) 428 * to complete; this ensures that the RAMBlock dirty sync occurs. 429 */ 430 max_try_count = 10; 431 while (--max_try_count) { 432 dirty_sync_cnt = get_migration_pass(from); 433 if (dirty_sync_cnt != prev_dirty_sync_cnt) { 434 hit = 1; 435 break; 436 } 437 prev_dirty_sync_cnt = dirty_sync_cnt; 438 sleep(1); 439 } 440 g_assert_cmpint(hit, ==, 1); 441 442 /* Now, when we tested that throttling works, let it converge */ 443 migrate_ensure_converge(from); 444 445 /* 446 * Wait for pre-switchover status to check last throttle percentage 447 * and remaining. These values will be zeroed later 448 */ 449 wait_for_migration_status(from, "pre-switchover", NULL); 450 451 /* The final percentage of throttling shouldn't be greater than max_pct */ 452 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 453 g_assert_cmpint(percentage, <=, max_pct); 454 migrate_continue(from, "pre-switchover"); 455 456 qtest_qmp_eventwait(to, "RESUME"); 457 458 wait_for_serial("dest_serial"); 459 wait_for_migration_complete(from); 460 461 migrate_end(from, to, true); 462 } 463 464 static void * 465 migrate_hook_start_precopy_tcp_multifd(QTestState *from, 466 QTestState *to) 467 { 468 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 469 } 470 471 static void * 472 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from, 473 QTestState *to) 474 { 475 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 476 migrate_set_parameter_str(from, "zero-page-detection", "legacy"); 477 return NULL; 478 } 479 480 static void * 481 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from, 482 QTestState *to) 483 { 484 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 485 migrate_set_parameter_str(from, "zero-page-detection", "none"); 486 return NULL; 487 } 488 489 static void test_multifd_tcp_uri_none(void) 490 { 491 MigrateCommon args = { 492 .listen_uri = "defer", 493 .start_hook = migrate_hook_start_precopy_tcp_multifd, 494 .start = { 495 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 496 }, 497 /* 498 * Multifd is more complicated than most of the features, it 499 * directly takes guest page buffers when sending, make sure 500 * everything will work alright even if guest page is changing. 501 */ 502 .live = true, 503 }; 504 test_precopy_common(&args); 505 } 506 507 static void test_multifd_tcp_zero_page_legacy(void) 508 { 509 MigrateCommon args = { 510 .listen_uri = "defer", 511 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, 512 .start = { 513 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 514 }, 515 /* 516 * Multifd is more complicated than most of the features, it 517 * directly takes guest page buffers when sending, make sure 518 * everything will work alright even if guest page is changing. 519 */ 520 .live = true, 521 }; 522 test_precopy_common(&args); 523 } 524 525 static void test_multifd_tcp_no_zero_page(void) 526 { 527 MigrateCommon args = { 528 .listen_uri = "defer", 529 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, 530 .start = { 531 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 532 }, 533 /* 534 * Multifd is more complicated than most of the features, it 535 * directly takes guest page buffers when sending, make sure 536 * everything will work alright even if guest page is changing. 537 */ 538 .live = true, 539 }; 540 test_precopy_common(&args); 541 } 542 543 static void test_multifd_tcp_channels_none(void) 544 { 545 MigrateCommon args = { 546 .listen_uri = "defer", 547 .start_hook = migrate_hook_start_precopy_tcp_multifd, 548 .live = true, 549 .start = { 550 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 551 }, 552 .connect_channels = ("[ { 'channel-type': 'main'," 553 " 'addr': { 'transport': 'socket'," 554 " 'type': 'inet'," 555 " 'host': '127.0.0.1'," 556 " 'port': '0' } } ]"), 557 }; 558 test_precopy_common(&args); 559 } 560 561 /* 562 * This test does: 563 * source target 564 * migrate_incoming 565 * migrate 566 * migrate_cancel 567 * launch another target 568 * migrate 569 * 570 * And see that it works 571 */ 572 static void test_multifd_tcp_cancel(void) 573 { 574 MigrateStart args = { 575 .hide_stderr = true, 576 }; 577 QTestState *from, *to, *to2; 578 579 if (migrate_start(&from, &to, "defer", &args)) { 580 return; 581 } 582 583 migrate_ensure_non_converge(from); 584 migrate_prepare_for_dirty_mem(from); 585 586 migrate_set_parameter_int(from, "multifd-channels", 16); 587 migrate_set_parameter_int(to, "multifd-channels", 16); 588 589 migrate_set_capability(from, "multifd", true); 590 migrate_set_capability(to, "multifd", true); 591 592 /* Start incoming migration from the 1st socket */ 593 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 594 595 /* Wait for the first serial output from the source */ 596 wait_for_serial("src_serial"); 597 598 migrate_qmp(from, to, NULL, NULL, "{}"); 599 600 migrate_wait_for_dirty_mem(from, to); 601 602 migrate_cancel(from); 603 604 /* Make sure QEMU process "to" exited */ 605 qtest_set_expected_status(to, EXIT_FAILURE); 606 qtest_wait_qemu(to); 607 qtest_quit(to); 608 609 /* 610 * Ensure the source QEMU finishes its cancellation process before we 611 * proceed with the setup of the next migration. The migrate_start() 612 * function and others might want to interact with the source in a way that 613 * is not possible while the migration is not canceled properly. For 614 * example, setting migration capabilities when the migration is still 615 * running leads to an error. 616 */ 617 wait_for_migration_status(from, "cancelled", NULL); 618 619 args = (MigrateStart){ 620 .only_target = true, 621 }; 622 623 if (migrate_start(&from, &to2, "defer", &args)) { 624 return; 625 } 626 627 migrate_set_parameter_int(to2, "multifd-channels", 16); 628 629 migrate_set_capability(to2, "multifd", true); 630 631 /* Start incoming migration from the 1st socket */ 632 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}"); 633 634 migrate_ensure_non_converge(from); 635 636 migrate_qmp(from, to2, NULL, NULL, "{}"); 637 638 migrate_wait_for_dirty_mem(from, to2); 639 640 migrate_ensure_converge(from); 641 642 wait_for_stop(from, get_src()); 643 qtest_qmp_eventwait(to2, "RESUME"); 644 645 wait_for_serial("dest_serial"); 646 wait_for_migration_complete(from); 647 migrate_end(from, to2, true); 648 } 649 650 static void test_cancel_src_after_failed(QTestState *from, QTestState *to, 651 const char *uri, const char *phase) 652 { 653 /* 654 * No migrate_incoming_qmp() at the start to force source into 655 * failed state during migrate_qmp(). 656 */ 657 658 wait_for_serial("src_serial"); 659 migrate_ensure_converge(from); 660 661 migrate_qmp(from, to, uri, NULL, "{}"); 662 663 migration_event_wait(from, phase); 664 migrate_cancel(from); 665 666 /* cancelling will not move the migration out of 'failed' */ 667 668 wait_for_migration_status(from, "failed", 669 (const char * []) { "completed", NULL }); 670 671 /* 672 * Not waiting for the destination because it never started 673 * migration. 674 */ 675 } 676 677 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, 678 const char *uri, const char *phase) 679 { 680 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 681 682 wait_for_serial("src_serial"); 683 migrate_ensure_converge(from); 684 685 migrate_qmp(from, to, uri, NULL, "{}"); 686 687 /* To move to cancelled/cancelling */ 688 migrate_cancel(from); 689 migration_event_wait(from, phase); 690 691 /* The migrate_cancel under test */ 692 migrate_cancel(from); 693 694 wait_for_migration_status(from, "cancelled", 695 (const char * []) { "completed", NULL }); 696 697 wait_for_migration_status(to, "failed", 698 (const char * []) { "completed", NULL }); 699 } 700 701 static void test_cancel_src_after_complete(QTestState *from, QTestState *to, 702 const char *uri, const char *phase) 703 { 704 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 705 706 wait_for_serial("src_serial"); 707 migrate_ensure_converge(from); 708 709 migrate_qmp(from, to, uri, NULL, "{}"); 710 711 migration_event_wait(from, phase); 712 migrate_cancel(from); 713 714 /* 715 * qmp_migrate_cancel() exits early if migration is not running 716 * anymore, the status will not change to cancelled. 717 */ 718 wait_for_migration_complete(from); 719 wait_for_migration_complete(to); 720 } 721 722 static void test_cancel_src_after_none(QTestState *from, QTestState *to, 723 const char *uri, const char *phase) 724 { 725 /* 726 * Test that cancelling without a migration happening does not 727 * affect subsequent migrations 728 */ 729 migrate_cancel(to); 730 731 wait_for_serial("src_serial"); 732 migrate_cancel(from); 733 734 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 735 736 migrate_ensure_converge(from); 737 migrate_qmp(from, to, uri, NULL, "{}"); 738 739 wait_for_migration_complete(from); 740 wait_for_migration_complete(to); 741 } 742 743 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, 744 const char *uri, const char *phase) 745 { 746 migrate_set_capability(from, "pause-before-switchover", true); 747 migrate_set_capability(to, "pause-before-switchover", true); 748 749 migrate_set_capability(from, "multifd", true); 750 migrate_set_capability(to, "multifd", true); 751 752 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 753 754 wait_for_serial("src_serial"); 755 migrate_ensure_converge(from); 756 757 migrate_qmp(from, to, uri, NULL, "{}"); 758 759 migration_event_wait(from, phase); 760 migrate_cancel(from); 761 migration_event_wait(from, "cancelling"); 762 763 wait_for_migration_status(from, "cancelled", 764 (const char * []) { "completed", NULL }); 765 766 wait_for_migration_status(to, "failed", 767 (const char * []) { "completed", NULL }); 768 } 769 770 static void test_cancel_src_after_status(void *opaque) 771 { 772 const char *test_path = opaque; 773 g_autofree char *phase = g_path_get_basename(test_path); 774 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 775 QTestState *from, *to; 776 MigrateStart args = { 777 .hide_stderr = true, 778 }; 779 780 if (migrate_start(&from, &to, "defer", &args)) { 781 return; 782 } 783 784 if (g_str_equal(phase, "cancelling") || 785 g_str_equal(phase, "cancelled")) { 786 test_cancel_src_after_cancelled(from, to, uri, phase); 787 788 } else if (g_str_equal(phase, "completed")) { 789 test_cancel_src_after_complete(from, to, uri, phase); 790 791 } else if (g_str_equal(phase, "failed")) { 792 test_cancel_src_after_failed(from, to, uri, phase); 793 794 } else if (g_str_equal(phase, "none")) { 795 test_cancel_src_after_none(from, to, uri, phase); 796 797 } else { 798 /* any state that comes before pre-switchover */ 799 test_cancel_src_pre_switchover(from, to, uri, phase); 800 } 801 802 migrate_end(from, to, false); 803 } 804 805 static void calc_dirty_rate(QTestState *who, uint64_t calc_time) 806 { 807 qtest_qmp_assert_success(who, 808 "{ 'execute': 'calc-dirty-rate'," 809 "'arguments': { " 810 "'calc-time': %" PRIu64 "," 811 "'mode': 'dirty-ring' }}", 812 calc_time); 813 } 814 815 static QDict *query_dirty_rate(QTestState *who) 816 { 817 return qtest_qmp_assert_success_ref(who, 818 "{ 'execute': 'query-dirty-rate' }"); 819 } 820 821 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) 822 { 823 qtest_qmp_assert_success(who, 824 "{ 'execute': 'set-vcpu-dirty-limit'," 825 "'arguments': { " 826 "'dirty-rate': %" PRIu64 " } }", 827 dirtyrate); 828 } 829 830 static void cancel_vcpu_dirty_limit(QTestState *who) 831 { 832 qtest_qmp_assert_success(who, 833 "{ 'execute': 'cancel-vcpu-dirty-limit' }"); 834 } 835 836 static QDict *query_vcpu_dirty_limit(QTestState *who) 837 { 838 QDict *rsp; 839 840 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); 841 g_assert(!qdict_haskey(rsp, "error")); 842 g_assert(qdict_haskey(rsp, "return")); 843 844 return rsp; 845 } 846 847 static bool calc_dirtyrate_ready(QTestState *who) 848 { 849 QDict *rsp_return; 850 const char *status; 851 bool ready; 852 853 rsp_return = query_dirty_rate(who); 854 g_assert(rsp_return); 855 856 status = qdict_get_str(rsp_return, "status"); 857 g_assert(status); 858 ready = g_strcmp0(status, "measuring"); 859 qobject_unref(rsp_return); 860 861 return ready; 862 } 863 864 static void wait_for_calc_dirtyrate_complete(QTestState *who, 865 int64_t time_s) 866 { 867 int max_try_count = 10000; 868 usleep(time_s * 1000000); 869 870 while (!calc_dirtyrate_ready(who) && max_try_count--) { 871 usleep(1000); 872 } 873 874 /* 875 * Set the timeout with 10 s(max_try_count * 1000us), 876 * if dirtyrate measurement not complete, fail test. 877 */ 878 g_assert_cmpint(max_try_count, !=, 0); 879 } 880 881 static int64_t get_dirty_rate(QTestState *who) 882 { 883 QDict *rsp_return; 884 const char *status; 885 QList *rates; 886 const QListEntry *entry; 887 QDict *rate; 888 int64_t dirtyrate; 889 890 rsp_return = query_dirty_rate(who); 891 g_assert(rsp_return); 892 893 status = qdict_get_str(rsp_return, "status"); 894 g_assert(status); 895 g_assert_cmpstr(status, ==, "measured"); 896 897 rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); 898 g_assert(rates && !qlist_empty(rates)); 899 900 entry = qlist_first(rates); 901 g_assert(entry); 902 903 rate = qobject_to(QDict, qlist_entry_obj(entry)); 904 g_assert(rate); 905 906 dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); 907 908 qobject_unref(rsp_return); 909 return dirtyrate; 910 } 911 912 static int64_t get_limit_rate(QTestState *who) 913 { 914 QDict *rsp_return; 915 QList *rates; 916 const QListEntry *entry; 917 QDict *rate; 918 int64_t dirtyrate; 919 920 rsp_return = query_vcpu_dirty_limit(who); 921 g_assert(rsp_return); 922 923 rates = qdict_get_qlist(rsp_return, "return"); 924 g_assert(rates && !qlist_empty(rates)); 925 926 entry = qlist_first(rates); 927 g_assert(entry); 928 929 rate = qobject_to(QDict, qlist_entry_obj(entry)); 930 g_assert(rate); 931 932 dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); 933 934 qobject_unref(rsp_return); 935 return dirtyrate; 936 } 937 938 static QTestState *dirtylimit_start_vm(void) 939 { 940 QTestState *vm = NULL; 941 g_autofree gchar *cmd = NULL; 942 const char *bootpath; 943 944 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false); 945 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " 946 "-name dirtylimit-test,debug-threads=on " 947 "-m 150M -smp 1 " 948 "-serial file:%s/vm_serial " 949 "-drive file=%s,format=raw ", 950 tmpfs, bootpath); 951 952 vm = qtest_init(cmd); 953 return vm; 954 } 955 956 static void dirtylimit_stop_vm(QTestState *vm) 957 { 958 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial"); 959 960 qtest_quit(vm); 961 unlink(path); 962 } 963 964 static void test_vcpu_dirty_limit(void) 965 { 966 QTestState *vm; 967 int64_t origin_rate; 968 int64_t quota_rate; 969 int64_t rate ; 970 int max_try_count = 20; 971 int hit = 0; 972 973 /* Start vm for vcpu dirtylimit test */ 974 vm = dirtylimit_start_vm(); 975 976 /* Wait for the first serial output from the vm*/ 977 wait_for_serial("vm_serial"); 978 979 /* Do dirtyrate measurement with calc time equals 1s */ 980 calc_dirty_rate(vm, 1); 981 982 /* Sleep calc time and wait for calc dirtyrate complete */ 983 wait_for_calc_dirtyrate_complete(vm, 1); 984 985 /* Query original dirty page rate */ 986 origin_rate = get_dirty_rate(vm); 987 988 /* VM booted from bootsect should dirty memory steadily */ 989 assert(origin_rate != 0); 990 991 /* Setup quota dirty page rate at half of origin */ 992 quota_rate = origin_rate / 2; 993 994 /* Set dirtylimit */ 995 dirtylimit_set_all(vm, quota_rate); 996 997 /* 998 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit 999 * works literally 1000 */ 1001 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); 1002 1003 /* Sleep a bit to check if it take effect */ 1004 usleep(2000000); 1005 1006 /* 1007 * Check if dirtylimit take effect realistically, set the 1008 * timeout with 20 s(max_try_count * 1s), if dirtylimit 1009 * doesn't take effect, fail test. 1010 */ 1011 while (--max_try_count) { 1012 calc_dirty_rate(vm, 1); 1013 wait_for_calc_dirtyrate_complete(vm, 1); 1014 rate = get_dirty_rate(vm); 1015 1016 /* 1017 * Assume hitting if current rate is less 1018 * than quota rate (within accepting error) 1019 */ 1020 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 1021 hit = 1; 1022 break; 1023 } 1024 } 1025 1026 g_assert_cmpint(hit, ==, 1); 1027 1028 hit = 0; 1029 max_try_count = 20; 1030 1031 /* Check if dirtylimit cancellation take effect */ 1032 cancel_vcpu_dirty_limit(vm); 1033 while (--max_try_count) { 1034 calc_dirty_rate(vm, 1); 1035 wait_for_calc_dirtyrate_complete(vm, 1); 1036 rate = get_dirty_rate(vm); 1037 1038 /* 1039 * Assume dirtylimit be canceled if current rate is 1040 * greater than quota rate (within accepting error) 1041 */ 1042 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 1043 hit = 1; 1044 break; 1045 } 1046 } 1047 1048 g_assert_cmpint(hit, ==, 1); 1049 dirtylimit_stop_vm(vm); 1050 } 1051 1052 static void migrate_dirty_limit_wait_showup(QTestState *from, 1053 const int64_t period, 1054 const int64_t value) 1055 { 1056 /* Enable dirty limit capability */ 1057 migrate_set_capability(from, "dirty-limit", true); 1058 1059 /* Set dirty limit parameters */ 1060 migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); 1061 migrate_set_parameter_int(from, "vcpu-dirty-limit", value); 1062 1063 /* Make sure migrate can't converge */ 1064 migrate_ensure_non_converge(from); 1065 1066 /* To check limit rate after precopy */ 1067 migrate_set_capability(from, "pause-before-switchover", true); 1068 1069 /* Wait for the serial output from the source */ 1070 wait_for_serial("src_serial"); 1071 } 1072 1073 /* 1074 * This test does: 1075 * source destination 1076 * start vm 1077 * start incoming vm 1078 * migrate 1079 * wait dirty limit to begin 1080 * cancel migrate 1081 * cancellation check 1082 * restart incoming vm 1083 * migrate 1084 * wait dirty limit to begin 1085 * wait pre-switchover event 1086 * convergence condition check 1087 * 1088 * And see if dirty limit migration works correctly. 1089 * This test case involves many passes, so it runs in slow mode only. 1090 */ 1091 static void test_dirty_limit(void) 1092 { 1093 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 1094 QTestState *from, *to; 1095 int64_t remaining; 1096 uint64_t throttle_us_per_full; 1097 /* 1098 * We want the test to be stable and as fast as possible. 1099 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit, 1100 * so we need to decrease a bandwidth. 1101 */ 1102 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; 1103 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ 1104 const int64_t downtime_limit = 250; /* 250ms */ 1105 /* 1106 * We migrate through unix-socket (> 500Mb/s). 1107 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). 1108 * So, we can predict expected_threshold 1109 */ 1110 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; 1111 int max_try_count = 10; 1112 MigrateCommon args = { 1113 .start = { 1114 .hide_stderr = true, 1115 .use_dirty_ring = true, 1116 }, 1117 .listen_uri = uri, 1118 .connect_uri = uri, 1119 }; 1120 1121 /* Start src, dst vm */ 1122 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1123 return; 1124 } 1125 1126 /* Prepare for dirty limit migration and wait src vm show up */ 1127 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); 1128 1129 /* Start migrate */ 1130 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1131 1132 /* Wait for dirty limit throttle begin */ 1133 throttle_us_per_full = 0; 1134 while (throttle_us_per_full == 0) { 1135 throttle_us_per_full = 1136 read_migrate_property_int(from, 1137 "dirty-limit-throttle-time-per-round"); 1138 usleep(100); 1139 g_assert_false(get_src()->stop_seen); 1140 } 1141 1142 /* Now cancel migrate and wait for dirty limit throttle switch off */ 1143 migrate_cancel(from); 1144 wait_for_migration_status(from, "cancelled", NULL); 1145 1146 /* destination always fails after cancel */ 1147 migration_event_wait(to, "failed"); 1148 qtest_set_expected_status(to, EXIT_FAILURE); 1149 qtest_quit(to); 1150 1151 /* Check if dirty limit throttle switched off, set timeout 1ms */ 1152 do { 1153 throttle_us_per_full = 1154 read_migrate_property_int(from, 1155 "dirty-limit-throttle-time-per-round"); 1156 usleep(100); 1157 g_assert_false(get_src()->stop_seen); 1158 } while (throttle_us_per_full != 0 && --max_try_count); 1159 1160 /* Assert dirty limit is not in service */ 1161 g_assert_cmpint(throttle_us_per_full, ==, 0); 1162 1163 args = (MigrateCommon) { 1164 .start = { 1165 .only_target = true, 1166 .use_dirty_ring = true, 1167 }, 1168 .listen_uri = uri, 1169 .connect_uri = uri, 1170 }; 1171 1172 /* Restart dst vm, src vm already show up so we needn't wait anymore */ 1173 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1174 return; 1175 } 1176 1177 /* Start migrate */ 1178 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1179 1180 /* Wait for dirty limit throttle begin */ 1181 throttle_us_per_full = 0; 1182 while (throttle_us_per_full == 0) { 1183 throttle_us_per_full = 1184 read_migrate_property_int(from, 1185 "dirty-limit-throttle-time-per-round"); 1186 usleep(100); 1187 g_assert_false(get_src()->stop_seen); 1188 } 1189 1190 /* 1191 * The dirty limit rate should equals the return value of 1192 * query-vcpu-dirty-limit if dirty limit cap set 1193 */ 1194 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); 1195 1196 /* Now, we have tested if dirty limit works, let it converge */ 1197 migrate_set_parameter_int(from, "downtime-limit", downtime_limit); 1198 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); 1199 1200 /* 1201 * Wait for pre-switchover status to check if migration 1202 * satisfy the convergence condition 1203 */ 1204 wait_for_migration_status(from, "pre-switchover", NULL); 1205 1206 remaining = read_ram_property_int(from, "remaining"); 1207 g_assert_cmpint(remaining, <, 1208 (expected_threshold + expected_threshold / 100)); 1209 1210 migrate_continue(from, "pre-switchover"); 1211 1212 qtest_qmp_eventwait(to, "RESUME"); 1213 1214 wait_for_serial("dest_serial"); 1215 wait_for_migration_complete(from); 1216 1217 migrate_end(from, to, true); 1218 } 1219 1220 static void migration_test_add_precopy_smoke(MigrationTestEnv *env) 1221 { 1222 if (env->is_x86) { 1223 migration_test_add("/migration/precopy/unix/suspend/live", 1224 test_precopy_unix_suspend_live); 1225 migration_test_add("/migration/precopy/unix/suspend/notlive", 1226 test_precopy_unix_suspend_notlive); 1227 } 1228 1229 migration_test_add("/migration/precopy/unix/plain", 1230 test_precopy_unix_plain); 1231 1232 migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain); 1233 migration_test_add("/migration/multifd/tcp/uri/plain/none", 1234 test_multifd_tcp_uri_none); 1235 migration_test_add("/migration/multifd/tcp/plain/cancel", 1236 test_multifd_tcp_cancel); 1237 #ifdef CONFIG_RDMA 1238 migration_test_add("/migration/precopy/rdma/plain", 1239 test_precopy_rdma_plain); 1240 migration_test_add("/migration/precopy/rdma/plain/ipv6", 1241 test_precopy_rdma_plain_ipv6); 1242 #endif 1243 } 1244 1245 void migration_test_add_precopy(MigrationTestEnv *env) 1246 { 1247 tmpfs = env->tmpfs; 1248 1249 migration_test_add_precopy_smoke(env); 1250 1251 if (!env->full_set) { 1252 return; 1253 } 1254 1255 migration_test_add("/migration/precopy/tcp/plain/switchover-ack", 1256 test_precopy_tcp_switchover_ack); 1257 1258 #ifndef _WIN32 1259 migration_test_add("/migration/precopy/fd/tcp", 1260 test_precopy_fd_socket); 1261 migration_test_add("/migration/precopy/fd/file", 1262 test_precopy_fd_file); 1263 #endif 1264 1265 /* 1266 * See explanation why this test is slow on function definition 1267 */ 1268 if (g_test_slow()) { 1269 migration_test_add("/migration/auto_converge", 1270 test_auto_converge); 1271 if (g_str_equal(env->arch, "x86_64") && 1272 env->has_kvm && env->has_dirty_ring) { 1273 migration_test_add("/dirty_limit", 1274 test_dirty_limit); 1275 } 1276 } 1277 migration_test_add("/migration/multifd/tcp/channels/plain/none", 1278 test_multifd_tcp_channels_none); 1279 migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy", 1280 test_multifd_tcp_zero_page_legacy); 1281 migration_test_add("/migration/multifd/tcp/plain/zero-page/none", 1282 test_multifd_tcp_no_zero_page); 1283 if (g_str_equal(env->arch, "x86_64") 1284 && env->has_kvm && env->has_dirty_ring) { 1285 1286 migration_test_add("/migration/dirty_ring", 1287 test_precopy_unix_dirty_ring); 1288 if (qtest_has_machine("pc") && g_test_slow()) { 1289 migration_test_add("/migration/vcpu_dirty_limit", 1290 test_vcpu_dirty_limit); 1291 } 1292 } 1293 1294 /* ensure new status don't go unnoticed */ 1295 assert(MIGRATION_STATUS__MAX == 15); 1296 1297 for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { 1298 switch (i) { 1299 case MIGRATION_STATUS_DEVICE: /* happens too fast */ 1300 case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ 1301 case MIGRATION_STATUS_COLO: /* no support in tests */ 1302 case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ 1303 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1304 case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: 1305 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1306 continue; 1307 default: 1308 migration_test_add_suffix("/migration/cancel/src/after/", 1309 MigrationStatus_str(i), 1310 test_cancel_src_after_status); 1311 } 1312 } 1313 } 1314