1 /* 2 * QTest testcase for precopy migration 3 * 4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 5 * based on the vhost-user-test.c that is: 6 * Copyright (c) 2014 Virtual Open Systems Sarl. 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "chardev/char.h" 15 #include "crypto/tlscredspsk.h" 16 #include "libqtest.h" 17 #include "migration/bootfile.h" 18 #include "migration/framework.h" 19 #include "migration/migration-qmp.h" 20 #include "migration/migration-util.h" 21 #include "ppc-util.h" 22 #include "qobject/qlist.h" 23 #include "qapi-types-migration.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 36 static char *tmpfs; 37 38 static void test_precopy_unix_plain(void) 39 { 40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 41 MigrateCommon args = { 42 .listen_uri = uri, 43 .connect_uri = uri, 44 /* 45 * The simplest use case of precopy, covering smoke tests of 46 * get-dirty-log dirty tracking. 47 */ 48 .live = true, 49 }; 50 51 test_precopy_common(&args); 52 } 53 54 static void test_precopy_unix_suspend_live(void) 55 { 56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 57 MigrateCommon args = { 58 .listen_uri = uri, 59 .connect_uri = uri, 60 /* 61 * despite being live, the test is fast because the src 62 * suspends immediately. 63 */ 64 .live = true, 65 .start.suspend_me = true, 66 }; 67 68 test_precopy_common(&args); 69 } 70 71 static void test_precopy_unix_suspend_notlive(void) 72 { 73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 74 MigrateCommon args = { 75 .listen_uri = uri, 76 .connect_uri = uri, 77 .start.suspend_me = true, 78 }; 79 80 test_precopy_common(&args); 81 } 82 83 static void test_precopy_unix_dirty_ring(void) 84 { 85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 86 MigrateCommon args = { 87 .start = { 88 .use_dirty_ring = true, 89 }, 90 .listen_uri = uri, 91 .connect_uri = uri, 92 /* 93 * Besides the precopy/unix basic test, cover dirty ring interface 94 * rather than get-dirty-log. 95 */ 96 .live = true, 97 }; 98 99 test_precopy_common(&args); 100 } 101 102 #ifdef CONFIG_RDMA 103 104 #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" 105 static int new_rdma_link(char *buffer) 106 { 107 char cmd[256]; 108 bool verbose = g_getenv("QTEST_LOG"); 109 110 snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER, 111 verbose ? "" : "2>/dev/null"); 112 113 FILE *pipe = popen(cmd, "r"); 114 if (pipe == NULL) { 115 perror("Failed to run script"); 116 return -1; 117 } 118 119 int idx = 0; 120 while (fgets(buffer + idx, 128 - idx, pipe) != NULL) { 121 idx += strlen(buffer); 122 } 123 124 int status = pclose(pipe); 125 if (status == -1) { 126 perror("Error reported by pclose()"); 127 return -1; 128 } else if (WIFEXITED(status)) { 129 return WEXITSTATUS(status); 130 } 131 132 return -1; 133 } 134 135 static void test_precopy_rdma_plain(void) 136 { 137 char buffer[128] = {}; 138 139 if (new_rdma_link(buffer)) { 140 g_test_skip("No rdma link available\n" 141 "# To enable the test:\n" 142 "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " 143 "setup a new rdma/rxe link and rerun the test\n" 144 "# Optional: run 'scripts/rdma-migration-helper.sh clean' " 145 "to revert the 'setup'"); 146 return; 147 } 148 149 /* 150 * TODO: query a free port instead of hard code. 151 * 29200=('R'+'D'+'M'+'A')*100 152 **/ 153 g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer); 154 155 MigrateCommon args = { 156 .listen_uri = uri, 157 .connect_uri = uri, 158 }; 159 160 test_precopy_common(&args); 161 } 162 #endif 163 164 static void test_precopy_tcp_plain(void) 165 { 166 MigrateCommon args = { 167 .listen_uri = "tcp:127.0.0.1:0", 168 }; 169 170 test_precopy_common(&args); 171 } 172 173 static void test_precopy_tcp_switchover_ack(void) 174 { 175 MigrateCommon args = { 176 .listen_uri = "tcp:127.0.0.1:0", 177 .start = { 178 .caps[MIGRATION_CAPABILITY_RETURN_PATH] = true, 179 .caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK] = true, 180 }, 181 /* 182 * Source VM must be running in order to consider the switchover ACK 183 * when deciding to do switchover or not. 184 */ 185 .live = true, 186 }; 187 188 test_precopy_common(&args); 189 } 190 191 #ifndef _WIN32 192 static void *migrate_hook_start_fd(QTestState *from, 193 QTestState *to) 194 { 195 int ret; 196 int pair[2]; 197 198 /* Create two connected sockets for migration */ 199 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); 200 g_assert_cmpint(ret, ==, 0); 201 202 /* Send the 1st socket to the target */ 203 qtest_qmp_fds_assert_success(to, &pair[0], 1, 204 "{ 'execute': 'getfd'," 205 " 'arguments': { 'fdname': 'fd-mig' }}"); 206 close(pair[0]); 207 208 /* Start incoming migration from the 1st socket */ 209 migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}"); 210 211 /* Send the 2nd socket to the target */ 212 qtest_qmp_fds_assert_success(from, &pair[1], 1, 213 "{ 'execute': 'getfd'," 214 " 'arguments': { 'fdname': 'fd-mig' }}"); 215 close(pair[1]); 216 217 return NULL; 218 } 219 220 static void migrate_hook_end_fd(QTestState *from, 221 QTestState *to, 222 void *opaque) 223 { 224 QDict *rsp; 225 const char *error_desc; 226 227 /* Test closing fds */ 228 /* 229 * We assume, that QEMU removes named fd from its list, 230 * so this should fail. 231 */ 232 rsp = qtest_qmp(from, 233 "{ 'execute': 'closefd'," 234 " 'arguments': { 'fdname': 'fd-mig' }}"); 235 g_assert_true(qdict_haskey(rsp, "error")); 236 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 237 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 238 qobject_unref(rsp); 239 240 rsp = qtest_qmp(to, 241 "{ 'execute': 'closefd'," 242 " 'arguments': { 'fdname': 'fd-mig' }}"); 243 g_assert_true(qdict_haskey(rsp, "error")); 244 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 245 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 246 qobject_unref(rsp); 247 } 248 249 static void test_precopy_fd_socket(void) 250 { 251 MigrateCommon args = { 252 .listen_uri = "defer", 253 .connect_uri = "fd:fd-mig", 254 .start_hook = migrate_hook_start_fd, 255 .end_hook = migrate_hook_end_fd, 256 }; 257 test_precopy_common(&args); 258 } 259 260 static void *migrate_hook_start_precopy_fd_file(QTestState *from, 261 QTestState *to) 262 { 263 g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 264 int src_flags = O_CREAT | O_RDWR; 265 int dst_flags = O_CREAT | O_RDWR; 266 int fds[2]; 267 268 fds[0] = open(file, src_flags, 0660); 269 assert(fds[0] != -1); 270 271 fds[1] = open(file, dst_flags, 0660); 272 assert(fds[1] != -1); 273 274 275 qtest_qmp_fds_assert_success(to, &fds[0], 1, 276 "{ 'execute': 'getfd'," 277 " 'arguments': { 'fdname': 'fd-mig' }}"); 278 279 qtest_qmp_fds_assert_success(from, &fds[1], 1, 280 "{ 'execute': 'getfd'," 281 " 'arguments': { 'fdname': 'fd-mig' }}"); 282 283 close(fds[0]); 284 close(fds[1]); 285 286 return NULL; 287 } 288 289 static void test_precopy_fd_file(void) 290 { 291 MigrateCommon args = { 292 .listen_uri = "defer", 293 .connect_uri = "fd:fd-mig", 294 .start_hook = migrate_hook_start_precopy_fd_file, 295 .end_hook = migrate_hook_end_fd, 296 }; 297 test_file_common(&args, true); 298 } 299 #endif /* _WIN32 */ 300 301 /* 302 * The way auto_converge works, we need to do too many passes to 303 * run this test. Auto_converge logic is only run once every 304 * three iterations, so: 305 * 306 * - 3 iterations without auto_converge enabled 307 * - 3 iterations with pct = 5 308 * - 3 iterations with pct = 30 309 * - 3 iterations with pct = 55 310 * - 3 iterations with pct = 80 311 * - 3 iterations with pct = 95 (max(95, 80 + 25)) 312 * 313 * To make things even worse, we need to run the initial stage at 314 * 3MB/s so we enter autoconverge even when host is (over)loaded. 315 */ 316 static void test_auto_converge(void) 317 { 318 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 319 MigrateStart args = {}; 320 QTestState *from, *to; 321 int64_t percentage; 322 323 /* 324 * We want the test to be stable and as fast as possible. 325 * E.g., with 1Gb/s bandwidth migration may pass without throttling, 326 * so we need to decrease a bandwidth. 327 */ 328 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95; 329 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt; 330 int max_try_count, hit = 0; 331 332 if (migrate_start(&from, &to, uri, &args)) { 333 return; 334 } 335 336 migrate_set_capability(from, "auto-converge", true); 337 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); 338 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); 339 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); 340 341 /* 342 * Set the initial parameters so that the migration could not converge 343 * without throttling. 344 */ 345 migrate_ensure_non_converge(from); 346 347 /* To check remaining size after precopy */ 348 migrate_set_capability(from, "pause-before-switchover", true); 349 350 /* Wait for the first serial output from the source */ 351 wait_for_serial("src_serial"); 352 353 migrate_qmp(from, to, uri, NULL, "{}"); 354 355 /* Wait for throttling begins */ 356 percentage = 0; 357 do { 358 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 359 if (percentage != 0) { 360 break; 361 } 362 usleep(20); 363 g_assert_false(get_src()->stop_seen); 364 } while (true); 365 /* The first percentage of throttling should be at least init_pct */ 366 g_assert_cmpint(percentage, >=, init_pct); 367 368 /* 369 * End the loop when the dirty sync count greater than 1. 370 */ 371 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) { 372 usleep(1000 * 1000); 373 } 374 375 prev_dirty_sync_cnt = dirty_sync_cnt; 376 377 /* 378 * The RAMBlock dirty sync count must changes in 5 seconds, here we set 379 * the timeout to 10 seconds to ensure it changes. 380 * 381 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s, 382 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3) 383 * to complete; this ensures that the RAMBlock dirty sync occurs. 384 */ 385 max_try_count = 10; 386 while (--max_try_count) { 387 dirty_sync_cnt = get_migration_pass(from); 388 if (dirty_sync_cnt != prev_dirty_sync_cnt) { 389 hit = 1; 390 break; 391 } 392 prev_dirty_sync_cnt = dirty_sync_cnt; 393 sleep(1); 394 } 395 g_assert_cmpint(hit, ==, 1); 396 397 /* Now, when we tested that throttling works, let it converge */ 398 migrate_ensure_converge(from); 399 400 /* 401 * Wait for pre-switchover status to check last throttle percentage 402 * and remaining. These values will be zeroed later 403 */ 404 wait_for_migration_status(from, "pre-switchover", NULL); 405 406 /* The final percentage of throttling shouldn't be greater than max_pct */ 407 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 408 g_assert_cmpint(percentage, <=, max_pct); 409 migrate_continue(from, "pre-switchover"); 410 411 qtest_qmp_eventwait(to, "RESUME"); 412 413 wait_for_serial("dest_serial"); 414 wait_for_migration_complete(from); 415 416 migrate_end(from, to, true); 417 } 418 419 static void * 420 migrate_hook_start_precopy_tcp_multifd(QTestState *from, 421 QTestState *to) 422 { 423 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 424 } 425 426 static void * 427 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from, 428 QTestState *to) 429 { 430 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 431 migrate_set_parameter_str(from, "zero-page-detection", "legacy"); 432 return NULL; 433 } 434 435 static void * 436 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from, 437 QTestState *to) 438 { 439 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 440 migrate_set_parameter_str(from, "zero-page-detection", "none"); 441 return NULL; 442 } 443 444 static void test_multifd_tcp_uri_none(void) 445 { 446 MigrateCommon args = { 447 .listen_uri = "defer", 448 .start_hook = migrate_hook_start_precopy_tcp_multifd, 449 .start = { 450 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 451 }, 452 /* 453 * Multifd is more complicated than most of the features, it 454 * directly takes guest page buffers when sending, make sure 455 * everything will work alright even if guest page is changing. 456 */ 457 .live = true, 458 }; 459 test_precopy_common(&args); 460 } 461 462 static void test_multifd_tcp_zero_page_legacy(void) 463 { 464 MigrateCommon args = { 465 .listen_uri = "defer", 466 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, 467 .start = { 468 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 469 }, 470 /* 471 * Multifd is more complicated than most of the features, it 472 * directly takes guest page buffers when sending, make sure 473 * everything will work alright even if guest page is changing. 474 */ 475 .live = true, 476 }; 477 test_precopy_common(&args); 478 } 479 480 static void test_multifd_tcp_no_zero_page(void) 481 { 482 MigrateCommon args = { 483 .listen_uri = "defer", 484 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, 485 .start = { 486 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 487 }, 488 /* 489 * Multifd is more complicated than most of the features, it 490 * directly takes guest page buffers when sending, make sure 491 * everything will work alright even if guest page is changing. 492 */ 493 .live = true, 494 }; 495 test_precopy_common(&args); 496 } 497 498 static void test_multifd_tcp_channels_none(void) 499 { 500 MigrateCommon args = { 501 .listen_uri = "defer", 502 .start_hook = migrate_hook_start_precopy_tcp_multifd, 503 .live = true, 504 .start = { 505 .caps[MIGRATION_CAPABILITY_MULTIFD] = true, 506 }, 507 .connect_channels = ("[ { 'channel-type': 'main'," 508 " 'addr': { 'transport': 'socket'," 509 " 'type': 'inet'," 510 " 'host': '127.0.0.1'," 511 " 'port': '0' } } ]"), 512 }; 513 test_precopy_common(&args); 514 } 515 516 /* 517 * This test does: 518 * source target 519 * migrate_incoming 520 * migrate 521 * migrate_cancel 522 * launch another target 523 * migrate 524 * 525 * And see that it works 526 */ 527 static void test_multifd_tcp_cancel(void) 528 { 529 MigrateStart args = { 530 .hide_stderr = true, 531 }; 532 QTestState *from, *to, *to2; 533 534 if (migrate_start(&from, &to, "defer", &args)) { 535 return; 536 } 537 538 migrate_ensure_non_converge(from); 539 migrate_prepare_for_dirty_mem(from); 540 541 migrate_set_parameter_int(from, "multifd-channels", 16); 542 migrate_set_parameter_int(to, "multifd-channels", 16); 543 544 migrate_set_capability(from, "multifd", true); 545 migrate_set_capability(to, "multifd", true); 546 547 /* Start incoming migration from the 1st socket */ 548 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 549 550 /* Wait for the first serial output from the source */ 551 wait_for_serial("src_serial"); 552 553 migrate_qmp(from, to, NULL, NULL, "{}"); 554 555 migrate_wait_for_dirty_mem(from, to); 556 557 migrate_cancel(from); 558 559 /* Make sure QEMU process "to" exited */ 560 qtest_set_expected_status(to, EXIT_FAILURE); 561 qtest_wait_qemu(to); 562 qtest_quit(to); 563 564 /* 565 * Ensure the source QEMU finishes its cancellation process before we 566 * proceed with the setup of the next migration. The migrate_start() 567 * function and others might want to interact with the source in a way that 568 * is not possible while the migration is not canceled properly. For 569 * example, setting migration capabilities when the migration is still 570 * running leads to an error. 571 */ 572 wait_for_migration_status(from, "cancelled", NULL); 573 574 args = (MigrateStart){ 575 .only_target = true, 576 }; 577 578 if (migrate_start(&from, &to2, "defer", &args)) { 579 return; 580 } 581 582 migrate_set_parameter_int(to2, "multifd-channels", 16); 583 584 migrate_set_capability(to2, "multifd", true); 585 586 /* Start incoming migration from the 1st socket */ 587 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}"); 588 589 migrate_ensure_non_converge(from); 590 591 migrate_qmp(from, to2, NULL, NULL, "{}"); 592 593 migrate_wait_for_dirty_mem(from, to2); 594 595 migrate_ensure_converge(from); 596 597 wait_for_stop(from, get_src()); 598 qtest_qmp_eventwait(to2, "RESUME"); 599 600 wait_for_serial("dest_serial"); 601 wait_for_migration_complete(from); 602 migrate_end(from, to2, true); 603 } 604 605 static void test_cancel_src_after_failed(QTestState *from, QTestState *to, 606 const char *uri, const char *phase) 607 { 608 /* 609 * No migrate_incoming_qmp() at the start to force source into 610 * failed state during migrate_qmp(). 611 */ 612 613 wait_for_serial("src_serial"); 614 migrate_ensure_converge(from); 615 616 migrate_qmp(from, to, uri, NULL, "{}"); 617 618 migration_event_wait(from, phase); 619 migrate_cancel(from); 620 621 /* cancelling will not move the migration out of 'failed' */ 622 623 wait_for_migration_status(from, "failed", 624 (const char * []) { "completed", NULL }); 625 626 /* 627 * Not waiting for the destination because it never started 628 * migration. 629 */ 630 } 631 632 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, 633 const char *uri, const char *phase) 634 { 635 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 636 637 wait_for_serial("src_serial"); 638 migrate_ensure_converge(from); 639 640 migrate_qmp(from, to, uri, NULL, "{}"); 641 642 /* To move to cancelled/cancelling */ 643 migrate_cancel(from); 644 migration_event_wait(from, phase); 645 646 /* The migrate_cancel under test */ 647 migrate_cancel(from); 648 649 wait_for_migration_status(from, "cancelled", 650 (const char * []) { "completed", NULL }); 651 652 wait_for_migration_status(to, "failed", 653 (const char * []) { "completed", NULL }); 654 } 655 656 static void test_cancel_src_after_complete(QTestState *from, QTestState *to, 657 const char *uri, const char *phase) 658 { 659 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 660 661 wait_for_serial("src_serial"); 662 migrate_ensure_converge(from); 663 664 migrate_qmp(from, to, uri, NULL, "{}"); 665 666 migration_event_wait(from, phase); 667 migrate_cancel(from); 668 669 /* 670 * qmp_migrate_cancel() exits early if migration is not running 671 * anymore, the status will not change to cancelled. 672 */ 673 wait_for_migration_complete(from); 674 wait_for_migration_complete(to); 675 } 676 677 static void test_cancel_src_after_none(QTestState *from, QTestState *to, 678 const char *uri, const char *phase) 679 { 680 /* 681 * Test that cancelling without a migration happening does not 682 * affect subsequent migrations 683 */ 684 migrate_cancel(to); 685 686 wait_for_serial("src_serial"); 687 migrate_cancel(from); 688 689 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 690 691 migrate_ensure_converge(from); 692 migrate_qmp(from, to, uri, NULL, "{}"); 693 694 wait_for_migration_complete(from); 695 wait_for_migration_complete(to); 696 } 697 698 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, 699 const char *uri, const char *phase) 700 { 701 migrate_set_capability(from, "pause-before-switchover", true); 702 migrate_set_capability(to, "pause-before-switchover", true); 703 704 migrate_set_capability(from, "multifd", true); 705 migrate_set_capability(to, "multifd", true); 706 707 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 708 709 wait_for_serial("src_serial"); 710 migrate_ensure_converge(from); 711 712 migrate_qmp(from, to, uri, NULL, "{}"); 713 714 migration_event_wait(from, phase); 715 migrate_cancel(from); 716 migration_event_wait(from, "cancelling"); 717 718 wait_for_migration_status(from, "cancelled", 719 (const char * []) { "completed", NULL }); 720 721 wait_for_migration_status(to, "failed", 722 (const char * []) { "completed", NULL }); 723 } 724 725 static void test_cancel_src_after_status(void *opaque) 726 { 727 const char *test_path = opaque; 728 g_autofree char *phase = g_path_get_basename(test_path); 729 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 730 QTestState *from, *to; 731 MigrateStart args = { 732 .hide_stderr = true, 733 }; 734 735 if (migrate_start(&from, &to, "defer", &args)) { 736 return; 737 } 738 739 if (g_str_equal(phase, "cancelling") || 740 g_str_equal(phase, "cancelled")) { 741 test_cancel_src_after_cancelled(from, to, uri, phase); 742 743 } else if (g_str_equal(phase, "completed")) { 744 test_cancel_src_after_complete(from, to, uri, phase); 745 746 } else if (g_str_equal(phase, "failed")) { 747 test_cancel_src_after_failed(from, to, uri, phase); 748 749 } else if (g_str_equal(phase, "none")) { 750 test_cancel_src_after_none(from, to, uri, phase); 751 752 } else { 753 /* any state that comes before pre-switchover */ 754 test_cancel_src_pre_switchover(from, to, uri, phase); 755 } 756 757 migrate_end(from, to, false); 758 } 759 760 static void calc_dirty_rate(QTestState *who, uint64_t calc_time) 761 { 762 qtest_qmp_assert_success(who, 763 "{ 'execute': 'calc-dirty-rate'," 764 "'arguments': { " 765 "'calc-time': %" PRIu64 "," 766 "'mode': 'dirty-ring' }}", 767 calc_time); 768 } 769 770 static QDict *query_dirty_rate(QTestState *who) 771 { 772 return qtest_qmp_assert_success_ref(who, 773 "{ 'execute': 'query-dirty-rate' }"); 774 } 775 776 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) 777 { 778 qtest_qmp_assert_success(who, 779 "{ 'execute': 'set-vcpu-dirty-limit'," 780 "'arguments': { " 781 "'dirty-rate': %" PRIu64 " } }", 782 dirtyrate); 783 } 784 785 static void cancel_vcpu_dirty_limit(QTestState *who) 786 { 787 qtest_qmp_assert_success(who, 788 "{ 'execute': 'cancel-vcpu-dirty-limit' }"); 789 } 790 791 static QDict *query_vcpu_dirty_limit(QTestState *who) 792 { 793 QDict *rsp; 794 795 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); 796 g_assert(!qdict_haskey(rsp, "error")); 797 g_assert(qdict_haskey(rsp, "return")); 798 799 return rsp; 800 } 801 802 static bool calc_dirtyrate_ready(QTestState *who) 803 { 804 QDict *rsp_return; 805 const char *status; 806 bool ready; 807 808 rsp_return = query_dirty_rate(who); 809 g_assert(rsp_return); 810 811 status = qdict_get_str(rsp_return, "status"); 812 g_assert(status); 813 ready = g_strcmp0(status, "measuring"); 814 qobject_unref(rsp_return); 815 816 return ready; 817 } 818 819 static void wait_for_calc_dirtyrate_complete(QTestState *who, 820 int64_t time_s) 821 { 822 int max_try_count = 10000; 823 usleep(time_s * 1000000); 824 825 while (!calc_dirtyrate_ready(who) && max_try_count--) { 826 usleep(1000); 827 } 828 829 /* 830 * Set the timeout with 10 s(max_try_count * 1000us), 831 * if dirtyrate measurement not complete, fail test. 832 */ 833 g_assert_cmpint(max_try_count, !=, 0); 834 } 835 836 static int64_t get_dirty_rate(QTestState *who) 837 { 838 QDict *rsp_return; 839 const char *status; 840 QList *rates; 841 const QListEntry *entry; 842 QDict *rate; 843 int64_t dirtyrate; 844 845 rsp_return = query_dirty_rate(who); 846 g_assert(rsp_return); 847 848 status = qdict_get_str(rsp_return, "status"); 849 g_assert(status); 850 g_assert_cmpstr(status, ==, "measured"); 851 852 rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); 853 g_assert(rates && !qlist_empty(rates)); 854 855 entry = qlist_first(rates); 856 g_assert(entry); 857 858 rate = qobject_to(QDict, qlist_entry_obj(entry)); 859 g_assert(rate); 860 861 dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); 862 863 qobject_unref(rsp_return); 864 return dirtyrate; 865 } 866 867 static int64_t get_limit_rate(QTestState *who) 868 { 869 QDict *rsp_return; 870 QList *rates; 871 const QListEntry *entry; 872 QDict *rate; 873 int64_t dirtyrate; 874 875 rsp_return = query_vcpu_dirty_limit(who); 876 g_assert(rsp_return); 877 878 rates = qdict_get_qlist(rsp_return, "return"); 879 g_assert(rates && !qlist_empty(rates)); 880 881 entry = qlist_first(rates); 882 g_assert(entry); 883 884 rate = qobject_to(QDict, qlist_entry_obj(entry)); 885 g_assert(rate); 886 887 dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); 888 889 qobject_unref(rsp_return); 890 return dirtyrate; 891 } 892 893 static QTestState *dirtylimit_start_vm(void) 894 { 895 QTestState *vm = NULL; 896 g_autofree gchar *cmd = NULL; 897 const char *bootpath; 898 899 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false); 900 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " 901 "-name dirtylimit-test,debug-threads=on " 902 "-m 150M -smp 1 " 903 "-serial file:%s/vm_serial " 904 "-drive file=%s,format=raw ", 905 tmpfs, bootpath); 906 907 vm = qtest_init(cmd); 908 return vm; 909 } 910 911 static void dirtylimit_stop_vm(QTestState *vm) 912 { 913 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial"); 914 915 qtest_quit(vm); 916 unlink(path); 917 } 918 919 static void test_vcpu_dirty_limit(void) 920 { 921 QTestState *vm; 922 int64_t origin_rate; 923 int64_t quota_rate; 924 int64_t rate ; 925 int max_try_count = 20; 926 int hit = 0; 927 928 /* Start vm for vcpu dirtylimit test */ 929 vm = dirtylimit_start_vm(); 930 931 /* Wait for the first serial output from the vm*/ 932 wait_for_serial("vm_serial"); 933 934 /* Do dirtyrate measurement with calc time equals 1s */ 935 calc_dirty_rate(vm, 1); 936 937 /* Sleep calc time and wait for calc dirtyrate complete */ 938 wait_for_calc_dirtyrate_complete(vm, 1); 939 940 /* Query original dirty page rate */ 941 origin_rate = get_dirty_rate(vm); 942 943 /* VM booted from bootsect should dirty memory steadily */ 944 assert(origin_rate != 0); 945 946 /* Setup quota dirty page rate at half of origin */ 947 quota_rate = origin_rate / 2; 948 949 /* Set dirtylimit */ 950 dirtylimit_set_all(vm, quota_rate); 951 952 /* 953 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit 954 * works literally 955 */ 956 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); 957 958 /* Sleep a bit to check if it take effect */ 959 usleep(2000000); 960 961 /* 962 * Check if dirtylimit take effect realistically, set the 963 * timeout with 20 s(max_try_count * 1s), if dirtylimit 964 * doesn't take effect, fail test. 965 */ 966 while (--max_try_count) { 967 calc_dirty_rate(vm, 1); 968 wait_for_calc_dirtyrate_complete(vm, 1); 969 rate = get_dirty_rate(vm); 970 971 /* 972 * Assume hitting if current rate is less 973 * than quota rate (within accepting error) 974 */ 975 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 976 hit = 1; 977 break; 978 } 979 } 980 981 g_assert_cmpint(hit, ==, 1); 982 983 hit = 0; 984 max_try_count = 20; 985 986 /* Check if dirtylimit cancellation take effect */ 987 cancel_vcpu_dirty_limit(vm); 988 while (--max_try_count) { 989 calc_dirty_rate(vm, 1); 990 wait_for_calc_dirtyrate_complete(vm, 1); 991 rate = get_dirty_rate(vm); 992 993 /* 994 * Assume dirtylimit be canceled if current rate is 995 * greater than quota rate (within accepting error) 996 */ 997 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 998 hit = 1; 999 break; 1000 } 1001 } 1002 1003 g_assert_cmpint(hit, ==, 1); 1004 dirtylimit_stop_vm(vm); 1005 } 1006 1007 static void migrate_dirty_limit_wait_showup(QTestState *from, 1008 const int64_t period, 1009 const int64_t value) 1010 { 1011 /* Enable dirty limit capability */ 1012 migrate_set_capability(from, "dirty-limit", true); 1013 1014 /* Set dirty limit parameters */ 1015 migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); 1016 migrate_set_parameter_int(from, "vcpu-dirty-limit", value); 1017 1018 /* Make sure migrate can't converge */ 1019 migrate_ensure_non_converge(from); 1020 1021 /* To check limit rate after precopy */ 1022 migrate_set_capability(from, "pause-before-switchover", true); 1023 1024 /* Wait for the serial output from the source */ 1025 wait_for_serial("src_serial"); 1026 } 1027 1028 /* 1029 * This test does: 1030 * source destination 1031 * start vm 1032 * start incoming vm 1033 * migrate 1034 * wait dirty limit to begin 1035 * cancel migrate 1036 * cancellation check 1037 * restart incoming vm 1038 * migrate 1039 * wait dirty limit to begin 1040 * wait pre-switchover event 1041 * convergence condition check 1042 * 1043 * And see if dirty limit migration works correctly. 1044 * This test case involves many passes, so it runs in slow mode only. 1045 */ 1046 static void test_dirty_limit(void) 1047 { 1048 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 1049 QTestState *from, *to; 1050 int64_t remaining; 1051 uint64_t throttle_us_per_full; 1052 /* 1053 * We want the test to be stable and as fast as possible. 1054 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit, 1055 * so we need to decrease a bandwidth. 1056 */ 1057 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; 1058 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ 1059 const int64_t downtime_limit = 250; /* 250ms */ 1060 /* 1061 * We migrate through unix-socket (> 500Mb/s). 1062 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). 1063 * So, we can predict expected_threshold 1064 */ 1065 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; 1066 int max_try_count = 10; 1067 MigrateCommon args = { 1068 .start = { 1069 .hide_stderr = true, 1070 .use_dirty_ring = true, 1071 }, 1072 .listen_uri = uri, 1073 .connect_uri = uri, 1074 }; 1075 1076 /* Start src, dst vm */ 1077 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1078 return; 1079 } 1080 1081 /* Prepare for dirty limit migration and wait src vm show up */ 1082 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); 1083 1084 /* Start migrate */ 1085 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1086 1087 /* Wait for dirty limit throttle begin */ 1088 throttle_us_per_full = 0; 1089 while (throttle_us_per_full == 0) { 1090 throttle_us_per_full = 1091 read_migrate_property_int(from, 1092 "dirty-limit-throttle-time-per-round"); 1093 usleep(100); 1094 g_assert_false(get_src()->stop_seen); 1095 } 1096 1097 /* Now cancel migrate and wait for dirty limit throttle switch off */ 1098 migrate_cancel(from); 1099 wait_for_migration_status(from, "cancelled", NULL); 1100 1101 /* destination always fails after cancel */ 1102 migration_event_wait(to, "failed"); 1103 qtest_set_expected_status(to, EXIT_FAILURE); 1104 qtest_quit(to); 1105 1106 /* Check if dirty limit throttle switched off, set timeout 1ms */ 1107 do { 1108 throttle_us_per_full = 1109 read_migrate_property_int(from, 1110 "dirty-limit-throttle-time-per-round"); 1111 usleep(100); 1112 g_assert_false(get_src()->stop_seen); 1113 } while (throttle_us_per_full != 0 && --max_try_count); 1114 1115 /* Assert dirty limit is not in service */ 1116 g_assert_cmpint(throttle_us_per_full, ==, 0); 1117 1118 args = (MigrateCommon) { 1119 .start = { 1120 .only_target = true, 1121 .use_dirty_ring = true, 1122 }, 1123 .listen_uri = uri, 1124 .connect_uri = uri, 1125 }; 1126 1127 /* Restart dst vm, src vm already show up so we needn't wait anymore */ 1128 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1129 return; 1130 } 1131 1132 /* Start migrate */ 1133 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1134 1135 /* Wait for dirty limit throttle begin */ 1136 throttle_us_per_full = 0; 1137 while (throttle_us_per_full == 0) { 1138 throttle_us_per_full = 1139 read_migrate_property_int(from, 1140 "dirty-limit-throttle-time-per-round"); 1141 usleep(100); 1142 g_assert_false(get_src()->stop_seen); 1143 } 1144 1145 /* 1146 * The dirty limit rate should equals the return value of 1147 * query-vcpu-dirty-limit if dirty limit cap set 1148 */ 1149 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); 1150 1151 /* Now, we have tested if dirty limit works, let it converge */ 1152 migrate_set_parameter_int(from, "downtime-limit", downtime_limit); 1153 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); 1154 1155 /* 1156 * Wait for pre-switchover status to check if migration 1157 * satisfy the convergence condition 1158 */ 1159 wait_for_migration_status(from, "pre-switchover", NULL); 1160 1161 remaining = read_ram_property_int(from, "remaining"); 1162 g_assert_cmpint(remaining, <, 1163 (expected_threshold + expected_threshold / 100)); 1164 1165 migrate_continue(from, "pre-switchover"); 1166 1167 qtest_qmp_eventwait(to, "RESUME"); 1168 1169 wait_for_serial("dest_serial"); 1170 wait_for_migration_complete(from); 1171 1172 migrate_end(from, to, true); 1173 } 1174 1175 static void migration_test_add_precopy_smoke(MigrationTestEnv *env) 1176 { 1177 if (env->is_x86) { 1178 migration_test_add("/migration/precopy/unix/suspend/live", 1179 test_precopy_unix_suspend_live); 1180 migration_test_add("/migration/precopy/unix/suspend/notlive", 1181 test_precopy_unix_suspend_notlive); 1182 } 1183 1184 migration_test_add("/migration/precopy/unix/plain", 1185 test_precopy_unix_plain); 1186 1187 migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain); 1188 migration_test_add("/migration/multifd/tcp/uri/plain/none", 1189 test_multifd_tcp_uri_none); 1190 migration_test_add("/migration/multifd/tcp/plain/cancel", 1191 test_multifd_tcp_cancel); 1192 #ifdef CONFIG_RDMA 1193 migration_test_add("/migration/precopy/rdma/plain", 1194 test_precopy_rdma_plain); 1195 #endif 1196 } 1197 1198 void migration_test_add_precopy(MigrationTestEnv *env) 1199 { 1200 tmpfs = env->tmpfs; 1201 1202 migration_test_add_precopy_smoke(env); 1203 1204 if (!env->full_set) { 1205 return; 1206 } 1207 1208 migration_test_add("/migration/precopy/tcp/plain/switchover-ack", 1209 test_precopy_tcp_switchover_ack); 1210 1211 #ifndef _WIN32 1212 migration_test_add("/migration/precopy/fd/tcp", 1213 test_precopy_fd_socket); 1214 migration_test_add("/migration/precopy/fd/file", 1215 test_precopy_fd_file); 1216 #endif 1217 1218 /* 1219 * See explanation why this test is slow on function definition 1220 */ 1221 if (g_test_slow()) { 1222 migration_test_add("/migration/auto_converge", 1223 test_auto_converge); 1224 if (g_str_equal(env->arch, "x86_64") && 1225 env->has_kvm && env->has_dirty_ring) { 1226 migration_test_add("/dirty_limit", 1227 test_dirty_limit); 1228 } 1229 } 1230 migration_test_add("/migration/multifd/tcp/channels/plain/none", 1231 test_multifd_tcp_channels_none); 1232 migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy", 1233 test_multifd_tcp_zero_page_legacy); 1234 migration_test_add("/migration/multifd/tcp/plain/zero-page/none", 1235 test_multifd_tcp_no_zero_page); 1236 if (g_str_equal(env->arch, "x86_64") 1237 && env->has_kvm && env->has_dirty_ring) { 1238 1239 migration_test_add("/migration/dirty_ring", 1240 test_precopy_unix_dirty_ring); 1241 if (qtest_has_machine("pc") && g_test_slow()) { 1242 migration_test_add("/migration/vcpu_dirty_limit", 1243 test_vcpu_dirty_limit); 1244 } 1245 } 1246 1247 /* ensure new status don't go unnoticed */ 1248 assert(MIGRATION_STATUS__MAX == 15); 1249 1250 for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { 1251 switch (i) { 1252 case MIGRATION_STATUS_DEVICE: /* happens too fast */ 1253 case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ 1254 case MIGRATION_STATUS_COLO: /* no support in tests */ 1255 case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ 1256 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1257 case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: 1258 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1259 continue; 1260 default: 1261 migration_test_add_suffix("/migration/cancel/src/after/", 1262 MigrationStatus_str(i), 1263 test_cancel_src_after_status); 1264 } 1265 } 1266 } 1267