1 /* 2 * QTest testcase for precopy migration 3 * 4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 5 * based on the vhost-user-test.c that is: 6 * Copyright (c) 2014 Virtual Open Systems Sarl. 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "chardev/char.h" 15 #include "crypto/tlscredspsk.h" 16 #include "libqtest.h" 17 #include "migration/bootfile.h" 18 #include "migration/framework.h" 19 #include "migration/migration-qmp.h" 20 #include "migration/migration-util.h" 21 #include "ppc-util.h" 22 #include "qobject/qlist.h" 23 #include "qapi-types-migration.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 36 static char *tmpfs; 37 38 static void test_precopy_unix_plain(void) 39 { 40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 41 MigrateCommon args = { 42 .listen_uri = uri, 43 .connect_uri = uri, 44 /* 45 * The simplest use case of precopy, covering smoke tests of 46 * get-dirty-log dirty tracking. 47 */ 48 .live = true, 49 }; 50 51 test_precopy_common(&args); 52 } 53 54 static void test_precopy_unix_suspend_live(void) 55 { 56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 57 MigrateCommon args = { 58 .listen_uri = uri, 59 .connect_uri = uri, 60 /* 61 * despite being live, the test is fast because the src 62 * suspends immediately. 63 */ 64 .live = true, 65 .start.suspend_me = true, 66 }; 67 68 test_precopy_common(&args); 69 } 70 71 static void test_precopy_unix_suspend_notlive(void) 72 { 73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 74 MigrateCommon args = { 75 .listen_uri = uri, 76 .connect_uri = uri, 77 .start.suspend_me = true, 78 }; 79 80 test_precopy_common(&args); 81 } 82 83 static void test_precopy_unix_dirty_ring(void) 84 { 85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 86 MigrateCommon args = { 87 .start = { 88 .use_dirty_ring = true, 89 }, 90 .listen_uri = uri, 91 .connect_uri = uri, 92 /* 93 * Besides the precopy/unix basic test, cover dirty ring interface 94 * rather than get-dirty-log. 95 */ 96 .live = true, 97 }; 98 99 test_precopy_common(&args); 100 } 101 102 #ifdef CONFIG_RDMA 103 104 #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" 105 static int new_rdma_link(char *buffer) 106 { 107 char cmd[256]; 108 bool verbose = g_getenv("QTEST_LOG"); 109 110 snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER, 111 verbose ? "" : "2>/dev/null"); 112 113 FILE *pipe = popen(cmd, "r"); 114 if (pipe == NULL) { 115 perror("Failed to run script"); 116 return -1; 117 } 118 119 int idx = 0; 120 while (fgets(buffer + idx, 128 - idx, pipe) != NULL) { 121 idx += strlen(buffer); 122 } 123 124 int status = pclose(pipe); 125 if (status == -1) { 126 perror("Error reported by pclose()"); 127 return -1; 128 } else if (WIFEXITED(status)) { 129 return WEXITSTATUS(status); 130 } 131 132 return -1; 133 } 134 135 static void test_precopy_rdma_plain(void) 136 { 137 char buffer[128] = {}; 138 139 if (new_rdma_link(buffer)) { 140 g_test_skip("No rdma link available\n" 141 "# To enable the test:\n" 142 "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " 143 "setup a new rdma/rxe link and rerun the test\n" 144 "# Optional: run 'scripts/rdma-migration-helper.sh clean' " 145 "to revert the 'setup'"); 146 return; 147 } 148 149 /* 150 * TODO: query a free port instead of hard code. 151 * 29200=('R'+'D'+'M'+'A')*100 152 **/ 153 g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer); 154 155 MigrateCommon args = { 156 .listen_uri = uri, 157 .connect_uri = uri, 158 }; 159 160 test_precopy_common(&args); 161 } 162 #endif 163 164 static void test_precopy_tcp_plain(void) 165 { 166 MigrateCommon args = { 167 .listen_uri = "tcp:127.0.0.1:0", 168 }; 169 170 test_precopy_common(&args); 171 } 172 173 static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to) 174 { 175 176 migrate_set_capability(from, "return-path", true); 177 migrate_set_capability(to, "return-path", true); 178 179 migrate_set_capability(from, "switchover-ack", true); 180 migrate_set_capability(to, "switchover-ack", true); 181 182 return NULL; 183 } 184 185 static void test_precopy_tcp_switchover_ack(void) 186 { 187 MigrateCommon args = { 188 .listen_uri = "tcp:127.0.0.1:0", 189 .start_hook = migrate_hook_start_switchover_ack, 190 /* 191 * Source VM must be running in order to consider the switchover ACK 192 * when deciding to do switchover or not. 193 */ 194 .live = true, 195 }; 196 197 test_precopy_common(&args); 198 } 199 200 #ifndef _WIN32 201 static void *migrate_hook_start_fd(QTestState *from, 202 QTestState *to) 203 { 204 int ret; 205 int pair[2]; 206 207 /* Create two connected sockets for migration */ 208 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); 209 g_assert_cmpint(ret, ==, 0); 210 211 /* Send the 1st socket to the target */ 212 qtest_qmp_fds_assert_success(to, &pair[0], 1, 213 "{ 'execute': 'getfd'," 214 " 'arguments': { 'fdname': 'fd-mig' }}"); 215 close(pair[0]); 216 217 /* Start incoming migration from the 1st socket */ 218 migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}"); 219 220 /* Send the 2nd socket to the target */ 221 qtest_qmp_fds_assert_success(from, &pair[1], 1, 222 "{ 'execute': 'getfd'," 223 " 'arguments': { 'fdname': 'fd-mig' }}"); 224 close(pair[1]); 225 226 return NULL; 227 } 228 229 static void migrate_hook_end_fd(QTestState *from, 230 QTestState *to, 231 void *opaque) 232 { 233 QDict *rsp; 234 const char *error_desc; 235 236 /* Test closing fds */ 237 /* 238 * We assume, that QEMU removes named fd from its list, 239 * so this should fail. 240 */ 241 rsp = qtest_qmp(from, 242 "{ 'execute': 'closefd'," 243 " 'arguments': { 'fdname': 'fd-mig' }}"); 244 g_assert_true(qdict_haskey(rsp, "error")); 245 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 246 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 247 qobject_unref(rsp); 248 249 rsp = qtest_qmp(to, 250 "{ 'execute': 'closefd'," 251 " 'arguments': { 'fdname': 'fd-mig' }}"); 252 g_assert_true(qdict_haskey(rsp, "error")); 253 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 254 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 255 qobject_unref(rsp); 256 } 257 258 static void test_precopy_fd_socket(void) 259 { 260 MigrateCommon args = { 261 .listen_uri = "defer", 262 .connect_uri = "fd:fd-mig", 263 .start_hook = migrate_hook_start_fd, 264 .end_hook = migrate_hook_end_fd, 265 }; 266 test_precopy_common(&args); 267 } 268 269 static void *migrate_hook_start_precopy_fd_file(QTestState *from, 270 QTestState *to) 271 { 272 g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 273 int src_flags = O_CREAT | O_RDWR; 274 int dst_flags = O_CREAT | O_RDWR; 275 int fds[2]; 276 277 fds[0] = open(file, src_flags, 0660); 278 assert(fds[0] != -1); 279 280 fds[1] = open(file, dst_flags, 0660); 281 assert(fds[1] != -1); 282 283 284 qtest_qmp_fds_assert_success(to, &fds[0], 1, 285 "{ 'execute': 'getfd'," 286 " 'arguments': { 'fdname': 'fd-mig' }}"); 287 288 qtest_qmp_fds_assert_success(from, &fds[1], 1, 289 "{ 'execute': 'getfd'," 290 " 'arguments': { 'fdname': 'fd-mig' }}"); 291 292 close(fds[0]); 293 close(fds[1]); 294 295 return NULL; 296 } 297 298 static void test_precopy_fd_file(void) 299 { 300 MigrateCommon args = { 301 .listen_uri = "defer", 302 .connect_uri = "fd:fd-mig", 303 .start_hook = migrate_hook_start_precopy_fd_file, 304 .end_hook = migrate_hook_end_fd, 305 }; 306 test_file_common(&args, true); 307 } 308 #endif /* _WIN32 */ 309 310 /* 311 * The way auto_converge works, we need to do too many passes to 312 * run this test. Auto_converge logic is only run once every 313 * three iterations, so: 314 * 315 * - 3 iterations without auto_converge enabled 316 * - 3 iterations with pct = 5 317 * - 3 iterations with pct = 30 318 * - 3 iterations with pct = 55 319 * - 3 iterations with pct = 80 320 * - 3 iterations with pct = 95 (max(95, 80 + 25)) 321 * 322 * To make things even worse, we need to run the initial stage at 323 * 3MB/s so we enter autoconverge even when host is (over)loaded. 324 */ 325 static void test_auto_converge(void) 326 { 327 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 328 MigrateStart args = {}; 329 QTestState *from, *to; 330 int64_t percentage; 331 332 /* 333 * We want the test to be stable and as fast as possible. 334 * E.g., with 1Gb/s bandwidth migration may pass without throttling, 335 * so we need to decrease a bandwidth. 336 */ 337 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95; 338 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt; 339 int max_try_count, hit = 0; 340 341 if (migrate_start(&from, &to, uri, &args)) { 342 return; 343 } 344 345 migrate_set_capability(from, "auto-converge", true); 346 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); 347 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); 348 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); 349 350 /* 351 * Set the initial parameters so that the migration could not converge 352 * without throttling. 353 */ 354 migrate_ensure_non_converge(from); 355 356 /* To check remaining size after precopy */ 357 migrate_set_capability(from, "pause-before-switchover", true); 358 359 /* Wait for the first serial output from the source */ 360 wait_for_serial("src_serial"); 361 362 migrate_qmp(from, to, uri, NULL, "{}"); 363 364 /* Wait for throttling begins */ 365 percentage = 0; 366 do { 367 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 368 if (percentage != 0) { 369 break; 370 } 371 usleep(20); 372 g_assert_false(get_src()->stop_seen); 373 } while (true); 374 /* The first percentage of throttling should be at least init_pct */ 375 g_assert_cmpint(percentage, >=, init_pct); 376 377 /* 378 * End the loop when the dirty sync count greater than 1. 379 */ 380 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) { 381 usleep(1000 * 1000); 382 } 383 384 prev_dirty_sync_cnt = dirty_sync_cnt; 385 386 /* 387 * The RAMBlock dirty sync count must changes in 5 seconds, here we set 388 * the timeout to 10 seconds to ensure it changes. 389 * 390 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s, 391 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3) 392 * to complete; this ensures that the RAMBlock dirty sync occurs. 393 */ 394 max_try_count = 10; 395 while (--max_try_count) { 396 dirty_sync_cnt = get_migration_pass(from); 397 if (dirty_sync_cnt != prev_dirty_sync_cnt) { 398 hit = 1; 399 break; 400 } 401 prev_dirty_sync_cnt = dirty_sync_cnt; 402 sleep(1); 403 } 404 g_assert_cmpint(hit, ==, 1); 405 406 /* Now, when we tested that throttling works, let it converge */ 407 migrate_ensure_converge(from); 408 409 /* 410 * Wait for pre-switchover status to check last throttle percentage 411 * and remaining. These values will be zeroed later 412 */ 413 wait_for_migration_status(from, "pre-switchover", NULL); 414 415 /* The final percentage of throttling shouldn't be greater than max_pct */ 416 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 417 g_assert_cmpint(percentage, <=, max_pct); 418 migrate_continue(from, "pre-switchover"); 419 420 qtest_qmp_eventwait(to, "RESUME"); 421 422 wait_for_serial("dest_serial"); 423 wait_for_migration_complete(from); 424 425 migrate_end(from, to, true); 426 } 427 428 static void * 429 migrate_hook_start_precopy_tcp_multifd(QTestState *from, 430 QTestState *to) 431 { 432 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 433 } 434 435 static void * 436 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from, 437 QTestState *to) 438 { 439 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 440 migrate_set_parameter_str(from, "zero-page-detection", "legacy"); 441 return NULL; 442 } 443 444 static void * 445 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from, 446 QTestState *to) 447 { 448 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 449 migrate_set_parameter_str(from, "zero-page-detection", "none"); 450 return NULL; 451 } 452 453 static void test_multifd_tcp_uri_none(void) 454 { 455 MigrateCommon args = { 456 .listen_uri = "defer", 457 .start_hook = migrate_hook_start_precopy_tcp_multifd, 458 /* 459 * Multifd is more complicated than most of the features, it 460 * directly takes guest page buffers when sending, make sure 461 * everything will work alright even if guest page is changing. 462 */ 463 .live = true, 464 }; 465 test_precopy_common(&args); 466 } 467 468 static void test_multifd_tcp_zero_page_legacy(void) 469 { 470 MigrateCommon args = { 471 .listen_uri = "defer", 472 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, 473 /* 474 * Multifd is more complicated than most of the features, it 475 * directly takes guest page buffers when sending, make sure 476 * everything will work alright even if guest page is changing. 477 */ 478 .live = true, 479 }; 480 test_precopy_common(&args); 481 } 482 483 static void test_multifd_tcp_no_zero_page(void) 484 { 485 MigrateCommon args = { 486 .listen_uri = "defer", 487 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, 488 /* 489 * Multifd is more complicated than most of the features, it 490 * directly takes guest page buffers when sending, make sure 491 * everything will work alright even if guest page is changing. 492 */ 493 .live = true, 494 }; 495 test_precopy_common(&args); 496 } 497 498 static void test_multifd_tcp_channels_none(void) 499 { 500 MigrateCommon args = { 501 .listen_uri = "defer", 502 .start_hook = migrate_hook_start_precopy_tcp_multifd, 503 .live = true, 504 .connect_channels = ("[ { 'channel-type': 'main'," 505 " 'addr': { 'transport': 'socket'," 506 " 'type': 'inet'," 507 " 'host': '127.0.0.1'," 508 " 'port': '0' } } ]"), 509 }; 510 test_precopy_common(&args); 511 } 512 513 /* 514 * This test does: 515 * source target 516 * migrate_incoming 517 * migrate 518 * migrate_cancel 519 * launch another target 520 * migrate 521 * 522 * And see that it works 523 */ 524 static void test_multifd_tcp_cancel(void) 525 { 526 MigrateStart args = { 527 .hide_stderr = true, 528 }; 529 QTestState *from, *to, *to2; 530 531 if (migrate_start(&from, &to, "defer", &args)) { 532 return; 533 } 534 535 migrate_ensure_non_converge(from); 536 migrate_prepare_for_dirty_mem(from); 537 538 migrate_set_parameter_int(from, "multifd-channels", 16); 539 migrate_set_parameter_int(to, "multifd-channels", 16); 540 541 migrate_set_capability(from, "multifd", true); 542 migrate_set_capability(to, "multifd", true); 543 544 /* Start incoming migration from the 1st socket */ 545 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 546 547 /* Wait for the first serial output from the source */ 548 wait_for_serial("src_serial"); 549 550 migrate_qmp(from, to, NULL, NULL, "{}"); 551 552 migrate_wait_for_dirty_mem(from, to); 553 554 migrate_cancel(from); 555 556 /* Make sure QEMU process "to" exited */ 557 qtest_set_expected_status(to, EXIT_FAILURE); 558 qtest_wait_qemu(to); 559 qtest_quit(to); 560 561 /* 562 * Ensure the source QEMU finishes its cancellation process before we 563 * proceed with the setup of the next migration. The migrate_start() 564 * function and others might want to interact with the source in a way that 565 * is not possible while the migration is not canceled properly. For 566 * example, setting migration capabilities when the migration is still 567 * running leads to an error. 568 */ 569 wait_for_migration_status(from, "cancelled", NULL); 570 571 args = (MigrateStart){ 572 .only_target = true, 573 }; 574 575 if (migrate_start(&from, &to2, "defer", &args)) { 576 return; 577 } 578 579 migrate_set_parameter_int(to2, "multifd-channels", 16); 580 581 migrate_set_capability(to2, "multifd", true); 582 583 /* Start incoming migration from the 1st socket */ 584 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}"); 585 586 migrate_ensure_non_converge(from); 587 588 migrate_qmp(from, to2, NULL, NULL, "{}"); 589 590 migrate_wait_for_dirty_mem(from, to2); 591 592 migrate_ensure_converge(from); 593 594 wait_for_stop(from, get_src()); 595 qtest_qmp_eventwait(to2, "RESUME"); 596 597 wait_for_serial("dest_serial"); 598 wait_for_migration_complete(from); 599 migrate_end(from, to2, true); 600 } 601 602 static void test_cancel_src_after_failed(QTestState *from, QTestState *to, 603 const char *uri, const char *phase) 604 { 605 /* 606 * No migrate_incoming_qmp() at the start to force source into 607 * failed state during migrate_qmp(). 608 */ 609 610 wait_for_serial("src_serial"); 611 migrate_ensure_converge(from); 612 613 migrate_qmp(from, to, uri, NULL, "{}"); 614 615 migration_event_wait(from, phase); 616 migrate_cancel(from); 617 618 /* cancelling will not move the migration out of 'failed' */ 619 620 wait_for_migration_status(from, "failed", 621 (const char * []) { "completed", NULL }); 622 623 /* 624 * Not waiting for the destination because it never started 625 * migration. 626 */ 627 } 628 629 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, 630 const char *uri, const char *phase) 631 { 632 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 633 634 wait_for_serial("src_serial"); 635 migrate_ensure_converge(from); 636 637 migrate_qmp(from, to, uri, NULL, "{}"); 638 639 /* To move to cancelled/cancelling */ 640 migrate_cancel(from); 641 migration_event_wait(from, phase); 642 643 /* The migrate_cancel under test */ 644 migrate_cancel(from); 645 646 wait_for_migration_status(from, "cancelled", 647 (const char * []) { "completed", NULL }); 648 649 wait_for_migration_status(to, "failed", 650 (const char * []) { "completed", NULL }); 651 } 652 653 static void test_cancel_src_after_complete(QTestState *from, QTestState *to, 654 const char *uri, const char *phase) 655 { 656 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 657 658 wait_for_serial("src_serial"); 659 migrate_ensure_converge(from); 660 661 migrate_qmp(from, to, uri, NULL, "{}"); 662 663 migration_event_wait(from, phase); 664 migrate_cancel(from); 665 666 /* 667 * qmp_migrate_cancel() exits early if migration is not running 668 * anymore, the status will not change to cancelled. 669 */ 670 wait_for_migration_complete(from); 671 wait_for_migration_complete(to); 672 } 673 674 static void test_cancel_src_after_none(QTestState *from, QTestState *to, 675 const char *uri, const char *phase) 676 { 677 /* 678 * Test that cancelling without a migration happening does not 679 * affect subsequent migrations 680 */ 681 migrate_cancel(to); 682 683 wait_for_serial("src_serial"); 684 migrate_cancel(from); 685 686 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 687 688 migrate_ensure_converge(from); 689 migrate_qmp(from, to, uri, NULL, "{}"); 690 691 wait_for_migration_complete(from); 692 wait_for_migration_complete(to); 693 } 694 695 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, 696 const char *uri, const char *phase) 697 { 698 migrate_set_capability(from, "pause-before-switchover", true); 699 migrate_set_capability(to, "pause-before-switchover", true); 700 701 migrate_set_capability(from, "multifd", true); 702 migrate_set_capability(to, "multifd", true); 703 704 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 705 706 wait_for_serial("src_serial"); 707 migrate_ensure_converge(from); 708 709 migrate_qmp(from, to, uri, NULL, "{}"); 710 711 migration_event_wait(from, phase); 712 migrate_cancel(from); 713 migration_event_wait(from, "cancelling"); 714 715 wait_for_migration_status(from, "cancelled", 716 (const char * []) { "completed", NULL }); 717 718 wait_for_migration_status(to, "failed", 719 (const char * []) { "completed", NULL }); 720 } 721 722 static void test_cancel_src_after_status(void *opaque) 723 { 724 const char *test_path = opaque; 725 g_autofree char *phase = g_path_get_basename(test_path); 726 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 727 QTestState *from, *to; 728 MigrateStart args = { 729 .hide_stderr = true, 730 }; 731 732 if (migrate_start(&from, &to, "defer", &args)) { 733 return; 734 } 735 736 if (g_str_equal(phase, "cancelling") || 737 g_str_equal(phase, "cancelled")) { 738 test_cancel_src_after_cancelled(from, to, uri, phase); 739 740 } else if (g_str_equal(phase, "completed")) { 741 test_cancel_src_after_complete(from, to, uri, phase); 742 743 } else if (g_str_equal(phase, "failed")) { 744 test_cancel_src_after_failed(from, to, uri, phase); 745 746 } else if (g_str_equal(phase, "none")) { 747 test_cancel_src_after_none(from, to, uri, phase); 748 749 } else { 750 /* any state that comes before pre-switchover */ 751 test_cancel_src_pre_switchover(from, to, uri, phase); 752 } 753 754 migrate_end(from, to, false); 755 } 756 757 static void calc_dirty_rate(QTestState *who, uint64_t calc_time) 758 { 759 qtest_qmp_assert_success(who, 760 "{ 'execute': 'calc-dirty-rate'," 761 "'arguments': { " 762 "'calc-time': %" PRIu64 "," 763 "'mode': 'dirty-ring' }}", 764 calc_time); 765 } 766 767 static QDict *query_dirty_rate(QTestState *who) 768 { 769 return qtest_qmp_assert_success_ref(who, 770 "{ 'execute': 'query-dirty-rate' }"); 771 } 772 773 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) 774 { 775 qtest_qmp_assert_success(who, 776 "{ 'execute': 'set-vcpu-dirty-limit'," 777 "'arguments': { " 778 "'dirty-rate': %" PRIu64 " } }", 779 dirtyrate); 780 } 781 782 static void cancel_vcpu_dirty_limit(QTestState *who) 783 { 784 qtest_qmp_assert_success(who, 785 "{ 'execute': 'cancel-vcpu-dirty-limit' }"); 786 } 787 788 static QDict *query_vcpu_dirty_limit(QTestState *who) 789 { 790 QDict *rsp; 791 792 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); 793 g_assert(!qdict_haskey(rsp, "error")); 794 g_assert(qdict_haskey(rsp, "return")); 795 796 return rsp; 797 } 798 799 static bool calc_dirtyrate_ready(QTestState *who) 800 { 801 QDict *rsp_return; 802 const char *status; 803 bool ready; 804 805 rsp_return = query_dirty_rate(who); 806 g_assert(rsp_return); 807 808 status = qdict_get_str(rsp_return, "status"); 809 g_assert(status); 810 ready = g_strcmp0(status, "measuring"); 811 qobject_unref(rsp_return); 812 813 return ready; 814 } 815 816 static void wait_for_calc_dirtyrate_complete(QTestState *who, 817 int64_t time_s) 818 { 819 int max_try_count = 10000; 820 usleep(time_s * 1000000); 821 822 while (!calc_dirtyrate_ready(who) && max_try_count--) { 823 usleep(1000); 824 } 825 826 /* 827 * Set the timeout with 10 s(max_try_count * 1000us), 828 * if dirtyrate measurement not complete, fail test. 829 */ 830 g_assert_cmpint(max_try_count, !=, 0); 831 } 832 833 static int64_t get_dirty_rate(QTestState *who) 834 { 835 QDict *rsp_return; 836 const char *status; 837 QList *rates; 838 const QListEntry *entry; 839 QDict *rate; 840 int64_t dirtyrate; 841 842 rsp_return = query_dirty_rate(who); 843 g_assert(rsp_return); 844 845 status = qdict_get_str(rsp_return, "status"); 846 g_assert(status); 847 g_assert_cmpstr(status, ==, "measured"); 848 849 rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); 850 g_assert(rates && !qlist_empty(rates)); 851 852 entry = qlist_first(rates); 853 g_assert(entry); 854 855 rate = qobject_to(QDict, qlist_entry_obj(entry)); 856 g_assert(rate); 857 858 dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); 859 860 qobject_unref(rsp_return); 861 return dirtyrate; 862 } 863 864 static int64_t get_limit_rate(QTestState *who) 865 { 866 QDict *rsp_return; 867 QList *rates; 868 const QListEntry *entry; 869 QDict *rate; 870 int64_t dirtyrate; 871 872 rsp_return = query_vcpu_dirty_limit(who); 873 g_assert(rsp_return); 874 875 rates = qdict_get_qlist(rsp_return, "return"); 876 g_assert(rates && !qlist_empty(rates)); 877 878 entry = qlist_first(rates); 879 g_assert(entry); 880 881 rate = qobject_to(QDict, qlist_entry_obj(entry)); 882 g_assert(rate); 883 884 dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); 885 886 qobject_unref(rsp_return); 887 return dirtyrate; 888 } 889 890 static QTestState *dirtylimit_start_vm(void) 891 { 892 QTestState *vm = NULL; 893 g_autofree gchar *cmd = NULL; 894 const char *bootpath; 895 896 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false); 897 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " 898 "-name dirtylimit-test,debug-threads=on " 899 "-m 150M -smp 1 " 900 "-serial file:%s/vm_serial " 901 "-drive file=%s,format=raw ", 902 tmpfs, bootpath); 903 904 vm = qtest_init(cmd); 905 return vm; 906 } 907 908 static void dirtylimit_stop_vm(QTestState *vm) 909 { 910 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial"); 911 912 qtest_quit(vm); 913 unlink(path); 914 } 915 916 static void test_vcpu_dirty_limit(void) 917 { 918 QTestState *vm; 919 int64_t origin_rate; 920 int64_t quota_rate; 921 int64_t rate ; 922 int max_try_count = 20; 923 int hit = 0; 924 925 /* Start vm for vcpu dirtylimit test */ 926 vm = dirtylimit_start_vm(); 927 928 /* Wait for the first serial output from the vm*/ 929 wait_for_serial("vm_serial"); 930 931 /* Do dirtyrate measurement with calc time equals 1s */ 932 calc_dirty_rate(vm, 1); 933 934 /* Sleep calc time and wait for calc dirtyrate complete */ 935 wait_for_calc_dirtyrate_complete(vm, 1); 936 937 /* Query original dirty page rate */ 938 origin_rate = get_dirty_rate(vm); 939 940 /* VM booted from bootsect should dirty memory steadily */ 941 assert(origin_rate != 0); 942 943 /* Setup quota dirty page rate at half of origin */ 944 quota_rate = origin_rate / 2; 945 946 /* Set dirtylimit */ 947 dirtylimit_set_all(vm, quota_rate); 948 949 /* 950 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit 951 * works literally 952 */ 953 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); 954 955 /* Sleep a bit to check if it take effect */ 956 usleep(2000000); 957 958 /* 959 * Check if dirtylimit take effect realistically, set the 960 * timeout with 20 s(max_try_count * 1s), if dirtylimit 961 * doesn't take effect, fail test. 962 */ 963 while (--max_try_count) { 964 calc_dirty_rate(vm, 1); 965 wait_for_calc_dirtyrate_complete(vm, 1); 966 rate = get_dirty_rate(vm); 967 968 /* 969 * Assume hitting if current rate is less 970 * than quota rate (within accepting error) 971 */ 972 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 973 hit = 1; 974 break; 975 } 976 } 977 978 g_assert_cmpint(hit, ==, 1); 979 980 hit = 0; 981 max_try_count = 20; 982 983 /* Check if dirtylimit cancellation take effect */ 984 cancel_vcpu_dirty_limit(vm); 985 while (--max_try_count) { 986 calc_dirty_rate(vm, 1); 987 wait_for_calc_dirtyrate_complete(vm, 1); 988 rate = get_dirty_rate(vm); 989 990 /* 991 * Assume dirtylimit be canceled if current rate is 992 * greater than quota rate (within accepting error) 993 */ 994 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 995 hit = 1; 996 break; 997 } 998 } 999 1000 g_assert_cmpint(hit, ==, 1); 1001 dirtylimit_stop_vm(vm); 1002 } 1003 1004 static void migrate_dirty_limit_wait_showup(QTestState *from, 1005 const int64_t period, 1006 const int64_t value) 1007 { 1008 /* Enable dirty limit capability */ 1009 migrate_set_capability(from, "dirty-limit", true); 1010 1011 /* Set dirty limit parameters */ 1012 migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); 1013 migrate_set_parameter_int(from, "vcpu-dirty-limit", value); 1014 1015 /* Make sure migrate can't converge */ 1016 migrate_ensure_non_converge(from); 1017 1018 /* To check limit rate after precopy */ 1019 migrate_set_capability(from, "pause-before-switchover", true); 1020 1021 /* Wait for the serial output from the source */ 1022 wait_for_serial("src_serial"); 1023 } 1024 1025 /* 1026 * This test does: 1027 * source destination 1028 * start vm 1029 * start incoming vm 1030 * migrate 1031 * wait dirty limit to begin 1032 * cancel migrate 1033 * cancellation check 1034 * restart incoming vm 1035 * migrate 1036 * wait dirty limit to begin 1037 * wait pre-switchover event 1038 * convergence condition check 1039 * 1040 * And see if dirty limit migration works correctly. 1041 * This test case involves many passes, so it runs in slow mode only. 1042 */ 1043 static void test_dirty_limit(void) 1044 { 1045 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 1046 QTestState *from, *to; 1047 int64_t remaining; 1048 uint64_t throttle_us_per_full; 1049 /* 1050 * We want the test to be stable and as fast as possible. 1051 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit, 1052 * so we need to decrease a bandwidth. 1053 */ 1054 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; 1055 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ 1056 const int64_t downtime_limit = 250; /* 250ms */ 1057 /* 1058 * We migrate through unix-socket (> 500Mb/s). 1059 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). 1060 * So, we can predict expected_threshold 1061 */ 1062 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; 1063 int max_try_count = 10; 1064 MigrateCommon args = { 1065 .start = { 1066 .hide_stderr = true, 1067 .use_dirty_ring = true, 1068 }, 1069 .listen_uri = uri, 1070 .connect_uri = uri, 1071 }; 1072 1073 /* Start src, dst vm */ 1074 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1075 return; 1076 } 1077 1078 /* Prepare for dirty limit migration and wait src vm show up */ 1079 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); 1080 1081 /* Start migrate */ 1082 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1083 1084 /* Wait for dirty limit throttle begin */ 1085 throttle_us_per_full = 0; 1086 while (throttle_us_per_full == 0) { 1087 throttle_us_per_full = 1088 read_migrate_property_int(from, 1089 "dirty-limit-throttle-time-per-round"); 1090 usleep(100); 1091 g_assert_false(get_src()->stop_seen); 1092 } 1093 1094 /* Now cancel migrate and wait for dirty limit throttle switch off */ 1095 migrate_cancel(from); 1096 wait_for_migration_status(from, "cancelled", NULL); 1097 1098 /* destination always fails after cancel */ 1099 migration_event_wait(to, "failed"); 1100 qtest_set_expected_status(to, EXIT_FAILURE); 1101 qtest_quit(to); 1102 1103 /* Check if dirty limit throttle switched off, set timeout 1ms */ 1104 do { 1105 throttle_us_per_full = 1106 read_migrate_property_int(from, 1107 "dirty-limit-throttle-time-per-round"); 1108 usleep(100); 1109 g_assert_false(get_src()->stop_seen); 1110 } while (throttle_us_per_full != 0 && --max_try_count); 1111 1112 /* Assert dirty limit is not in service */ 1113 g_assert_cmpint(throttle_us_per_full, ==, 0); 1114 1115 args = (MigrateCommon) { 1116 .start = { 1117 .only_target = true, 1118 .use_dirty_ring = true, 1119 }, 1120 .listen_uri = uri, 1121 .connect_uri = uri, 1122 }; 1123 1124 /* Restart dst vm, src vm already show up so we needn't wait anymore */ 1125 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1126 return; 1127 } 1128 1129 /* Start migrate */ 1130 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1131 1132 /* Wait for dirty limit throttle begin */ 1133 throttle_us_per_full = 0; 1134 while (throttle_us_per_full == 0) { 1135 throttle_us_per_full = 1136 read_migrate_property_int(from, 1137 "dirty-limit-throttle-time-per-round"); 1138 usleep(100); 1139 g_assert_false(get_src()->stop_seen); 1140 } 1141 1142 /* 1143 * The dirty limit rate should equals the return value of 1144 * query-vcpu-dirty-limit if dirty limit cap set 1145 */ 1146 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); 1147 1148 /* Now, we have tested if dirty limit works, let it converge */ 1149 migrate_set_parameter_int(from, "downtime-limit", downtime_limit); 1150 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); 1151 1152 /* 1153 * Wait for pre-switchover status to check if migration 1154 * satisfy the convergence condition 1155 */ 1156 wait_for_migration_status(from, "pre-switchover", NULL); 1157 1158 remaining = read_ram_property_int(from, "remaining"); 1159 g_assert_cmpint(remaining, <, 1160 (expected_threshold + expected_threshold / 100)); 1161 1162 migrate_continue(from, "pre-switchover"); 1163 1164 qtest_qmp_eventwait(to, "RESUME"); 1165 1166 wait_for_serial("dest_serial"); 1167 wait_for_migration_complete(from); 1168 1169 migrate_end(from, to, true); 1170 } 1171 1172 static void migration_test_add_precopy_smoke(MigrationTestEnv *env) 1173 { 1174 if (env->is_x86) { 1175 migration_test_add("/migration/precopy/unix/suspend/live", 1176 test_precopy_unix_suspend_live); 1177 migration_test_add("/migration/precopy/unix/suspend/notlive", 1178 test_precopy_unix_suspend_notlive); 1179 } 1180 1181 migration_test_add("/migration/precopy/unix/plain", 1182 test_precopy_unix_plain); 1183 1184 migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain); 1185 migration_test_add("/migration/multifd/tcp/uri/plain/none", 1186 test_multifd_tcp_uri_none); 1187 migration_test_add("/migration/multifd/tcp/plain/cancel", 1188 test_multifd_tcp_cancel); 1189 #ifdef CONFIG_RDMA 1190 migration_test_add("/migration/precopy/rdma/plain", 1191 test_precopy_rdma_plain); 1192 #endif 1193 } 1194 1195 void migration_test_add_precopy(MigrationTestEnv *env) 1196 { 1197 tmpfs = env->tmpfs; 1198 1199 migration_test_add_precopy_smoke(env); 1200 1201 if (!env->full_set) { 1202 return; 1203 } 1204 1205 migration_test_add("/migration/precopy/tcp/plain/switchover-ack", 1206 test_precopy_tcp_switchover_ack); 1207 1208 #ifndef _WIN32 1209 migration_test_add("/migration/precopy/fd/tcp", 1210 test_precopy_fd_socket); 1211 migration_test_add("/migration/precopy/fd/file", 1212 test_precopy_fd_file); 1213 #endif 1214 1215 /* 1216 * See explanation why this test is slow on function definition 1217 */ 1218 if (g_test_slow()) { 1219 migration_test_add("/migration/auto_converge", 1220 test_auto_converge); 1221 if (g_str_equal(env->arch, "x86_64") && 1222 env->has_kvm && env->has_dirty_ring) { 1223 migration_test_add("/dirty_limit", 1224 test_dirty_limit); 1225 } 1226 } 1227 migration_test_add("/migration/multifd/tcp/channels/plain/none", 1228 test_multifd_tcp_channels_none); 1229 migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy", 1230 test_multifd_tcp_zero_page_legacy); 1231 migration_test_add("/migration/multifd/tcp/plain/zero-page/none", 1232 test_multifd_tcp_no_zero_page); 1233 if (g_str_equal(env->arch, "x86_64") 1234 && env->has_kvm && env->has_dirty_ring) { 1235 1236 migration_test_add("/migration/dirty_ring", 1237 test_precopy_unix_dirty_ring); 1238 if (qtest_has_machine("pc") && g_test_slow()) { 1239 migration_test_add("/migration/vcpu_dirty_limit", 1240 test_vcpu_dirty_limit); 1241 } 1242 } 1243 1244 /* ensure new status don't go unnoticed */ 1245 assert(MIGRATION_STATUS__MAX == 15); 1246 1247 for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { 1248 switch (i) { 1249 case MIGRATION_STATUS_DEVICE: /* happens too fast */ 1250 case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ 1251 case MIGRATION_STATUS_COLO: /* no support in tests */ 1252 case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ 1253 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1254 case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: 1255 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1256 continue; 1257 default: 1258 migration_test_add_suffix("/migration/cancel/src/after/", 1259 MigrationStatus_str(i), 1260 test_cancel_src_after_status); 1261 } 1262 } 1263 } 1264