1 /* 2 * QTest testcase for precopy migration 3 * 4 * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates 5 * based on the vhost-user-test.c that is: 6 * Copyright (c) 2014 Virtual Open Systems Sarl. 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 * 11 */ 12 13 #include "qemu/osdep.h" 14 #include "chardev/char.h" 15 #include "crypto/tlscredspsk.h" 16 #include "libqtest.h" 17 #include "migration/bootfile.h" 18 #include "migration/framework.h" 19 #include "migration/migration-qmp.h" 20 #include "migration/migration-util.h" 21 #include "ppc-util.h" 22 #include "qobject/qlist.h" 23 #include "qapi-types-migration.h" 24 #include "qemu/module.h" 25 #include "qemu/option.h" 26 #include "qemu/range.h" 27 #include "qemu/sockets.h" 28 29 30 /* 31 * Dirtylimit stop working if dirty page rate error 32 * value less than DIRTYLIMIT_TOLERANCE_RANGE 33 */ 34 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 35 36 static char *tmpfs; 37 38 static void test_precopy_unix_plain(void) 39 { 40 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 41 MigrateCommon args = { 42 .listen_uri = uri, 43 .connect_uri = uri, 44 /* 45 * The simplest use case of precopy, covering smoke tests of 46 * get-dirty-log dirty tracking. 47 */ 48 .live = true, 49 }; 50 51 test_precopy_common(&args); 52 } 53 54 static void test_precopy_unix_suspend_live(void) 55 { 56 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 57 MigrateCommon args = { 58 .listen_uri = uri, 59 .connect_uri = uri, 60 /* 61 * despite being live, the test is fast because the src 62 * suspends immediately. 63 */ 64 .live = true, 65 .start.suspend_me = true, 66 }; 67 68 test_precopy_common(&args); 69 } 70 71 static void test_precopy_unix_suspend_notlive(void) 72 { 73 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 74 MigrateCommon args = { 75 .listen_uri = uri, 76 .connect_uri = uri, 77 .start.suspend_me = true, 78 }; 79 80 test_precopy_common(&args); 81 } 82 83 static void test_precopy_unix_dirty_ring(void) 84 { 85 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 86 MigrateCommon args = { 87 .start = { 88 .use_dirty_ring = true, 89 }, 90 .listen_uri = uri, 91 .connect_uri = uri, 92 /* 93 * Besides the precopy/unix basic test, cover dirty ring interface 94 * rather than get-dirty-log. 95 */ 96 .live = true, 97 }; 98 99 test_precopy_common(&args); 100 } 101 102 static void test_precopy_tcp_plain(void) 103 { 104 MigrateCommon args = { 105 .listen_uri = "tcp:127.0.0.1:0", 106 }; 107 108 test_precopy_common(&args); 109 } 110 111 static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to) 112 { 113 114 migrate_set_capability(from, "return-path", true); 115 migrate_set_capability(to, "return-path", true); 116 117 migrate_set_capability(from, "switchover-ack", true); 118 migrate_set_capability(to, "switchover-ack", true); 119 120 return NULL; 121 } 122 123 static void test_precopy_tcp_switchover_ack(void) 124 { 125 MigrateCommon args = { 126 .listen_uri = "tcp:127.0.0.1:0", 127 .start_hook = migrate_hook_start_switchover_ack, 128 /* 129 * Source VM must be running in order to consider the switchover ACK 130 * when deciding to do switchover or not. 131 */ 132 .live = true, 133 }; 134 135 test_precopy_common(&args); 136 } 137 138 #ifndef _WIN32 139 static void *migrate_hook_start_fd(QTestState *from, 140 QTestState *to) 141 { 142 int ret; 143 int pair[2]; 144 145 /* Create two connected sockets for migration */ 146 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair); 147 g_assert_cmpint(ret, ==, 0); 148 149 /* Send the 1st socket to the target */ 150 qtest_qmp_fds_assert_success(to, &pair[0], 1, 151 "{ 'execute': 'getfd'," 152 " 'arguments': { 'fdname': 'fd-mig' }}"); 153 close(pair[0]); 154 155 /* Start incoming migration from the 1st socket */ 156 migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}"); 157 158 /* Send the 2nd socket to the target */ 159 qtest_qmp_fds_assert_success(from, &pair[1], 1, 160 "{ 'execute': 'getfd'," 161 " 'arguments': { 'fdname': 'fd-mig' }}"); 162 close(pair[1]); 163 164 return NULL; 165 } 166 167 static void migrate_hook_end_fd(QTestState *from, 168 QTestState *to, 169 void *opaque) 170 { 171 QDict *rsp; 172 const char *error_desc; 173 174 /* Test closing fds */ 175 /* 176 * We assume, that QEMU removes named fd from its list, 177 * so this should fail. 178 */ 179 rsp = qtest_qmp(from, 180 "{ 'execute': 'closefd'," 181 " 'arguments': { 'fdname': 'fd-mig' }}"); 182 g_assert_true(qdict_haskey(rsp, "error")); 183 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 184 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 185 qobject_unref(rsp); 186 187 rsp = qtest_qmp(to, 188 "{ 'execute': 'closefd'," 189 " 'arguments': { 'fdname': 'fd-mig' }}"); 190 g_assert_true(qdict_haskey(rsp, "error")); 191 error_desc = qdict_get_str(qdict_get_qdict(rsp, "error"), "desc"); 192 g_assert_cmpstr(error_desc, ==, "File descriptor named 'fd-mig' not found"); 193 qobject_unref(rsp); 194 } 195 196 static void test_precopy_fd_socket(void) 197 { 198 MigrateCommon args = { 199 .listen_uri = "defer", 200 .connect_uri = "fd:fd-mig", 201 .start_hook = migrate_hook_start_fd, 202 .end_hook = migrate_hook_end_fd, 203 }; 204 test_precopy_common(&args); 205 } 206 207 static void *migrate_hook_start_precopy_fd_file(QTestState *from, 208 QTestState *to) 209 { 210 g_autofree char *file = g_strdup_printf("%s/%s", tmpfs, FILE_TEST_FILENAME); 211 int src_flags = O_CREAT | O_RDWR; 212 int dst_flags = O_CREAT | O_RDWR; 213 int fds[2]; 214 215 fds[0] = open(file, src_flags, 0660); 216 assert(fds[0] != -1); 217 218 fds[1] = open(file, dst_flags, 0660); 219 assert(fds[1] != -1); 220 221 222 qtest_qmp_fds_assert_success(to, &fds[0], 1, 223 "{ 'execute': 'getfd'," 224 " 'arguments': { 'fdname': 'fd-mig' }}"); 225 226 qtest_qmp_fds_assert_success(from, &fds[1], 1, 227 "{ 'execute': 'getfd'," 228 " 'arguments': { 'fdname': 'fd-mig' }}"); 229 230 close(fds[0]); 231 close(fds[1]); 232 233 return NULL; 234 } 235 236 static void test_precopy_fd_file(void) 237 { 238 MigrateCommon args = { 239 .listen_uri = "defer", 240 .connect_uri = "fd:fd-mig", 241 .start_hook = migrate_hook_start_precopy_fd_file, 242 .end_hook = migrate_hook_end_fd, 243 }; 244 test_file_common(&args, true); 245 } 246 #endif /* _WIN32 */ 247 248 /* 249 * The way auto_converge works, we need to do too many passes to 250 * run this test. Auto_converge logic is only run once every 251 * three iterations, so: 252 * 253 * - 3 iterations without auto_converge enabled 254 * - 3 iterations with pct = 5 255 * - 3 iterations with pct = 30 256 * - 3 iterations with pct = 55 257 * - 3 iterations with pct = 80 258 * - 3 iterations with pct = 95 (max(95, 80 + 25)) 259 * 260 * To make things even worse, we need to run the initial stage at 261 * 3MB/s so we enter autoconverge even when host is (over)loaded. 262 */ 263 static void test_auto_converge(void) 264 { 265 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 266 MigrateStart args = {}; 267 QTestState *from, *to; 268 int64_t percentage; 269 270 /* 271 * We want the test to be stable and as fast as possible. 272 * E.g., with 1Gb/s bandwidth migration may pass without throttling, 273 * so we need to decrease a bandwidth. 274 */ 275 const int64_t init_pct = 5, inc_pct = 25, max_pct = 95; 276 uint64_t prev_dirty_sync_cnt, dirty_sync_cnt; 277 int max_try_count, hit = 0; 278 279 if (migrate_start(&from, &to, uri, &args)) { 280 return; 281 } 282 283 migrate_set_capability(from, "auto-converge", true); 284 migrate_set_parameter_int(from, "cpu-throttle-initial", init_pct); 285 migrate_set_parameter_int(from, "cpu-throttle-increment", inc_pct); 286 migrate_set_parameter_int(from, "max-cpu-throttle", max_pct); 287 288 /* 289 * Set the initial parameters so that the migration could not converge 290 * without throttling. 291 */ 292 migrate_ensure_non_converge(from); 293 294 /* To check remaining size after precopy */ 295 migrate_set_capability(from, "pause-before-switchover", true); 296 297 /* Wait for the first serial output from the source */ 298 wait_for_serial("src_serial"); 299 300 migrate_qmp(from, to, uri, NULL, "{}"); 301 302 /* Wait for throttling begins */ 303 percentage = 0; 304 do { 305 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 306 if (percentage != 0) { 307 break; 308 } 309 usleep(20); 310 g_assert_false(get_src()->stop_seen); 311 } while (true); 312 /* The first percentage of throttling should be at least init_pct */ 313 g_assert_cmpint(percentage, >=, init_pct); 314 315 /* 316 * End the loop when the dirty sync count greater than 1. 317 */ 318 while ((dirty_sync_cnt = get_migration_pass(from)) < 2) { 319 usleep(1000 * 1000); 320 } 321 322 prev_dirty_sync_cnt = dirty_sync_cnt; 323 324 /* 325 * The RAMBlock dirty sync count must changes in 5 seconds, here we set 326 * the timeout to 10 seconds to ensure it changes. 327 * 328 * Note that migrate_ensure_non_converge set the max-bandwidth to 3MB/s, 329 * while the qtest mem is >= 100MB, one iteration takes at least 33s (100/3) 330 * to complete; this ensures that the RAMBlock dirty sync occurs. 331 */ 332 max_try_count = 10; 333 while (--max_try_count) { 334 dirty_sync_cnt = get_migration_pass(from); 335 if (dirty_sync_cnt != prev_dirty_sync_cnt) { 336 hit = 1; 337 break; 338 } 339 prev_dirty_sync_cnt = dirty_sync_cnt; 340 sleep(1); 341 } 342 g_assert_cmpint(hit, ==, 1); 343 344 /* Now, when we tested that throttling works, let it converge */ 345 migrate_ensure_converge(from); 346 347 /* 348 * Wait for pre-switchover status to check last throttle percentage 349 * and remaining. These values will be zeroed later 350 */ 351 wait_for_migration_status(from, "pre-switchover", NULL); 352 353 /* The final percentage of throttling shouldn't be greater than max_pct */ 354 percentage = read_migrate_property_int(from, "cpu-throttle-percentage"); 355 g_assert_cmpint(percentage, <=, max_pct); 356 migrate_continue(from, "pre-switchover"); 357 358 qtest_qmp_eventwait(to, "RESUME"); 359 360 wait_for_serial("dest_serial"); 361 wait_for_migration_complete(from); 362 363 migrate_end(from, to, true); 364 } 365 366 static void * 367 migrate_hook_start_precopy_tcp_multifd(QTestState *from, 368 QTestState *to) 369 { 370 return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 371 } 372 373 static void * 374 migrate_hook_start_precopy_tcp_multifd_zero_page_legacy(QTestState *from, 375 QTestState *to) 376 { 377 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 378 migrate_set_parameter_str(from, "zero-page-detection", "legacy"); 379 return NULL; 380 } 381 382 static void * 383 migrate_hook_start_precopy_tcp_multifd_no_zero_page(QTestState *from, 384 QTestState *to) 385 { 386 migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); 387 migrate_set_parameter_str(from, "zero-page-detection", "none"); 388 return NULL; 389 } 390 391 static void test_multifd_tcp_uri_none(void) 392 { 393 MigrateCommon args = { 394 .listen_uri = "defer", 395 .start_hook = migrate_hook_start_precopy_tcp_multifd, 396 /* 397 * Multifd is more complicated than most of the features, it 398 * directly takes guest page buffers when sending, make sure 399 * everything will work alright even if guest page is changing. 400 */ 401 .live = true, 402 }; 403 test_precopy_common(&args); 404 } 405 406 static void test_multifd_tcp_zero_page_legacy(void) 407 { 408 MigrateCommon args = { 409 .listen_uri = "defer", 410 .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, 411 /* 412 * Multifd is more complicated than most of the features, it 413 * directly takes guest page buffers when sending, make sure 414 * everything will work alright even if guest page is changing. 415 */ 416 .live = true, 417 }; 418 test_precopy_common(&args); 419 } 420 421 static void test_multifd_tcp_no_zero_page(void) 422 { 423 MigrateCommon args = { 424 .listen_uri = "defer", 425 .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, 426 /* 427 * Multifd is more complicated than most of the features, it 428 * directly takes guest page buffers when sending, make sure 429 * everything will work alright even if guest page is changing. 430 */ 431 .live = true, 432 }; 433 test_precopy_common(&args); 434 } 435 436 static void test_multifd_tcp_channels_none(void) 437 { 438 MigrateCommon args = { 439 .listen_uri = "defer", 440 .start_hook = migrate_hook_start_precopy_tcp_multifd, 441 .live = true, 442 .connect_channels = ("[ { 'channel-type': 'main'," 443 " 'addr': { 'transport': 'socket'," 444 " 'type': 'inet'," 445 " 'host': '127.0.0.1'," 446 " 'port': '0' } } ]"), 447 }; 448 test_precopy_common(&args); 449 } 450 451 /* 452 * This test does: 453 * source target 454 * migrate_incoming 455 * migrate 456 * migrate_cancel 457 * launch another target 458 * migrate 459 * 460 * And see that it works 461 */ 462 static void test_multifd_tcp_cancel(void) 463 { 464 MigrateStart args = { 465 .hide_stderr = true, 466 }; 467 QTestState *from, *to, *to2; 468 469 if (migrate_start(&from, &to, "defer", &args)) { 470 return; 471 } 472 473 migrate_ensure_non_converge(from); 474 migrate_prepare_for_dirty_mem(from); 475 476 migrate_set_parameter_int(from, "multifd-channels", 16); 477 migrate_set_parameter_int(to, "multifd-channels", 16); 478 479 migrate_set_capability(from, "multifd", true); 480 migrate_set_capability(to, "multifd", true); 481 482 /* Start incoming migration from the 1st socket */ 483 migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); 484 485 /* Wait for the first serial output from the source */ 486 wait_for_serial("src_serial"); 487 488 migrate_qmp(from, to, NULL, NULL, "{}"); 489 490 migrate_wait_for_dirty_mem(from, to); 491 492 migrate_cancel(from); 493 494 /* Make sure QEMU process "to" exited */ 495 qtest_set_expected_status(to, EXIT_FAILURE); 496 qtest_wait_qemu(to); 497 qtest_quit(to); 498 499 /* 500 * Ensure the source QEMU finishes its cancellation process before we 501 * proceed with the setup of the next migration. The migrate_start() 502 * function and others might want to interact with the source in a way that 503 * is not possible while the migration is not canceled properly. For 504 * example, setting migration capabilities when the migration is still 505 * running leads to an error. 506 */ 507 wait_for_migration_status(from, "cancelled", NULL); 508 509 args = (MigrateStart){ 510 .only_target = true, 511 }; 512 513 if (migrate_start(&from, &to2, "defer", &args)) { 514 return; 515 } 516 517 migrate_set_parameter_int(to2, "multifd-channels", 16); 518 519 migrate_set_capability(to2, "multifd", true); 520 521 /* Start incoming migration from the 1st socket */ 522 migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}"); 523 524 migrate_ensure_non_converge(from); 525 526 migrate_qmp(from, to2, NULL, NULL, "{}"); 527 528 migrate_wait_for_dirty_mem(from, to2); 529 530 migrate_ensure_converge(from); 531 532 wait_for_stop(from, get_src()); 533 qtest_qmp_eventwait(to2, "RESUME"); 534 535 wait_for_serial("dest_serial"); 536 wait_for_migration_complete(from); 537 migrate_end(from, to2, true); 538 } 539 540 static void test_cancel_src_after_failed(QTestState *from, QTestState *to, 541 const char *uri, const char *phase) 542 { 543 /* 544 * No migrate_incoming_qmp() at the start to force source into 545 * failed state during migrate_qmp(). 546 */ 547 548 wait_for_serial("src_serial"); 549 migrate_ensure_converge(from); 550 551 migrate_qmp(from, to, uri, NULL, "{}"); 552 553 migration_event_wait(from, phase); 554 migrate_cancel(from); 555 556 /* cancelling will not move the migration out of 'failed' */ 557 558 wait_for_migration_status(from, "failed", 559 (const char * []) { "completed", NULL }); 560 561 /* 562 * Not waiting for the destination because it never started 563 * migration. 564 */ 565 } 566 567 static void test_cancel_src_after_cancelled(QTestState *from, QTestState *to, 568 const char *uri, const char *phase) 569 { 570 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 571 572 wait_for_serial("src_serial"); 573 migrate_ensure_converge(from); 574 575 migrate_qmp(from, to, uri, NULL, "{}"); 576 577 /* To move to cancelled/cancelling */ 578 migrate_cancel(from); 579 migration_event_wait(from, phase); 580 581 /* The migrate_cancel under test */ 582 migrate_cancel(from); 583 584 wait_for_migration_status(from, "cancelled", 585 (const char * []) { "completed", NULL }); 586 587 wait_for_migration_status(to, "failed", 588 (const char * []) { "completed", NULL }); 589 } 590 591 static void test_cancel_src_after_complete(QTestState *from, QTestState *to, 592 const char *uri, const char *phase) 593 { 594 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 595 596 wait_for_serial("src_serial"); 597 migrate_ensure_converge(from); 598 599 migrate_qmp(from, to, uri, NULL, "{}"); 600 601 migration_event_wait(from, phase); 602 migrate_cancel(from); 603 604 /* 605 * qmp_migrate_cancel() exits early if migration is not running 606 * anymore, the status will not change to cancelled. 607 */ 608 wait_for_migration_complete(from); 609 wait_for_migration_complete(to); 610 } 611 612 static void test_cancel_src_after_none(QTestState *from, QTestState *to, 613 const char *uri, const char *phase) 614 { 615 /* 616 * Test that cancelling without a migration happening does not 617 * affect subsequent migrations 618 */ 619 migrate_cancel(to); 620 621 wait_for_serial("src_serial"); 622 migrate_cancel(from); 623 624 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 625 626 migrate_ensure_converge(from); 627 migrate_qmp(from, to, uri, NULL, "{}"); 628 629 wait_for_migration_complete(from); 630 wait_for_migration_complete(to); 631 } 632 633 static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, 634 const char *uri, const char *phase) 635 { 636 migrate_set_capability(from, "pause-before-switchover", true); 637 migrate_set_capability(to, "pause-before-switchover", true); 638 639 migrate_set_capability(from, "multifd", true); 640 migrate_set_capability(to, "multifd", true); 641 642 migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); 643 644 wait_for_serial("src_serial"); 645 migrate_ensure_converge(from); 646 647 migrate_qmp(from, to, uri, NULL, "{}"); 648 649 migration_event_wait(from, phase); 650 migrate_cancel(from); 651 migration_event_wait(from, "cancelling"); 652 653 wait_for_migration_status(from, "cancelled", 654 (const char * []) { "completed", NULL }); 655 656 wait_for_migration_status(to, "failed", 657 (const char * []) { "completed", NULL }); 658 } 659 660 static void test_cancel_src_after_status(void *opaque) 661 { 662 const char *test_path = opaque; 663 g_autofree char *phase = g_path_get_basename(test_path); 664 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 665 QTestState *from, *to; 666 MigrateStart args = { 667 .hide_stderr = true, 668 }; 669 670 if (migrate_start(&from, &to, "defer", &args)) { 671 return; 672 } 673 674 if (g_str_equal(phase, "cancelling") || 675 g_str_equal(phase, "cancelled")) { 676 test_cancel_src_after_cancelled(from, to, uri, phase); 677 678 } else if (g_str_equal(phase, "completed")) { 679 test_cancel_src_after_complete(from, to, uri, phase); 680 681 } else if (g_str_equal(phase, "failed")) { 682 test_cancel_src_after_failed(from, to, uri, phase); 683 684 } else if (g_str_equal(phase, "none")) { 685 test_cancel_src_after_none(from, to, uri, phase); 686 687 } else { 688 /* any state that comes before pre-switchover */ 689 test_cancel_src_pre_switchover(from, to, uri, phase); 690 } 691 692 migrate_end(from, to, false); 693 } 694 695 static void calc_dirty_rate(QTestState *who, uint64_t calc_time) 696 { 697 qtest_qmp_assert_success(who, 698 "{ 'execute': 'calc-dirty-rate'," 699 "'arguments': { " 700 "'calc-time': %" PRIu64 "," 701 "'mode': 'dirty-ring' }}", 702 calc_time); 703 } 704 705 static QDict *query_dirty_rate(QTestState *who) 706 { 707 return qtest_qmp_assert_success_ref(who, 708 "{ 'execute': 'query-dirty-rate' }"); 709 } 710 711 static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate) 712 { 713 qtest_qmp_assert_success(who, 714 "{ 'execute': 'set-vcpu-dirty-limit'," 715 "'arguments': { " 716 "'dirty-rate': %" PRIu64 " } }", 717 dirtyrate); 718 } 719 720 static void cancel_vcpu_dirty_limit(QTestState *who) 721 { 722 qtest_qmp_assert_success(who, 723 "{ 'execute': 'cancel-vcpu-dirty-limit' }"); 724 } 725 726 static QDict *query_vcpu_dirty_limit(QTestState *who) 727 { 728 QDict *rsp; 729 730 rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }"); 731 g_assert(!qdict_haskey(rsp, "error")); 732 g_assert(qdict_haskey(rsp, "return")); 733 734 return rsp; 735 } 736 737 static bool calc_dirtyrate_ready(QTestState *who) 738 { 739 QDict *rsp_return; 740 const char *status; 741 bool ready; 742 743 rsp_return = query_dirty_rate(who); 744 g_assert(rsp_return); 745 746 status = qdict_get_str(rsp_return, "status"); 747 g_assert(status); 748 ready = g_strcmp0(status, "measuring"); 749 qobject_unref(rsp_return); 750 751 return ready; 752 } 753 754 static void wait_for_calc_dirtyrate_complete(QTestState *who, 755 int64_t time_s) 756 { 757 int max_try_count = 10000; 758 usleep(time_s * 1000000); 759 760 while (!calc_dirtyrate_ready(who) && max_try_count--) { 761 usleep(1000); 762 } 763 764 /* 765 * Set the timeout with 10 s(max_try_count * 1000us), 766 * if dirtyrate measurement not complete, fail test. 767 */ 768 g_assert_cmpint(max_try_count, !=, 0); 769 } 770 771 static int64_t get_dirty_rate(QTestState *who) 772 { 773 QDict *rsp_return; 774 const char *status; 775 QList *rates; 776 const QListEntry *entry; 777 QDict *rate; 778 int64_t dirtyrate; 779 780 rsp_return = query_dirty_rate(who); 781 g_assert(rsp_return); 782 783 status = qdict_get_str(rsp_return, "status"); 784 g_assert(status); 785 g_assert_cmpstr(status, ==, "measured"); 786 787 rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate"); 788 g_assert(rates && !qlist_empty(rates)); 789 790 entry = qlist_first(rates); 791 g_assert(entry); 792 793 rate = qobject_to(QDict, qlist_entry_obj(entry)); 794 g_assert(rate); 795 796 dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1); 797 798 qobject_unref(rsp_return); 799 return dirtyrate; 800 } 801 802 static int64_t get_limit_rate(QTestState *who) 803 { 804 QDict *rsp_return; 805 QList *rates; 806 const QListEntry *entry; 807 QDict *rate; 808 int64_t dirtyrate; 809 810 rsp_return = query_vcpu_dirty_limit(who); 811 g_assert(rsp_return); 812 813 rates = qdict_get_qlist(rsp_return, "return"); 814 g_assert(rates && !qlist_empty(rates)); 815 816 entry = qlist_first(rates); 817 g_assert(entry); 818 819 rate = qobject_to(QDict, qlist_entry_obj(entry)); 820 g_assert(rate); 821 822 dirtyrate = qdict_get_try_int(rate, "limit-rate", -1); 823 824 qobject_unref(rsp_return); 825 return dirtyrate; 826 } 827 828 static QTestState *dirtylimit_start_vm(void) 829 { 830 QTestState *vm = NULL; 831 g_autofree gchar *cmd = NULL; 832 const char *bootpath; 833 834 bootpath = bootfile_create(qtest_get_arch(), tmpfs, false); 835 cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " 836 "-name dirtylimit-test,debug-threads=on " 837 "-m 150M -smp 1 " 838 "-serial file:%s/vm_serial " 839 "-drive file=%s,format=raw ", 840 tmpfs, bootpath); 841 842 vm = qtest_init(cmd); 843 return vm; 844 } 845 846 static void dirtylimit_stop_vm(QTestState *vm) 847 { 848 g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, "vm_serial"); 849 850 qtest_quit(vm); 851 unlink(path); 852 } 853 854 static void test_vcpu_dirty_limit(void) 855 { 856 QTestState *vm; 857 int64_t origin_rate; 858 int64_t quota_rate; 859 int64_t rate ; 860 int max_try_count = 20; 861 int hit = 0; 862 863 /* Start vm for vcpu dirtylimit test */ 864 vm = dirtylimit_start_vm(); 865 866 /* Wait for the first serial output from the vm*/ 867 wait_for_serial("vm_serial"); 868 869 /* Do dirtyrate measurement with calc time equals 1s */ 870 calc_dirty_rate(vm, 1); 871 872 /* Sleep calc time and wait for calc dirtyrate complete */ 873 wait_for_calc_dirtyrate_complete(vm, 1); 874 875 /* Query original dirty page rate */ 876 origin_rate = get_dirty_rate(vm); 877 878 /* VM booted from bootsect should dirty memory steadily */ 879 assert(origin_rate != 0); 880 881 /* Setup quota dirty page rate at half of origin */ 882 quota_rate = origin_rate / 2; 883 884 /* Set dirtylimit */ 885 dirtylimit_set_all(vm, quota_rate); 886 887 /* 888 * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit 889 * works literally 890 */ 891 g_assert_cmpint(quota_rate, ==, get_limit_rate(vm)); 892 893 /* Sleep a bit to check if it take effect */ 894 usleep(2000000); 895 896 /* 897 * Check if dirtylimit take effect realistically, set the 898 * timeout with 20 s(max_try_count * 1s), if dirtylimit 899 * doesn't take effect, fail test. 900 */ 901 while (--max_try_count) { 902 calc_dirty_rate(vm, 1); 903 wait_for_calc_dirtyrate_complete(vm, 1); 904 rate = get_dirty_rate(vm); 905 906 /* 907 * Assume hitting if current rate is less 908 * than quota rate (within accepting error) 909 */ 910 if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 911 hit = 1; 912 break; 913 } 914 } 915 916 g_assert_cmpint(hit, ==, 1); 917 918 hit = 0; 919 max_try_count = 20; 920 921 /* Check if dirtylimit cancellation take effect */ 922 cancel_vcpu_dirty_limit(vm); 923 while (--max_try_count) { 924 calc_dirty_rate(vm, 1); 925 wait_for_calc_dirtyrate_complete(vm, 1); 926 rate = get_dirty_rate(vm); 927 928 /* 929 * Assume dirtylimit be canceled if current rate is 930 * greater than quota rate (within accepting error) 931 */ 932 if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) { 933 hit = 1; 934 break; 935 } 936 } 937 938 g_assert_cmpint(hit, ==, 1); 939 dirtylimit_stop_vm(vm); 940 } 941 942 static void migrate_dirty_limit_wait_showup(QTestState *from, 943 const int64_t period, 944 const int64_t value) 945 { 946 /* Enable dirty limit capability */ 947 migrate_set_capability(from, "dirty-limit", true); 948 949 /* Set dirty limit parameters */ 950 migrate_set_parameter_int(from, "x-vcpu-dirty-limit-period", period); 951 migrate_set_parameter_int(from, "vcpu-dirty-limit", value); 952 953 /* Make sure migrate can't converge */ 954 migrate_ensure_non_converge(from); 955 956 /* To check limit rate after precopy */ 957 migrate_set_capability(from, "pause-before-switchover", true); 958 959 /* Wait for the serial output from the source */ 960 wait_for_serial("src_serial"); 961 } 962 963 /* 964 * This test does: 965 * source destination 966 * start vm 967 * start incoming vm 968 * migrate 969 * wait dirty limit to begin 970 * cancel migrate 971 * cancellation check 972 * restart incoming vm 973 * migrate 974 * wait dirty limit to begin 975 * wait pre-switchover event 976 * convergence condition check 977 * 978 * And see if dirty limit migration works correctly. 979 * This test case involves many passes, so it runs in slow mode only. 980 */ 981 static void test_dirty_limit(void) 982 { 983 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); 984 QTestState *from, *to; 985 int64_t remaining; 986 uint64_t throttle_us_per_full; 987 /* 988 * We want the test to be stable and as fast as possible. 989 * E.g., with 1Gb/s bandwidth migration may pass without dirty limit, 990 * so we need to decrease a bandwidth. 991 */ 992 const int64_t dirtylimit_period = 1000, dirtylimit_value = 50; 993 const int64_t max_bandwidth = 400000000; /* ~400Mb/s */ 994 const int64_t downtime_limit = 250; /* 250ms */ 995 /* 996 * We migrate through unix-socket (> 500Mb/s). 997 * Thus, expected migration speed ~= bandwidth limit (< 500Mb/s). 998 * So, we can predict expected_threshold 999 */ 1000 const int64_t expected_threshold = max_bandwidth * downtime_limit / 1000; 1001 int max_try_count = 10; 1002 MigrateCommon args = { 1003 .start = { 1004 .hide_stderr = true, 1005 .use_dirty_ring = true, 1006 }, 1007 .listen_uri = uri, 1008 .connect_uri = uri, 1009 }; 1010 1011 /* Start src, dst vm */ 1012 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1013 return; 1014 } 1015 1016 /* Prepare for dirty limit migration and wait src vm show up */ 1017 migrate_dirty_limit_wait_showup(from, dirtylimit_period, dirtylimit_value); 1018 1019 /* Start migrate */ 1020 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1021 1022 /* Wait for dirty limit throttle begin */ 1023 throttle_us_per_full = 0; 1024 while (throttle_us_per_full == 0) { 1025 throttle_us_per_full = 1026 read_migrate_property_int(from, 1027 "dirty-limit-throttle-time-per-round"); 1028 usleep(100); 1029 g_assert_false(get_src()->stop_seen); 1030 } 1031 1032 /* Now cancel migrate and wait for dirty limit throttle switch off */ 1033 migrate_cancel(from); 1034 wait_for_migration_status(from, "cancelled", NULL); 1035 1036 /* destination always fails after cancel */ 1037 migration_event_wait(to, "failed"); 1038 qtest_set_expected_status(to, EXIT_FAILURE); 1039 qtest_quit(to); 1040 1041 /* Check if dirty limit throttle switched off, set timeout 1ms */ 1042 do { 1043 throttle_us_per_full = 1044 read_migrate_property_int(from, 1045 "dirty-limit-throttle-time-per-round"); 1046 usleep(100); 1047 g_assert_false(get_src()->stop_seen); 1048 } while (throttle_us_per_full != 0 && --max_try_count); 1049 1050 /* Assert dirty limit is not in service */ 1051 g_assert_cmpint(throttle_us_per_full, ==, 0); 1052 1053 args = (MigrateCommon) { 1054 .start = { 1055 .only_target = true, 1056 .use_dirty_ring = true, 1057 }, 1058 .listen_uri = uri, 1059 .connect_uri = uri, 1060 }; 1061 1062 /* Restart dst vm, src vm already show up so we needn't wait anymore */ 1063 if (migrate_start(&from, &to, args.listen_uri, &args.start)) { 1064 return; 1065 } 1066 1067 /* Start migrate */ 1068 migrate_qmp(from, to, args.connect_uri, NULL, "{}"); 1069 1070 /* Wait for dirty limit throttle begin */ 1071 throttle_us_per_full = 0; 1072 while (throttle_us_per_full == 0) { 1073 throttle_us_per_full = 1074 read_migrate_property_int(from, 1075 "dirty-limit-throttle-time-per-round"); 1076 usleep(100); 1077 g_assert_false(get_src()->stop_seen); 1078 } 1079 1080 /* 1081 * The dirty limit rate should equals the return value of 1082 * query-vcpu-dirty-limit if dirty limit cap set 1083 */ 1084 g_assert_cmpint(dirtylimit_value, ==, get_limit_rate(from)); 1085 1086 /* Now, we have tested if dirty limit works, let it converge */ 1087 migrate_set_parameter_int(from, "downtime-limit", downtime_limit); 1088 migrate_set_parameter_int(from, "max-bandwidth", max_bandwidth); 1089 1090 /* 1091 * Wait for pre-switchover status to check if migration 1092 * satisfy the convergence condition 1093 */ 1094 wait_for_migration_status(from, "pre-switchover", NULL); 1095 1096 remaining = read_ram_property_int(from, "remaining"); 1097 g_assert_cmpint(remaining, <, 1098 (expected_threshold + expected_threshold / 100)); 1099 1100 migrate_continue(from, "pre-switchover"); 1101 1102 qtest_qmp_eventwait(to, "RESUME"); 1103 1104 wait_for_serial("dest_serial"); 1105 wait_for_migration_complete(from); 1106 1107 migrate_end(from, to, true); 1108 } 1109 1110 static void migration_test_add_precopy_smoke(MigrationTestEnv *env) 1111 { 1112 if (env->is_x86) { 1113 migration_test_add("/migration/precopy/unix/suspend/live", 1114 test_precopy_unix_suspend_live); 1115 migration_test_add("/migration/precopy/unix/suspend/notlive", 1116 test_precopy_unix_suspend_notlive); 1117 } 1118 1119 migration_test_add("/migration/precopy/unix/plain", 1120 test_precopy_unix_plain); 1121 1122 migration_test_add("/migration/precopy/tcp/plain", test_precopy_tcp_plain); 1123 migration_test_add("/migration/multifd/tcp/uri/plain/none", 1124 test_multifd_tcp_uri_none); 1125 migration_test_add("/migration/multifd/tcp/plain/cancel", 1126 test_multifd_tcp_cancel); 1127 } 1128 1129 void migration_test_add_precopy(MigrationTestEnv *env) 1130 { 1131 tmpfs = env->tmpfs; 1132 1133 migration_test_add_precopy_smoke(env); 1134 1135 if (!env->full_set) { 1136 return; 1137 } 1138 1139 migration_test_add("/migration/precopy/tcp/plain/switchover-ack", 1140 test_precopy_tcp_switchover_ack); 1141 1142 #ifndef _WIN32 1143 migration_test_add("/migration/precopy/fd/tcp", 1144 test_precopy_fd_socket); 1145 migration_test_add("/migration/precopy/fd/file", 1146 test_precopy_fd_file); 1147 #endif 1148 1149 /* 1150 * See explanation why this test is slow on function definition 1151 */ 1152 if (g_test_slow()) { 1153 migration_test_add("/migration/auto_converge", 1154 test_auto_converge); 1155 if (g_str_equal(env->arch, "x86_64") && 1156 env->has_kvm && env->has_dirty_ring) { 1157 migration_test_add("/dirty_limit", 1158 test_dirty_limit); 1159 } 1160 } 1161 migration_test_add("/migration/multifd/tcp/channels/plain/none", 1162 test_multifd_tcp_channels_none); 1163 migration_test_add("/migration/multifd/tcp/plain/zero-page/legacy", 1164 test_multifd_tcp_zero_page_legacy); 1165 migration_test_add("/migration/multifd/tcp/plain/zero-page/none", 1166 test_multifd_tcp_no_zero_page); 1167 if (g_str_equal(env->arch, "x86_64") 1168 && env->has_kvm && env->has_dirty_ring) { 1169 1170 migration_test_add("/migration/dirty_ring", 1171 test_precopy_unix_dirty_ring); 1172 if (qtest_has_machine("pc") && g_test_slow()) { 1173 migration_test_add("/migration/vcpu_dirty_limit", 1174 test_vcpu_dirty_limit); 1175 } 1176 } 1177 1178 /* ensure new status don't go unnoticed */ 1179 assert(MIGRATION_STATUS__MAX == 15); 1180 1181 for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { 1182 switch (i) { 1183 case MIGRATION_STATUS_DEVICE: /* happens too fast */ 1184 case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ 1185 case MIGRATION_STATUS_COLO: /* no support in tests */ 1186 case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ 1187 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1188 case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: 1189 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1190 continue; 1191 default: 1192 migration_test_add_suffix("/migration/cancel/src/after/", 1193 MigrationStatus_str(i), 1194 test_cancel_src_after_status); 1195 } 1196 } 1197 } 1198