1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include <stdint.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #ifndef _WIN32 28 #include <sys/types.h> 29 #include <sys/mman.h> 30 #endif 31 #include "config.h" 32 #include "monitor/monitor.h" 33 #include "sysemu/sysemu.h" 34 #include "qemu/bitops.h" 35 #include "qemu/bitmap.h" 36 #include "sysemu/arch_init.h" 37 #include "audio/audio.h" 38 #include "hw/i386/pc.h" 39 #include "hw/pci/pci.h" 40 #include "hw/audio/audio.h" 41 #include "sysemu/kvm.h" 42 #include "migration/migration.h" 43 #include "hw/i386/smbios.h" 44 #include "exec/address-spaces.h" 45 #include "hw/audio/pcspk.h" 46 #include "migration/page_cache.h" 47 #include "qemu/config-file.h" 48 #include "qmp-commands.h" 49 #include "trace.h" 50 #include "exec/cpu-all.h" 51 #include "hw/acpi/acpi.h" 52 53 #ifdef DEBUG_ARCH_INIT 54 #define DPRINTF(fmt, ...) \ 55 do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) 56 #else 57 #define DPRINTF(fmt, ...) \ 58 do { } while (0) 59 #endif 60 61 #ifdef TARGET_SPARC 62 int graphic_width = 1024; 63 int graphic_height = 768; 64 int graphic_depth = 8; 65 #else 66 int graphic_width = 800; 67 int graphic_height = 600; 68 int graphic_depth = 32; 69 #endif 70 71 72 #if defined(TARGET_ALPHA) 73 #define QEMU_ARCH QEMU_ARCH_ALPHA 74 #elif defined(TARGET_ARM) 75 #define QEMU_ARCH QEMU_ARCH_ARM 76 #elif defined(TARGET_CRIS) 77 #define QEMU_ARCH QEMU_ARCH_CRIS 78 #elif defined(TARGET_I386) 79 #define QEMU_ARCH QEMU_ARCH_I386 80 #elif defined(TARGET_M68K) 81 #define QEMU_ARCH QEMU_ARCH_M68K 82 #elif defined(TARGET_LM32) 83 #define QEMU_ARCH QEMU_ARCH_LM32 84 #elif defined(TARGET_MICROBLAZE) 85 #define QEMU_ARCH QEMU_ARCH_MICROBLAZE 86 #elif defined(TARGET_MIPS) 87 #define QEMU_ARCH QEMU_ARCH_MIPS 88 #elif defined(TARGET_MOXIE) 89 #define QEMU_ARCH QEMU_ARCH_MOXIE 90 #elif defined(TARGET_OPENRISC) 91 #define QEMU_ARCH QEMU_ARCH_OPENRISC 92 #elif defined(TARGET_PPC) 93 #define QEMU_ARCH QEMU_ARCH_PPC 94 #elif defined(TARGET_S390X) 95 #define QEMU_ARCH QEMU_ARCH_S390X 96 #elif defined(TARGET_SH4) 97 #define QEMU_ARCH QEMU_ARCH_SH4 98 #elif defined(TARGET_SPARC) 99 #define QEMU_ARCH QEMU_ARCH_SPARC 100 #elif defined(TARGET_XTENSA) 101 #define QEMU_ARCH QEMU_ARCH_XTENSA 102 #elif defined(TARGET_UNICORE32) 103 #define QEMU_ARCH QEMU_ARCH_UNICORE32 104 #endif 105 106 const uint32_t arch_type = QEMU_ARCH; 107 static bool mig_throttle_on; 108 static int dirty_rate_high_cnt; 109 static void check_guest_throttling(void); 110 111 /***********************************************************/ 112 /* ram save/restore */ 113 114 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ 115 #define RAM_SAVE_FLAG_COMPRESS 0x02 116 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 117 #define RAM_SAVE_FLAG_PAGE 0x08 118 #define RAM_SAVE_FLAG_EOS 0x10 119 #define RAM_SAVE_FLAG_CONTINUE 0x20 120 #define RAM_SAVE_FLAG_XBZRLE 0x40 121 /* 0x80 is reserved in migration.h start with 0x100 next */ 122 123 124 static struct defconfig_file { 125 const char *filename; 126 /* Indicates it is an user config file (disabled by -no-user-config) */ 127 bool userconfig; 128 } default_config_files[] = { 129 { CONFIG_QEMU_CONFDIR "/qemu.conf", true }, 130 { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true }, 131 { NULL }, /* end of list */ 132 }; 133 134 135 int qemu_read_default_config_files(bool userconfig) 136 { 137 int ret; 138 struct defconfig_file *f; 139 140 for (f = default_config_files; f->filename; f++) { 141 if (!userconfig && f->userconfig) { 142 continue; 143 } 144 ret = qemu_read_config_file(f->filename); 145 if (ret < 0 && ret != -ENOENT) { 146 return ret; 147 } 148 } 149 150 return 0; 151 } 152 153 static inline bool is_zero_page(uint8_t *p) 154 { 155 return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) == 156 TARGET_PAGE_SIZE; 157 } 158 159 /* struct contains XBZRLE cache and a static page 160 used by the compression */ 161 static struct { 162 /* buffer used for XBZRLE encoding */ 163 uint8_t *encoded_buf; 164 /* buffer for storing page content */ 165 uint8_t *current_buf; 166 /* buffer used for XBZRLE decoding */ 167 uint8_t *decoded_buf; 168 /* Cache for XBZRLE */ 169 PageCache *cache; 170 } XBZRLE = { 171 .encoded_buf = NULL, 172 .current_buf = NULL, 173 .decoded_buf = NULL, 174 .cache = NULL, 175 }; 176 177 178 int64_t xbzrle_cache_resize(int64_t new_size) 179 { 180 if (XBZRLE.cache != NULL) { 181 return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * 182 TARGET_PAGE_SIZE; 183 } 184 return pow2floor(new_size); 185 } 186 187 /* accounting for migration statistics */ 188 typedef struct AccountingInfo { 189 uint64_t dup_pages; 190 uint64_t skipped_pages; 191 uint64_t norm_pages; 192 uint64_t iterations; 193 uint64_t xbzrle_bytes; 194 uint64_t xbzrle_pages; 195 uint64_t xbzrle_cache_miss; 196 uint64_t xbzrle_overflows; 197 } AccountingInfo; 198 199 static AccountingInfo acct_info; 200 201 static void acct_clear(void) 202 { 203 memset(&acct_info, 0, sizeof(acct_info)); 204 } 205 206 uint64_t dup_mig_bytes_transferred(void) 207 { 208 return acct_info.dup_pages * TARGET_PAGE_SIZE; 209 } 210 211 uint64_t dup_mig_pages_transferred(void) 212 { 213 return acct_info.dup_pages; 214 } 215 216 uint64_t skipped_mig_bytes_transferred(void) 217 { 218 return acct_info.skipped_pages * TARGET_PAGE_SIZE; 219 } 220 221 uint64_t skipped_mig_pages_transferred(void) 222 { 223 return acct_info.skipped_pages; 224 } 225 226 uint64_t norm_mig_bytes_transferred(void) 227 { 228 return acct_info.norm_pages * TARGET_PAGE_SIZE; 229 } 230 231 uint64_t norm_mig_pages_transferred(void) 232 { 233 return acct_info.norm_pages; 234 } 235 236 uint64_t xbzrle_mig_bytes_transferred(void) 237 { 238 return acct_info.xbzrle_bytes; 239 } 240 241 uint64_t xbzrle_mig_pages_transferred(void) 242 { 243 return acct_info.xbzrle_pages; 244 } 245 246 uint64_t xbzrle_mig_pages_cache_miss(void) 247 { 248 return acct_info.xbzrle_cache_miss; 249 } 250 251 uint64_t xbzrle_mig_pages_overflow(void) 252 { 253 return acct_info.xbzrle_overflows; 254 } 255 256 static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, 257 int cont, int flag) 258 { 259 size_t size; 260 261 qemu_put_be64(f, offset | cont | flag); 262 size = 8; 263 264 if (!cont) { 265 qemu_put_byte(f, strlen(block->idstr)); 266 qemu_put_buffer(f, (uint8_t *)block->idstr, 267 strlen(block->idstr)); 268 size += 1 + strlen(block->idstr); 269 } 270 return size; 271 } 272 273 #define ENCODING_FLAG_XBZRLE 0x1 274 275 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, 276 ram_addr_t current_addr, RAMBlock *block, 277 ram_addr_t offset, int cont, bool last_stage) 278 { 279 int encoded_len = 0, bytes_sent = -1; 280 uint8_t *prev_cached_page; 281 282 if (!cache_is_cached(XBZRLE.cache, current_addr)) { 283 if (!last_stage) { 284 cache_insert(XBZRLE.cache, current_addr, current_data); 285 } 286 acct_info.xbzrle_cache_miss++; 287 return -1; 288 } 289 290 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 291 292 /* save current buffer into memory */ 293 memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); 294 295 /* XBZRLE encoding (if there is no overflow) */ 296 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 297 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 298 TARGET_PAGE_SIZE); 299 if (encoded_len == 0) { 300 DPRINTF("Skipping unmodified page\n"); 301 return 0; 302 } else if (encoded_len == -1) { 303 DPRINTF("Overflow\n"); 304 acct_info.xbzrle_overflows++; 305 /* update data in the cache */ 306 memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); 307 return -1; 308 } 309 310 /* we need to update the data in the cache, in order to get the same data */ 311 if (!last_stage) { 312 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 313 } 314 315 /* Send XBZRLE based compressed page */ 316 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); 317 qemu_put_byte(f, ENCODING_FLAG_XBZRLE); 318 qemu_put_be16(f, encoded_len); 319 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); 320 bytes_sent += encoded_len + 1 + 2; 321 acct_info.xbzrle_pages++; 322 acct_info.xbzrle_bytes += bytes_sent; 323 324 return bytes_sent; 325 } 326 327 328 /* This is the last block that we have visited serching for dirty pages 329 */ 330 static RAMBlock *last_seen_block; 331 /* This is the last block from where we have sent data */ 332 static RAMBlock *last_sent_block; 333 static ram_addr_t last_offset; 334 static unsigned long *migration_bitmap; 335 static uint64_t migration_dirty_pages; 336 static uint32_t last_version; 337 static bool ram_bulk_stage; 338 339 static inline 340 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, 341 ram_addr_t start) 342 { 343 unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; 344 unsigned long nr = base + (start >> TARGET_PAGE_BITS); 345 uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); 346 unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); 347 348 unsigned long next; 349 350 if (ram_bulk_stage && nr > base) { 351 next = nr + 1; 352 } else { 353 next = find_next_bit(migration_bitmap, size, nr); 354 } 355 356 if (next < size) { 357 clear_bit(next, migration_bitmap); 358 migration_dirty_pages--; 359 } 360 return (next - base) << TARGET_PAGE_BITS; 361 } 362 363 static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, 364 ram_addr_t offset) 365 { 366 bool ret; 367 int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; 368 369 ret = test_and_set_bit(nr, migration_bitmap); 370 371 if (!ret) { 372 migration_dirty_pages++; 373 } 374 return ret; 375 } 376 377 /* Needs iothread lock! */ 378 379 static void migration_bitmap_sync(void) 380 { 381 RAMBlock *block; 382 ram_addr_t addr; 383 uint64_t num_dirty_pages_init = migration_dirty_pages; 384 MigrationState *s = migrate_get_current(); 385 static int64_t start_time; 386 static int64_t bytes_xfer_prev; 387 static int64_t num_dirty_pages_period; 388 int64_t end_time; 389 int64_t bytes_xfer_now; 390 391 if (!bytes_xfer_prev) { 392 bytes_xfer_prev = ram_bytes_transferred(); 393 } 394 395 if (!start_time) { 396 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 397 } 398 399 trace_migration_bitmap_sync_start(); 400 address_space_sync_dirty_bitmap(&address_space_memory); 401 402 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 403 for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { 404 if (memory_region_test_and_clear_dirty(block->mr, 405 addr, TARGET_PAGE_SIZE, 406 DIRTY_MEMORY_MIGRATION)) { 407 migration_bitmap_set_dirty(block->mr, addr); 408 } 409 } 410 } 411 trace_migration_bitmap_sync_end(migration_dirty_pages 412 - num_dirty_pages_init); 413 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; 414 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 415 416 /* more than 1 second = 1000 millisecons */ 417 if (end_time > start_time + 1000) { 418 if (migrate_auto_converge()) { 419 /* The following detection logic can be refined later. For now: 420 Check to see if the dirtied bytes is 50% more than the approx. 421 amount of bytes that just got transferred since the last time we 422 were in this routine. If that happens >N times (for now N==4) 423 we turn on the throttle down logic */ 424 bytes_xfer_now = ram_bytes_transferred(); 425 if (s->dirty_pages_rate && 426 (num_dirty_pages_period * TARGET_PAGE_SIZE > 427 (bytes_xfer_now - bytes_xfer_prev)/2) && 428 (dirty_rate_high_cnt++ > 4)) { 429 trace_migration_throttle(); 430 mig_throttle_on = true; 431 dirty_rate_high_cnt = 0; 432 } 433 bytes_xfer_prev = bytes_xfer_now; 434 } else { 435 mig_throttle_on = false; 436 } 437 s->dirty_pages_rate = num_dirty_pages_period * 1000 438 / (end_time - start_time); 439 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; 440 start_time = end_time; 441 num_dirty_pages_period = 0; 442 } 443 } 444 445 /* 446 * ram_save_block: Writes a page of memory to the stream f 447 * 448 * Returns: The number of bytes written. 449 * 0 means no dirty pages 450 */ 451 452 static int ram_save_block(QEMUFile *f, bool last_stage) 453 { 454 RAMBlock *block = last_seen_block; 455 ram_addr_t offset = last_offset; 456 bool complete_round = false; 457 int bytes_sent = 0; 458 MemoryRegion *mr; 459 ram_addr_t current_addr; 460 461 if (!block) 462 block = QTAILQ_FIRST(&ram_list.blocks); 463 464 while (true) { 465 mr = block->mr; 466 offset = migration_bitmap_find_and_reset_dirty(mr, offset); 467 if (complete_round && block == last_seen_block && 468 offset >= last_offset) { 469 break; 470 } 471 if (offset >= block->length) { 472 offset = 0; 473 block = QTAILQ_NEXT(block, next); 474 if (!block) { 475 block = QTAILQ_FIRST(&ram_list.blocks); 476 complete_round = true; 477 ram_bulk_stage = false; 478 } 479 } else { 480 int ret; 481 uint8_t *p; 482 int cont = (block == last_sent_block) ? 483 RAM_SAVE_FLAG_CONTINUE : 0; 484 485 p = memory_region_get_ram_ptr(mr) + offset; 486 487 /* In doubt sent page as normal */ 488 bytes_sent = -1; 489 ret = ram_control_save_page(f, block->offset, 490 offset, TARGET_PAGE_SIZE, &bytes_sent); 491 492 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 493 if (ret != RAM_SAVE_CONTROL_DELAYED) { 494 if (bytes_sent > 0) { 495 acct_info.norm_pages++; 496 } else if (bytes_sent == 0) { 497 acct_info.dup_pages++; 498 } 499 } 500 } else if (is_zero_page(p)) { 501 acct_info.dup_pages++; 502 bytes_sent = save_block_hdr(f, block, offset, cont, 503 RAM_SAVE_FLAG_COMPRESS); 504 qemu_put_byte(f, 0); 505 bytes_sent++; 506 } else if (!ram_bulk_stage && migrate_use_xbzrle()) { 507 current_addr = block->offset + offset; 508 bytes_sent = save_xbzrle_page(f, p, current_addr, block, 509 offset, cont, last_stage); 510 if (!last_stage) { 511 p = get_cached_data(XBZRLE.cache, current_addr); 512 } 513 } 514 515 /* XBZRLE overflow or normal page */ 516 if (bytes_sent == -1) { 517 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); 518 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); 519 bytes_sent += TARGET_PAGE_SIZE; 520 acct_info.norm_pages++; 521 } 522 523 /* if page is unmodified, continue to the next */ 524 if (bytes_sent > 0) { 525 last_sent_block = block; 526 break; 527 } 528 } 529 } 530 last_seen_block = block; 531 last_offset = offset; 532 533 return bytes_sent; 534 } 535 536 static uint64_t bytes_transferred; 537 538 void acct_update_position(QEMUFile *f, size_t size, bool zero) 539 { 540 uint64_t pages = size / TARGET_PAGE_SIZE; 541 if (zero) { 542 acct_info.dup_pages += pages; 543 } else { 544 acct_info.norm_pages += pages; 545 bytes_transferred += size; 546 qemu_update_position(f, size); 547 } 548 } 549 550 static ram_addr_t ram_save_remaining(void) 551 { 552 return migration_dirty_pages; 553 } 554 555 uint64_t ram_bytes_remaining(void) 556 { 557 return ram_save_remaining() * TARGET_PAGE_SIZE; 558 } 559 560 uint64_t ram_bytes_transferred(void) 561 { 562 return bytes_transferred; 563 } 564 565 uint64_t ram_bytes_total(void) 566 { 567 RAMBlock *block; 568 uint64_t total = 0; 569 570 QTAILQ_FOREACH(block, &ram_list.blocks, next) 571 total += block->length; 572 573 return total; 574 } 575 576 static void migration_end(void) 577 { 578 if (migration_bitmap) { 579 memory_global_dirty_log_stop(); 580 g_free(migration_bitmap); 581 migration_bitmap = NULL; 582 } 583 584 if (XBZRLE.cache) { 585 cache_fini(XBZRLE.cache); 586 g_free(XBZRLE.cache); 587 g_free(XBZRLE.encoded_buf); 588 g_free(XBZRLE.current_buf); 589 g_free(XBZRLE.decoded_buf); 590 XBZRLE.cache = NULL; 591 } 592 } 593 594 static void ram_migration_cancel(void *opaque) 595 { 596 migration_end(); 597 } 598 599 static void reset_ram_globals(void) 600 { 601 last_seen_block = NULL; 602 last_sent_block = NULL; 603 last_offset = 0; 604 last_version = ram_list.version; 605 ram_bulk_stage = true; 606 } 607 608 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 609 610 static int ram_save_setup(QEMUFile *f, void *opaque) 611 { 612 RAMBlock *block; 613 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; 614 615 migration_bitmap = bitmap_new(ram_pages); 616 bitmap_set(migration_bitmap, 0, ram_pages); 617 migration_dirty_pages = ram_pages; 618 mig_throttle_on = false; 619 dirty_rate_high_cnt = 0; 620 621 if (migrate_use_xbzrle()) { 622 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / 623 TARGET_PAGE_SIZE, 624 TARGET_PAGE_SIZE); 625 if (!XBZRLE.cache) { 626 DPRINTF("Error creating cache\n"); 627 return -1; 628 } 629 XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); 630 XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); 631 acct_clear(); 632 } 633 634 qemu_mutex_lock_iothread(); 635 qemu_mutex_lock_ramlist(); 636 bytes_transferred = 0; 637 reset_ram_globals(); 638 639 memory_global_dirty_log_start(); 640 migration_bitmap_sync(); 641 qemu_mutex_unlock_iothread(); 642 643 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 644 645 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 646 qemu_put_byte(f, strlen(block->idstr)); 647 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 648 qemu_put_be64(f, block->length); 649 } 650 651 qemu_mutex_unlock_ramlist(); 652 653 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 654 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 655 656 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 657 658 return 0; 659 } 660 661 static int ram_save_iterate(QEMUFile *f, void *opaque) 662 { 663 int ret; 664 int i; 665 int64_t t0; 666 int total_sent = 0; 667 668 qemu_mutex_lock_ramlist(); 669 670 if (ram_list.version != last_version) { 671 reset_ram_globals(); 672 } 673 674 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 675 676 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 677 i = 0; 678 while ((ret = qemu_file_rate_limit(f)) == 0) { 679 int bytes_sent; 680 681 bytes_sent = ram_save_block(f, false); 682 /* no more blocks to sent */ 683 if (bytes_sent == 0) { 684 break; 685 } 686 total_sent += bytes_sent; 687 acct_info.iterations++; 688 check_guest_throttling(); 689 /* we want to check in the 1st loop, just in case it was the 1st time 690 and we had to sync the dirty bitmap. 691 qemu_get_clock_ns() is a bit expensive, so we only check each some 692 iterations 693 */ 694 if ((i & 63) == 0) { 695 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 696 if (t1 > MAX_WAIT) { 697 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", 698 t1, i); 699 break; 700 } 701 } 702 i++; 703 } 704 705 qemu_mutex_unlock_ramlist(); 706 707 /* 708 * Must occur before EOS (or any QEMUFile operation) 709 * because of RDMA protocol. 710 */ 711 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 712 713 bytes_transferred += total_sent; 714 715 /* 716 * Do not count these 8 bytes into total_sent, so that we can 717 * return 0 if no page had been dirtied. 718 */ 719 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 720 bytes_transferred += 8; 721 722 ret = qemu_file_get_error(f); 723 if (ret < 0) { 724 return ret; 725 } 726 727 return total_sent; 728 } 729 730 static int ram_save_complete(QEMUFile *f, void *opaque) 731 { 732 qemu_mutex_lock_ramlist(); 733 migration_bitmap_sync(); 734 735 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 736 737 /* try transferring iterative blocks of memory */ 738 739 /* flush all remaining blocks regardless of rate limiting */ 740 while (true) { 741 int bytes_sent; 742 743 bytes_sent = ram_save_block(f, true); 744 /* no more blocks to sent */ 745 if (bytes_sent == 0) { 746 break; 747 } 748 bytes_transferred += bytes_sent; 749 } 750 751 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 752 migration_end(); 753 754 qemu_mutex_unlock_ramlist(); 755 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 756 757 return 0; 758 } 759 760 static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) 761 { 762 uint64_t remaining_size; 763 764 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 765 766 if (remaining_size < max_size) { 767 qemu_mutex_lock_iothread(); 768 migration_bitmap_sync(); 769 qemu_mutex_unlock_iothread(); 770 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 771 } 772 return remaining_size; 773 } 774 775 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 776 { 777 int ret, rc = 0; 778 unsigned int xh_len; 779 int xh_flags; 780 781 if (!XBZRLE.decoded_buf) { 782 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 783 } 784 785 /* extract RLE header */ 786 xh_flags = qemu_get_byte(f); 787 xh_len = qemu_get_be16(f); 788 789 if (xh_flags != ENCODING_FLAG_XBZRLE) { 790 fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); 791 return -1; 792 } 793 794 if (xh_len > TARGET_PAGE_SIZE) { 795 fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); 796 return -1; 797 } 798 /* load data and decode */ 799 qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); 800 801 /* decode RLE */ 802 ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, 803 TARGET_PAGE_SIZE); 804 if (ret == -1) { 805 fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); 806 rc = -1; 807 } else if (ret > TARGET_PAGE_SIZE) { 808 fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", 809 ret, TARGET_PAGE_SIZE); 810 abort(); 811 } 812 813 return rc; 814 } 815 816 static inline void *host_from_stream_offset(QEMUFile *f, 817 ram_addr_t offset, 818 int flags) 819 { 820 static RAMBlock *block = NULL; 821 char id[256]; 822 uint8_t len; 823 824 if (flags & RAM_SAVE_FLAG_CONTINUE) { 825 if (!block) { 826 fprintf(stderr, "Ack, bad migration stream!\n"); 827 return NULL; 828 } 829 830 return memory_region_get_ram_ptr(block->mr) + offset; 831 } 832 833 len = qemu_get_byte(f); 834 qemu_get_buffer(f, (uint8_t *)id, len); 835 id[len] = 0; 836 837 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 838 if (!strncmp(id, block->idstr, sizeof(id))) 839 return memory_region_get_ram_ptr(block->mr) + offset; 840 } 841 842 fprintf(stderr, "Can't find block %s!\n", id); 843 return NULL; 844 } 845 846 /* 847 * If a page (or a whole RDMA chunk) has been 848 * determined to be zero, then zap it. 849 */ 850 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 851 { 852 if (ch != 0 || !is_zero_page(host)) { 853 memset(host, ch, size); 854 #ifndef _WIN32 855 if (ch == 0 && 856 (!kvm_enabled() || kvm_has_sync_mmu()) && 857 getpagesize() <= TARGET_PAGE_SIZE) { 858 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); 859 } 860 #endif 861 } 862 } 863 864 static int ram_load(QEMUFile *f, void *opaque, int version_id) 865 { 866 ram_addr_t addr; 867 int flags, ret = 0; 868 int error; 869 static uint64_t seq_iter; 870 871 seq_iter++; 872 873 if (version_id < 4 || version_id > 4) { 874 return -EINVAL; 875 } 876 877 do { 878 addr = qemu_get_be64(f); 879 880 flags = addr & ~TARGET_PAGE_MASK; 881 addr &= TARGET_PAGE_MASK; 882 883 if (flags & RAM_SAVE_FLAG_MEM_SIZE) { 884 if (version_id == 4) { 885 /* Synchronize RAM block list */ 886 char id[256]; 887 ram_addr_t length; 888 ram_addr_t total_ram_bytes = addr; 889 890 while (total_ram_bytes) { 891 RAMBlock *block; 892 uint8_t len; 893 894 len = qemu_get_byte(f); 895 qemu_get_buffer(f, (uint8_t *)id, len); 896 id[len] = 0; 897 length = qemu_get_be64(f); 898 899 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 900 if (!strncmp(id, block->idstr, sizeof(id))) { 901 if (block->length != length) { 902 fprintf(stderr, 903 "Length mismatch: %s: " RAM_ADDR_FMT 904 " in != " RAM_ADDR_FMT "\n", id, length, 905 block->length); 906 ret = -EINVAL; 907 goto done; 908 } 909 break; 910 } 911 } 912 913 if (!block) { 914 fprintf(stderr, "Unknown ramblock \"%s\", cannot " 915 "accept migration\n", id); 916 ret = -EINVAL; 917 goto done; 918 } 919 920 total_ram_bytes -= length; 921 } 922 } 923 } 924 925 if (flags & RAM_SAVE_FLAG_COMPRESS) { 926 void *host; 927 uint8_t ch; 928 929 host = host_from_stream_offset(f, addr, flags); 930 if (!host) { 931 return -EINVAL; 932 } 933 934 ch = qemu_get_byte(f); 935 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 936 } else if (flags & RAM_SAVE_FLAG_PAGE) { 937 void *host; 938 939 host = host_from_stream_offset(f, addr, flags); 940 if (!host) { 941 return -EINVAL; 942 } 943 944 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 945 } else if (flags & RAM_SAVE_FLAG_XBZRLE) { 946 void *host = host_from_stream_offset(f, addr, flags); 947 if (!host) { 948 return -EINVAL; 949 } 950 951 if (load_xbzrle(f, addr, host) < 0) { 952 ret = -EINVAL; 953 goto done; 954 } 955 } else if (flags & RAM_SAVE_FLAG_HOOK) { 956 ram_control_load_hook(f, flags); 957 } 958 error = qemu_file_get_error(f); 959 if (error) { 960 ret = error; 961 goto done; 962 } 963 } while (!(flags & RAM_SAVE_FLAG_EOS)); 964 965 done: 966 DPRINTF("Completed load of VM with exit code %d seq iteration " 967 "%" PRIu64 "\n", ret, seq_iter); 968 return ret; 969 } 970 971 SaveVMHandlers savevm_ram_handlers = { 972 .save_live_setup = ram_save_setup, 973 .save_live_iterate = ram_save_iterate, 974 .save_live_complete = ram_save_complete, 975 .save_live_pending = ram_save_pending, 976 .load_state = ram_load, 977 .cancel = ram_migration_cancel, 978 }; 979 980 struct soundhw { 981 const char *name; 982 const char *descr; 983 int enabled; 984 int isa; 985 union { 986 int (*init_isa) (ISABus *bus); 987 int (*init_pci) (PCIBus *bus); 988 } init; 989 }; 990 991 static struct soundhw soundhw[9]; 992 static int soundhw_count; 993 994 void isa_register_soundhw(const char *name, const char *descr, 995 int (*init_isa)(ISABus *bus)) 996 { 997 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 998 soundhw[soundhw_count].name = name; 999 soundhw[soundhw_count].descr = descr; 1000 soundhw[soundhw_count].isa = 1; 1001 soundhw[soundhw_count].init.init_isa = init_isa; 1002 soundhw_count++; 1003 } 1004 1005 void pci_register_soundhw(const char *name, const char *descr, 1006 int (*init_pci)(PCIBus *bus)) 1007 { 1008 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 1009 soundhw[soundhw_count].name = name; 1010 soundhw[soundhw_count].descr = descr; 1011 soundhw[soundhw_count].isa = 0; 1012 soundhw[soundhw_count].init.init_pci = init_pci; 1013 soundhw_count++; 1014 } 1015 1016 void select_soundhw(const char *optarg) 1017 { 1018 struct soundhw *c; 1019 1020 if (is_help_option(optarg)) { 1021 show_valid_cards: 1022 1023 if (soundhw_count) { 1024 printf("Valid sound card names (comma separated):\n"); 1025 for (c = soundhw; c->name; ++c) { 1026 printf ("%-11s %s\n", c->name, c->descr); 1027 } 1028 printf("\n-soundhw all will enable all of the above\n"); 1029 } else { 1030 printf("Machine has no user-selectable audio hardware " 1031 "(it may or may not have always-present audio hardware).\n"); 1032 } 1033 exit(!is_help_option(optarg)); 1034 } 1035 else { 1036 size_t l; 1037 const char *p; 1038 char *e; 1039 int bad_card = 0; 1040 1041 if (!strcmp(optarg, "all")) { 1042 for (c = soundhw; c->name; ++c) { 1043 c->enabled = 1; 1044 } 1045 return; 1046 } 1047 1048 p = optarg; 1049 while (*p) { 1050 e = strchr(p, ','); 1051 l = !e ? strlen(p) : (size_t) (e - p); 1052 1053 for (c = soundhw; c->name; ++c) { 1054 if (!strncmp(c->name, p, l) && !c->name[l]) { 1055 c->enabled = 1; 1056 break; 1057 } 1058 } 1059 1060 if (!c->name) { 1061 if (l > 80) { 1062 fprintf(stderr, 1063 "Unknown sound card name (too big to show)\n"); 1064 } 1065 else { 1066 fprintf(stderr, "Unknown sound card name `%.*s'\n", 1067 (int) l, p); 1068 } 1069 bad_card = 1; 1070 } 1071 p += l + (e != NULL); 1072 } 1073 1074 if (bad_card) { 1075 goto show_valid_cards; 1076 } 1077 } 1078 } 1079 1080 void audio_init(void) 1081 { 1082 struct soundhw *c; 1083 ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL); 1084 PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL); 1085 1086 for (c = soundhw; c->name; ++c) { 1087 if (c->enabled) { 1088 if (c->isa) { 1089 if (!isa_bus) { 1090 fprintf(stderr, "ISA bus not available for %s\n", c->name); 1091 exit(1); 1092 } 1093 c->init.init_isa(isa_bus); 1094 } else { 1095 if (!pci_bus) { 1096 fprintf(stderr, "PCI bus not available for %s\n", c->name); 1097 exit(1); 1098 } 1099 c->init.init_pci(pci_bus); 1100 } 1101 } 1102 } 1103 } 1104 1105 int qemu_uuid_parse(const char *str, uint8_t *uuid) 1106 { 1107 int ret; 1108 1109 if (strlen(str) != 36) { 1110 return -1; 1111 } 1112 1113 ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3], 1114 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9], 1115 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], 1116 &uuid[15]); 1117 1118 if (ret != 16) { 1119 return -1; 1120 } 1121 #ifdef TARGET_I386 1122 smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16); 1123 #endif 1124 return 0; 1125 } 1126 1127 void do_acpitable_option(const QemuOpts *opts) 1128 { 1129 #ifdef TARGET_I386 1130 Error *err = NULL; 1131 1132 acpi_table_add(opts, &err); 1133 if (err) { 1134 error_report("Wrong acpi table provided: %s", 1135 error_get_pretty(err)); 1136 error_free(err); 1137 exit(1); 1138 } 1139 #endif 1140 } 1141 1142 void do_smbios_option(const char *optarg) 1143 { 1144 #ifdef TARGET_I386 1145 if (smbios_entry_add(optarg) < 0) { 1146 exit(1); 1147 } 1148 #endif 1149 } 1150 1151 void cpudef_init(void) 1152 { 1153 #if defined(cpudef_setup) 1154 cpudef_setup(); /* parse cpu definitions in target config file */ 1155 #endif 1156 } 1157 1158 int tcg_available(void) 1159 { 1160 return 1; 1161 } 1162 1163 int kvm_available(void) 1164 { 1165 #ifdef CONFIG_KVM 1166 return 1; 1167 #else 1168 return 0; 1169 #endif 1170 } 1171 1172 int xen_available(void) 1173 { 1174 #ifdef CONFIG_XEN 1175 return 1; 1176 #else 1177 return 0; 1178 #endif 1179 } 1180 1181 1182 TargetInfo *qmp_query_target(Error **errp) 1183 { 1184 TargetInfo *info = g_malloc0(sizeof(*info)); 1185 1186 info->arch = g_strdup(TARGET_NAME); 1187 1188 return info; 1189 } 1190 1191 /* Stub function that's gets run on the vcpu when its brought out of the 1192 VM to run inside qemu via async_run_on_cpu()*/ 1193 static void mig_sleep_cpu(void *opq) 1194 { 1195 qemu_mutex_unlock_iothread(); 1196 g_usleep(30*1000); 1197 qemu_mutex_lock_iothread(); 1198 } 1199 1200 /* To reduce the dirty rate explicitly disallow the VCPUs from spending 1201 much time in the VM. The migration thread will try to catchup. 1202 Workload will experience a performance drop. 1203 */ 1204 static void mig_throttle_guest_down(void) 1205 { 1206 CPUState *cpu; 1207 1208 qemu_mutex_lock_iothread(); 1209 CPU_FOREACH(cpu) { 1210 async_run_on_cpu(cpu, mig_sleep_cpu, NULL); 1211 } 1212 qemu_mutex_unlock_iothread(); 1213 } 1214 1215 static void check_guest_throttling(void) 1216 { 1217 static int64_t t0; 1218 int64_t t1; 1219 1220 if (!mig_throttle_on) { 1221 return; 1222 } 1223 1224 if (!t0) { 1225 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1226 return; 1227 } 1228 1229 t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1230 1231 /* If it has been more than 40 ms since the last time the guest 1232 * was throttled then do it again. 1233 */ 1234 if (40 < (t1-t0)/1000000) { 1235 mig_throttle_guest_down(); 1236 t0 = t1; 1237 } 1238 } 1239