1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include <stdint.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #ifndef _WIN32 28 #include <sys/types.h> 29 #include <sys/mman.h> 30 #endif 31 #include "config.h" 32 #include "monitor/monitor.h" 33 #include "sysemu/sysemu.h" 34 #include "qemu/bitops.h" 35 #include "qemu/bitmap.h" 36 #include "sysemu/arch_init.h" 37 #include "audio/audio.h" 38 #include "hw/i386/pc.h" 39 #include "hw/pci/pci.h" 40 #include "hw/audio/audio.h" 41 #include "sysemu/kvm.h" 42 #include "migration/migration.h" 43 #include "hw/i386/smbios.h" 44 #include "exec/address-spaces.h" 45 #include "hw/audio/pcspk.h" 46 #include "migration/page_cache.h" 47 #include "qemu/config-file.h" 48 #include "qmp-commands.h" 49 #include "trace.h" 50 #include "exec/cpu-all.h" 51 #include "hw/acpi/acpi.h" 52 53 #ifdef DEBUG_ARCH_INIT 54 #define DPRINTF(fmt, ...) \ 55 do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) 56 #else 57 #define DPRINTF(fmt, ...) \ 58 do { } while (0) 59 #endif 60 61 #ifdef TARGET_SPARC 62 int graphic_width = 1024; 63 int graphic_height = 768; 64 int graphic_depth = 8; 65 #else 66 int graphic_width = 800; 67 int graphic_height = 600; 68 int graphic_depth = 32; 69 #endif 70 71 72 #if defined(TARGET_ALPHA) 73 #define QEMU_ARCH QEMU_ARCH_ALPHA 74 #elif defined(TARGET_ARM) 75 #define QEMU_ARCH QEMU_ARCH_ARM 76 #elif defined(TARGET_CRIS) 77 #define QEMU_ARCH QEMU_ARCH_CRIS 78 #elif defined(TARGET_I386) 79 #define QEMU_ARCH QEMU_ARCH_I386 80 #elif defined(TARGET_M68K) 81 #define QEMU_ARCH QEMU_ARCH_M68K 82 #elif defined(TARGET_LM32) 83 #define QEMU_ARCH QEMU_ARCH_LM32 84 #elif defined(TARGET_MICROBLAZE) 85 #define QEMU_ARCH QEMU_ARCH_MICROBLAZE 86 #elif defined(TARGET_MIPS) 87 #define QEMU_ARCH QEMU_ARCH_MIPS 88 #elif defined(TARGET_MOXIE) 89 #define QEMU_ARCH QEMU_ARCH_MOXIE 90 #elif defined(TARGET_OPENRISC) 91 #define QEMU_ARCH QEMU_ARCH_OPENRISC 92 #elif defined(TARGET_PPC) 93 #define QEMU_ARCH QEMU_ARCH_PPC 94 #elif defined(TARGET_S390X) 95 #define QEMU_ARCH QEMU_ARCH_S390X 96 #elif defined(TARGET_SH4) 97 #define QEMU_ARCH QEMU_ARCH_SH4 98 #elif defined(TARGET_SPARC) 99 #define QEMU_ARCH QEMU_ARCH_SPARC 100 #elif defined(TARGET_XTENSA) 101 #define QEMU_ARCH QEMU_ARCH_XTENSA 102 #elif defined(TARGET_UNICORE32) 103 #define QEMU_ARCH QEMU_ARCH_UNICORE32 104 #endif 105 106 const uint32_t arch_type = QEMU_ARCH; 107 static bool mig_throttle_on; 108 static int dirty_rate_high_cnt; 109 static void check_guest_throttling(void); 110 111 /***********************************************************/ 112 /* ram save/restore */ 113 114 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ 115 #define RAM_SAVE_FLAG_COMPRESS 0x02 116 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 117 #define RAM_SAVE_FLAG_PAGE 0x08 118 #define RAM_SAVE_FLAG_EOS 0x10 119 #define RAM_SAVE_FLAG_CONTINUE 0x20 120 #define RAM_SAVE_FLAG_XBZRLE 0x40 121 /* 0x80 is reserved in migration.h start with 0x100 next */ 122 123 124 static struct defconfig_file { 125 const char *filename; 126 /* Indicates it is an user config file (disabled by -no-user-config) */ 127 bool userconfig; 128 } default_config_files[] = { 129 { CONFIG_QEMU_CONFDIR "/qemu.conf", true }, 130 { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true }, 131 { NULL }, /* end of list */ 132 }; 133 134 135 int qemu_read_default_config_files(bool userconfig) 136 { 137 int ret; 138 struct defconfig_file *f; 139 140 for (f = default_config_files; f->filename; f++) { 141 if (!userconfig && f->userconfig) { 142 continue; 143 } 144 ret = qemu_read_config_file(f->filename); 145 if (ret < 0 && ret != -ENOENT) { 146 return ret; 147 } 148 } 149 150 return 0; 151 } 152 153 static inline bool is_zero_page(uint8_t *p) 154 { 155 return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) == 156 TARGET_PAGE_SIZE; 157 } 158 159 /* struct contains XBZRLE cache and a static page 160 used by the compression */ 161 static struct { 162 /* buffer used for XBZRLE encoding */ 163 uint8_t *encoded_buf; 164 /* buffer for storing page content */ 165 uint8_t *current_buf; 166 /* buffer used for XBZRLE decoding */ 167 uint8_t *decoded_buf; 168 /* Cache for XBZRLE */ 169 PageCache *cache; 170 } XBZRLE = { 171 .encoded_buf = NULL, 172 .current_buf = NULL, 173 .decoded_buf = NULL, 174 .cache = NULL, 175 }; 176 177 178 int64_t xbzrle_cache_resize(int64_t new_size) 179 { 180 if (XBZRLE.cache != NULL) { 181 return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * 182 TARGET_PAGE_SIZE; 183 } 184 return pow2floor(new_size); 185 } 186 187 /* accounting for migration statistics */ 188 typedef struct AccountingInfo { 189 uint64_t dup_pages; 190 uint64_t skipped_pages; 191 uint64_t norm_pages; 192 uint64_t iterations; 193 uint64_t xbzrle_bytes; 194 uint64_t xbzrle_pages; 195 uint64_t xbzrle_cache_miss; 196 uint64_t xbzrle_overflows; 197 } AccountingInfo; 198 199 static AccountingInfo acct_info; 200 201 static void acct_clear(void) 202 { 203 memset(&acct_info, 0, sizeof(acct_info)); 204 } 205 206 uint64_t dup_mig_bytes_transferred(void) 207 { 208 return acct_info.dup_pages * TARGET_PAGE_SIZE; 209 } 210 211 uint64_t dup_mig_pages_transferred(void) 212 { 213 return acct_info.dup_pages; 214 } 215 216 uint64_t skipped_mig_bytes_transferred(void) 217 { 218 return acct_info.skipped_pages * TARGET_PAGE_SIZE; 219 } 220 221 uint64_t skipped_mig_pages_transferred(void) 222 { 223 return acct_info.skipped_pages; 224 } 225 226 uint64_t norm_mig_bytes_transferred(void) 227 { 228 return acct_info.norm_pages * TARGET_PAGE_SIZE; 229 } 230 231 uint64_t norm_mig_pages_transferred(void) 232 { 233 return acct_info.norm_pages; 234 } 235 236 uint64_t xbzrle_mig_bytes_transferred(void) 237 { 238 return acct_info.xbzrle_bytes; 239 } 240 241 uint64_t xbzrle_mig_pages_transferred(void) 242 { 243 return acct_info.xbzrle_pages; 244 } 245 246 uint64_t xbzrle_mig_pages_cache_miss(void) 247 { 248 return acct_info.xbzrle_cache_miss; 249 } 250 251 uint64_t xbzrle_mig_pages_overflow(void) 252 { 253 return acct_info.xbzrle_overflows; 254 } 255 256 static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, 257 int cont, int flag) 258 { 259 size_t size; 260 261 qemu_put_be64(f, offset | cont | flag); 262 size = 8; 263 264 if (!cont) { 265 qemu_put_byte(f, strlen(block->idstr)); 266 qemu_put_buffer(f, (uint8_t *)block->idstr, 267 strlen(block->idstr)); 268 size += 1 + strlen(block->idstr); 269 } 270 return size; 271 } 272 273 #define ENCODING_FLAG_XBZRLE 0x1 274 275 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, 276 ram_addr_t current_addr, RAMBlock *block, 277 ram_addr_t offset, int cont, bool last_stage) 278 { 279 int encoded_len = 0, bytes_sent = -1; 280 uint8_t *prev_cached_page; 281 282 if (!cache_is_cached(XBZRLE.cache, current_addr)) { 283 if (!last_stage) { 284 cache_insert(XBZRLE.cache, current_addr, current_data); 285 } 286 acct_info.xbzrle_cache_miss++; 287 return -1; 288 } 289 290 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 291 292 /* save current buffer into memory */ 293 memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); 294 295 /* XBZRLE encoding (if there is no overflow) */ 296 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 297 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 298 TARGET_PAGE_SIZE); 299 if (encoded_len == 0) { 300 DPRINTF("Skipping unmodified page\n"); 301 return 0; 302 } else if (encoded_len == -1) { 303 DPRINTF("Overflow\n"); 304 acct_info.xbzrle_overflows++; 305 /* update data in the cache */ 306 memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); 307 return -1; 308 } 309 310 /* we need to update the data in the cache, in order to get the same data */ 311 if (!last_stage) { 312 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 313 } 314 315 /* Send XBZRLE based compressed page */ 316 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); 317 qemu_put_byte(f, ENCODING_FLAG_XBZRLE); 318 qemu_put_be16(f, encoded_len); 319 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); 320 bytes_sent += encoded_len + 1 + 2; 321 acct_info.xbzrle_pages++; 322 acct_info.xbzrle_bytes += bytes_sent; 323 324 return bytes_sent; 325 } 326 327 328 /* This is the last block that we have visited serching for dirty pages 329 */ 330 static RAMBlock *last_seen_block; 331 /* This is the last block from where we have sent data */ 332 static RAMBlock *last_sent_block; 333 static ram_addr_t last_offset; 334 static unsigned long *migration_bitmap; 335 static uint64_t migration_dirty_pages; 336 static uint32_t last_version; 337 static bool ram_bulk_stage; 338 339 static inline 340 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, 341 ram_addr_t start) 342 { 343 unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; 344 unsigned long nr = base + (start >> TARGET_PAGE_BITS); 345 uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); 346 unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); 347 348 unsigned long next; 349 350 if (ram_bulk_stage && nr > base) { 351 next = nr + 1; 352 } else { 353 next = find_next_bit(migration_bitmap, size, nr); 354 } 355 356 if (next < size) { 357 clear_bit(next, migration_bitmap); 358 migration_dirty_pages--; 359 } 360 return (next - base) << TARGET_PAGE_BITS; 361 } 362 363 static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, 364 ram_addr_t offset) 365 { 366 bool ret; 367 int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; 368 369 ret = test_and_set_bit(nr, migration_bitmap); 370 371 if (!ret) { 372 migration_dirty_pages++; 373 } 374 return ret; 375 } 376 377 /* Needs iothread lock! */ 378 379 static void migration_bitmap_sync(void) 380 { 381 RAMBlock *block; 382 ram_addr_t addr; 383 uint64_t num_dirty_pages_init = migration_dirty_pages; 384 MigrationState *s = migrate_get_current(); 385 static int64_t start_time; 386 static int64_t bytes_xfer_prev; 387 static int64_t num_dirty_pages_period; 388 int64_t end_time; 389 int64_t bytes_xfer_now; 390 391 if (!bytes_xfer_prev) { 392 bytes_xfer_prev = ram_bytes_transferred(); 393 } 394 395 if (!start_time) { 396 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 397 } 398 399 trace_migration_bitmap_sync_start(); 400 address_space_sync_dirty_bitmap(&address_space_memory); 401 402 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 403 for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { 404 if (memory_region_test_and_clear_dirty(block->mr, 405 addr, TARGET_PAGE_SIZE, 406 DIRTY_MEMORY_MIGRATION)) { 407 migration_bitmap_set_dirty(block->mr, addr); 408 } 409 } 410 } 411 trace_migration_bitmap_sync_end(migration_dirty_pages 412 - num_dirty_pages_init); 413 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; 414 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 415 416 /* more than 1 second = 1000 millisecons */ 417 if (end_time > start_time + 1000) { 418 if (migrate_auto_converge()) { 419 /* The following detection logic can be refined later. For now: 420 Check to see if the dirtied bytes is 50% more than the approx. 421 amount of bytes that just got transferred since the last time we 422 were in this routine. If that happens >N times (for now N==4) 423 we turn on the throttle down logic */ 424 bytes_xfer_now = ram_bytes_transferred(); 425 if (s->dirty_pages_rate && 426 (num_dirty_pages_period * TARGET_PAGE_SIZE > 427 (bytes_xfer_now - bytes_xfer_prev)/2) && 428 (dirty_rate_high_cnt++ > 4)) { 429 trace_migration_throttle(); 430 mig_throttle_on = true; 431 dirty_rate_high_cnt = 0; 432 } 433 bytes_xfer_prev = bytes_xfer_now; 434 } else { 435 mig_throttle_on = false; 436 } 437 s->dirty_pages_rate = num_dirty_pages_period * 1000 438 / (end_time - start_time); 439 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; 440 start_time = end_time; 441 num_dirty_pages_period = 0; 442 } 443 } 444 445 /* 446 * ram_save_block: Writes a page of memory to the stream f 447 * 448 * Returns: The number of bytes written. 449 * 0 means no dirty pages 450 */ 451 452 static int ram_save_block(QEMUFile *f, bool last_stage) 453 { 454 RAMBlock *block = last_seen_block; 455 ram_addr_t offset = last_offset; 456 bool complete_round = false; 457 int bytes_sent = 0; 458 MemoryRegion *mr; 459 ram_addr_t current_addr; 460 461 if (!block) 462 block = QTAILQ_FIRST(&ram_list.blocks); 463 464 while (true) { 465 mr = block->mr; 466 offset = migration_bitmap_find_and_reset_dirty(mr, offset); 467 if (complete_round && block == last_seen_block && 468 offset >= last_offset) { 469 break; 470 } 471 if (offset >= block->length) { 472 offset = 0; 473 block = QTAILQ_NEXT(block, next); 474 if (!block) { 475 block = QTAILQ_FIRST(&ram_list.blocks); 476 complete_round = true; 477 ram_bulk_stage = false; 478 } 479 } else { 480 int ret; 481 uint8_t *p; 482 int cont = (block == last_sent_block) ? 483 RAM_SAVE_FLAG_CONTINUE : 0; 484 485 p = memory_region_get_ram_ptr(mr) + offset; 486 487 /* In doubt sent page as normal */ 488 bytes_sent = -1; 489 ret = ram_control_save_page(f, block->offset, 490 offset, TARGET_PAGE_SIZE, &bytes_sent); 491 492 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 493 if (ret != RAM_SAVE_CONTROL_DELAYED) { 494 if (bytes_sent > 0) { 495 acct_info.norm_pages++; 496 } else if (bytes_sent == 0) { 497 acct_info.dup_pages++; 498 } 499 } 500 } else if (is_zero_page(p)) { 501 acct_info.dup_pages++; 502 bytes_sent = save_block_hdr(f, block, offset, cont, 503 RAM_SAVE_FLAG_COMPRESS); 504 qemu_put_byte(f, 0); 505 bytes_sent++; 506 } else if (!ram_bulk_stage && migrate_use_xbzrle()) { 507 current_addr = block->offset + offset; 508 bytes_sent = save_xbzrle_page(f, p, current_addr, block, 509 offset, cont, last_stage); 510 if (!last_stage) { 511 p = get_cached_data(XBZRLE.cache, current_addr); 512 } 513 } 514 515 /* XBZRLE overflow or normal page */ 516 if (bytes_sent == -1) { 517 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); 518 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); 519 bytes_sent += TARGET_PAGE_SIZE; 520 acct_info.norm_pages++; 521 } 522 523 /* if page is unmodified, continue to the next */ 524 if (bytes_sent > 0) { 525 last_sent_block = block; 526 break; 527 } 528 } 529 } 530 last_seen_block = block; 531 last_offset = offset; 532 533 return bytes_sent; 534 } 535 536 static uint64_t bytes_transferred; 537 538 void acct_update_position(QEMUFile *f, size_t size, bool zero) 539 { 540 uint64_t pages = size / TARGET_PAGE_SIZE; 541 if (zero) { 542 acct_info.dup_pages += pages; 543 } else { 544 acct_info.norm_pages += pages; 545 bytes_transferred += size; 546 qemu_update_position(f, size); 547 } 548 } 549 550 static ram_addr_t ram_save_remaining(void) 551 { 552 return migration_dirty_pages; 553 } 554 555 uint64_t ram_bytes_remaining(void) 556 { 557 return ram_save_remaining() * TARGET_PAGE_SIZE; 558 } 559 560 uint64_t ram_bytes_transferred(void) 561 { 562 return bytes_transferred; 563 } 564 565 uint64_t ram_bytes_total(void) 566 { 567 RAMBlock *block; 568 uint64_t total = 0; 569 570 QTAILQ_FOREACH(block, &ram_list.blocks, next) 571 total += block->length; 572 573 return total; 574 } 575 576 static void migration_end(void) 577 { 578 if (migration_bitmap) { 579 memory_global_dirty_log_stop(); 580 g_free(migration_bitmap); 581 migration_bitmap = NULL; 582 } 583 584 if (XBZRLE.cache) { 585 cache_fini(XBZRLE.cache); 586 g_free(XBZRLE.cache); 587 g_free(XBZRLE.encoded_buf); 588 g_free(XBZRLE.current_buf); 589 g_free(XBZRLE.decoded_buf); 590 XBZRLE.cache = NULL; 591 } 592 } 593 594 static void ram_migration_cancel(void *opaque) 595 { 596 migration_end(); 597 } 598 599 static void reset_ram_globals(void) 600 { 601 last_seen_block = NULL; 602 last_sent_block = NULL; 603 last_offset = 0; 604 last_version = ram_list.version; 605 ram_bulk_stage = true; 606 } 607 608 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 609 610 static int ram_save_setup(QEMUFile *f, void *opaque) 611 { 612 RAMBlock *block; 613 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; 614 615 migration_bitmap = bitmap_new(ram_pages); 616 bitmap_set(migration_bitmap, 0, ram_pages); 617 migration_dirty_pages = ram_pages; 618 mig_throttle_on = false; 619 dirty_rate_high_cnt = 0; 620 621 if (migrate_use_xbzrle()) { 622 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / 623 TARGET_PAGE_SIZE, 624 TARGET_PAGE_SIZE); 625 if (!XBZRLE.cache) { 626 DPRINTF("Error creating cache\n"); 627 return -1; 628 } 629 XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); 630 XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); 631 acct_clear(); 632 } 633 634 qemu_mutex_lock_iothread(); 635 qemu_mutex_lock_ramlist(); 636 bytes_transferred = 0; 637 reset_ram_globals(); 638 639 memory_global_dirty_log_start(); 640 migration_bitmap_sync(); 641 qemu_mutex_unlock_iothread(); 642 643 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 644 645 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 646 qemu_put_byte(f, strlen(block->idstr)); 647 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 648 qemu_put_be64(f, block->length); 649 } 650 651 qemu_mutex_unlock_ramlist(); 652 653 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 654 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 655 656 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 657 658 return 0; 659 } 660 661 static int ram_save_iterate(QEMUFile *f, void *opaque) 662 { 663 int ret; 664 int i; 665 int64_t t0; 666 int total_sent = 0; 667 668 qemu_mutex_lock_ramlist(); 669 670 if (ram_list.version != last_version) { 671 reset_ram_globals(); 672 } 673 674 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 675 676 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 677 i = 0; 678 while ((ret = qemu_file_rate_limit(f)) == 0) { 679 int bytes_sent; 680 681 bytes_sent = ram_save_block(f, false); 682 /* no more blocks to sent */ 683 if (bytes_sent == 0) { 684 break; 685 } 686 total_sent += bytes_sent; 687 acct_info.iterations++; 688 check_guest_throttling(); 689 /* we want to check in the 1st loop, just in case it was the 1st time 690 and we had to sync the dirty bitmap. 691 qemu_get_clock_ns() is a bit expensive, so we only check each some 692 iterations 693 */ 694 if ((i & 63) == 0) { 695 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 696 if (t1 > MAX_WAIT) { 697 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", 698 t1, i); 699 break; 700 } 701 } 702 i++; 703 } 704 705 qemu_mutex_unlock_ramlist(); 706 707 /* 708 * Must occur before EOS (or any QEMUFile operation) 709 * because of RDMA protocol. 710 */ 711 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 712 713 if (ret < 0) { 714 bytes_transferred += total_sent; 715 return ret; 716 } 717 718 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 719 total_sent += 8; 720 bytes_transferred += total_sent; 721 722 return total_sent; 723 } 724 725 static int ram_save_complete(QEMUFile *f, void *opaque) 726 { 727 qemu_mutex_lock_ramlist(); 728 migration_bitmap_sync(); 729 730 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 731 732 /* try transferring iterative blocks of memory */ 733 734 /* flush all remaining blocks regardless of rate limiting */ 735 while (true) { 736 int bytes_sent; 737 738 bytes_sent = ram_save_block(f, true); 739 /* no more blocks to sent */ 740 if (bytes_sent == 0) { 741 break; 742 } 743 bytes_transferred += bytes_sent; 744 } 745 746 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 747 migration_end(); 748 749 qemu_mutex_unlock_ramlist(); 750 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 751 752 return 0; 753 } 754 755 static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) 756 { 757 uint64_t remaining_size; 758 759 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 760 761 if (remaining_size < max_size) { 762 qemu_mutex_lock_iothread(); 763 migration_bitmap_sync(); 764 qemu_mutex_unlock_iothread(); 765 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 766 } 767 return remaining_size; 768 } 769 770 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 771 { 772 int ret, rc = 0; 773 unsigned int xh_len; 774 int xh_flags; 775 776 if (!XBZRLE.decoded_buf) { 777 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 778 } 779 780 /* extract RLE header */ 781 xh_flags = qemu_get_byte(f); 782 xh_len = qemu_get_be16(f); 783 784 if (xh_flags != ENCODING_FLAG_XBZRLE) { 785 fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); 786 return -1; 787 } 788 789 if (xh_len > TARGET_PAGE_SIZE) { 790 fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); 791 return -1; 792 } 793 /* load data and decode */ 794 qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); 795 796 /* decode RLE */ 797 ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, 798 TARGET_PAGE_SIZE); 799 if (ret == -1) { 800 fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); 801 rc = -1; 802 } else if (ret > TARGET_PAGE_SIZE) { 803 fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", 804 ret, TARGET_PAGE_SIZE); 805 abort(); 806 } 807 808 return rc; 809 } 810 811 static inline void *host_from_stream_offset(QEMUFile *f, 812 ram_addr_t offset, 813 int flags) 814 { 815 static RAMBlock *block = NULL; 816 char id[256]; 817 uint8_t len; 818 819 if (flags & RAM_SAVE_FLAG_CONTINUE) { 820 if (!block) { 821 fprintf(stderr, "Ack, bad migration stream!\n"); 822 return NULL; 823 } 824 825 return memory_region_get_ram_ptr(block->mr) + offset; 826 } 827 828 len = qemu_get_byte(f); 829 qemu_get_buffer(f, (uint8_t *)id, len); 830 id[len] = 0; 831 832 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 833 if (!strncmp(id, block->idstr, sizeof(id))) 834 return memory_region_get_ram_ptr(block->mr) + offset; 835 } 836 837 fprintf(stderr, "Can't find block %s!\n", id); 838 return NULL; 839 } 840 841 /* 842 * If a page (or a whole RDMA chunk) has been 843 * determined to be zero, then zap it. 844 */ 845 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 846 { 847 if (ch != 0 || !is_zero_page(host)) { 848 memset(host, ch, size); 849 #ifndef _WIN32 850 if (ch == 0 && 851 (!kvm_enabled() || kvm_has_sync_mmu()) && 852 getpagesize() <= TARGET_PAGE_SIZE) { 853 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); 854 } 855 #endif 856 } 857 } 858 859 static int ram_load(QEMUFile *f, void *opaque, int version_id) 860 { 861 ram_addr_t addr; 862 int flags, ret = 0; 863 int error; 864 static uint64_t seq_iter; 865 866 seq_iter++; 867 868 if (version_id < 4 || version_id > 4) { 869 return -EINVAL; 870 } 871 872 do { 873 addr = qemu_get_be64(f); 874 875 flags = addr & ~TARGET_PAGE_MASK; 876 addr &= TARGET_PAGE_MASK; 877 878 if (flags & RAM_SAVE_FLAG_MEM_SIZE) { 879 if (version_id == 4) { 880 /* Synchronize RAM block list */ 881 char id[256]; 882 ram_addr_t length; 883 ram_addr_t total_ram_bytes = addr; 884 885 while (total_ram_bytes) { 886 RAMBlock *block; 887 uint8_t len; 888 889 len = qemu_get_byte(f); 890 qemu_get_buffer(f, (uint8_t *)id, len); 891 id[len] = 0; 892 length = qemu_get_be64(f); 893 894 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 895 if (!strncmp(id, block->idstr, sizeof(id))) { 896 if (block->length != length) { 897 fprintf(stderr, 898 "Length mismatch: %s: " RAM_ADDR_FMT 899 " in != " RAM_ADDR_FMT "\n", id, length, 900 block->length); 901 ret = -EINVAL; 902 goto done; 903 } 904 break; 905 } 906 } 907 908 if (!block) { 909 fprintf(stderr, "Unknown ramblock \"%s\", cannot " 910 "accept migration\n", id); 911 ret = -EINVAL; 912 goto done; 913 } 914 915 total_ram_bytes -= length; 916 } 917 } 918 } 919 920 if (flags & RAM_SAVE_FLAG_COMPRESS) { 921 void *host; 922 uint8_t ch; 923 924 host = host_from_stream_offset(f, addr, flags); 925 if (!host) { 926 return -EINVAL; 927 } 928 929 ch = qemu_get_byte(f); 930 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 931 } else if (flags & RAM_SAVE_FLAG_PAGE) { 932 void *host; 933 934 host = host_from_stream_offset(f, addr, flags); 935 if (!host) { 936 return -EINVAL; 937 } 938 939 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 940 } else if (flags & RAM_SAVE_FLAG_XBZRLE) { 941 void *host = host_from_stream_offset(f, addr, flags); 942 if (!host) { 943 return -EINVAL; 944 } 945 946 if (load_xbzrle(f, addr, host) < 0) { 947 ret = -EINVAL; 948 goto done; 949 } 950 } else if (flags & RAM_SAVE_FLAG_HOOK) { 951 ram_control_load_hook(f, flags); 952 } 953 error = qemu_file_get_error(f); 954 if (error) { 955 ret = error; 956 goto done; 957 } 958 } while (!(flags & RAM_SAVE_FLAG_EOS)); 959 960 done: 961 DPRINTF("Completed load of VM with exit code %d seq iteration " 962 "%" PRIu64 "\n", ret, seq_iter); 963 return ret; 964 } 965 966 SaveVMHandlers savevm_ram_handlers = { 967 .save_live_setup = ram_save_setup, 968 .save_live_iterate = ram_save_iterate, 969 .save_live_complete = ram_save_complete, 970 .save_live_pending = ram_save_pending, 971 .load_state = ram_load, 972 .cancel = ram_migration_cancel, 973 }; 974 975 struct soundhw { 976 const char *name; 977 const char *descr; 978 int enabled; 979 int isa; 980 union { 981 int (*init_isa) (ISABus *bus); 982 int (*init_pci) (PCIBus *bus); 983 } init; 984 }; 985 986 static struct soundhw soundhw[9]; 987 static int soundhw_count; 988 989 void isa_register_soundhw(const char *name, const char *descr, 990 int (*init_isa)(ISABus *bus)) 991 { 992 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 993 soundhw[soundhw_count].name = name; 994 soundhw[soundhw_count].descr = descr; 995 soundhw[soundhw_count].isa = 1; 996 soundhw[soundhw_count].init.init_isa = init_isa; 997 soundhw_count++; 998 } 999 1000 void pci_register_soundhw(const char *name, const char *descr, 1001 int (*init_pci)(PCIBus *bus)) 1002 { 1003 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 1004 soundhw[soundhw_count].name = name; 1005 soundhw[soundhw_count].descr = descr; 1006 soundhw[soundhw_count].isa = 0; 1007 soundhw[soundhw_count].init.init_pci = init_pci; 1008 soundhw_count++; 1009 } 1010 1011 void select_soundhw(const char *optarg) 1012 { 1013 struct soundhw *c; 1014 1015 if (is_help_option(optarg)) { 1016 show_valid_cards: 1017 1018 if (soundhw_count) { 1019 printf("Valid sound card names (comma separated):\n"); 1020 for (c = soundhw; c->name; ++c) { 1021 printf ("%-11s %s\n", c->name, c->descr); 1022 } 1023 printf("\n-soundhw all will enable all of the above\n"); 1024 } else { 1025 printf("Machine has no user-selectable audio hardware " 1026 "(it may or may not have always-present audio hardware).\n"); 1027 } 1028 exit(!is_help_option(optarg)); 1029 } 1030 else { 1031 size_t l; 1032 const char *p; 1033 char *e; 1034 int bad_card = 0; 1035 1036 if (!strcmp(optarg, "all")) { 1037 for (c = soundhw; c->name; ++c) { 1038 c->enabled = 1; 1039 } 1040 return; 1041 } 1042 1043 p = optarg; 1044 while (*p) { 1045 e = strchr(p, ','); 1046 l = !e ? strlen(p) : (size_t) (e - p); 1047 1048 for (c = soundhw; c->name; ++c) { 1049 if (!strncmp(c->name, p, l) && !c->name[l]) { 1050 c->enabled = 1; 1051 break; 1052 } 1053 } 1054 1055 if (!c->name) { 1056 if (l > 80) { 1057 fprintf(stderr, 1058 "Unknown sound card name (too big to show)\n"); 1059 } 1060 else { 1061 fprintf(stderr, "Unknown sound card name `%.*s'\n", 1062 (int) l, p); 1063 } 1064 bad_card = 1; 1065 } 1066 p += l + (e != NULL); 1067 } 1068 1069 if (bad_card) { 1070 goto show_valid_cards; 1071 } 1072 } 1073 } 1074 1075 void audio_init(void) 1076 { 1077 struct soundhw *c; 1078 ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL); 1079 PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL); 1080 1081 for (c = soundhw; c->name; ++c) { 1082 if (c->enabled) { 1083 if (c->isa) { 1084 if (!isa_bus) { 1085 fprintf(stderr, "ISA bus not available for %s\n", c->name); 1086 exit(1); 1087 } 1088 c->init.init_isa(isa_bus); 1089 } else { 1090 if (!pci_bus) { 1091 fprintf(stderr, "PCI bus not available for %s\n", c->name); 1092 exit(1); 1093 } 1094 c->init.init_pci(pci_bus); 1095 } 1096 } 1097 } 1098 } 1099 1100 int qemu_uuid_parse(const char *str, uint8_t *uuid) 1101 { 1102 int ret; 1103 1104 if (strlen(str) != 36) { 1105 return -1; 1106 } 1107 1108 ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3], 1109 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9], 1110 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], 1111 &uuid[15]); 1112 1113 if (ret != 16) { 1114 return -1; 1115 } 1116 #ifdef TARGET_I386 1117 smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16); 1118 #endif 1119 return 0; 1120 } 1121 1122 void do_acpitable_option(const QemuOpts *opts) 1123 { 1124 #ifdef TARGET_I386 1125 Error *err = NULL; 1126 1127 acpi_table_add(opts, &err); 1128 if (err) { 1129 error_report("Wrong acpi table provided: %s", 1130 error_get_pretty(err)); 1131 error_free(err); 1132 exit(1); 1133 } 1134 #endif 1135 } 1136 1137 void do_smbios_option(const char *optarg) 1138 { 1139 #ifdef TARGET_I386 1140 if (smbios_entry_add(optarg) < 0) { 1141 exit(1); 1142 } 1143 #endif 1144 } 1145 1146 void cpudef_init(void) 1147 { 1148 #if defined(cpudef_setup) 1149 cpudef_setup(); /* parse cpu definitions in target config file */ 1150 #endif 1151 } 1152 1153 int tcg_available(void) 1154 { 1155 return 1; 1156 } 1157 1158 int kvm_available(void) 1159 { 1160 #ifdef CONFIG_KVM 1161 return 1; 1162 #else 1163 return 0; 1164 #endif 1165 } 1166 1167 int xen_available(void) 1168 { 1169 #ifdef CONFIG_XEN 1170 return 1; 1171 #else 1172 return 0; 1173 #endif 1174 } 1175 1176 1177 TargetInfo *qmp_query_target(Error **errp) 1178 { 1179 TargetInfo *info = g_malloc0(sizeof(*info)); 1180 1181 info->arch = g_strdup(TARGET_NAME); 1182 1183 return info; 1184 } 1185 1186 /* Stub function that's gets run on the vcpu when its brought out of the 1187 VM to run inside qemu via async_run_on_cpu()*/ 1188 static void mig_sleep_cpu(void *opq) 1189 { 1190 qemu_mutex_unlock_iothread(); 1191 g_usleep(30*1000); 1192 qemu_mutex_lock_iothread(); 1193 } 1194 1195 /* To reduce the dirty rate explicitly disallow the VCPUs from spending 1196 much time in the VM. The migration thread will try to catchup. 1197 Workload will experience a performance drop. 1198 */ 1199 static void mig_throttle_guest_down(void) 1200 { 1201 CPUState *cpu; 1202 1203 qemu_mutex_lock_iothread(); 1204 CPU_FOREACH(cpu) { 1205 async_run_on_cpu(cpu, mig_sleep_cpu, NULL); 1206 } 1207 qemu_mutex_unlock_iothread(); 1208 } 1209 1210 static void check_guest_throttling(void) 1211 { 1212 static int64_t t0; 1213 int64_t t1; 1214 1215 if (!mig_throttle_on) { 1216 return; 1217 } 1218 1219 if (!t0) { 1220 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1221 return; 1222 } 1223 1224 t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1225 1226 /* If it has been more than 40 ms since the last time the guest 1227 * was throttled then do it again. 1228 */ 1229 if (40 < (t1-t0)/1000000) { 1230 mig_throttle_guest_down(); 1231 t0 = t1; 1232 } 1233 } 1234