1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include <stdint.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #ifndef _WIN32 28 #include <sys/types.h> 29 #include <sys/mman.h> 30 #endif 31 #include "config.h" 32 #include "monitor/monitor.h" 33 #include "sysemu/sysemu.h" 34 #include "qemu/bitops.h" 35 #include "qemu/bitmap.h" 36 #include "sysemu/arch_init.h" 37 #include "audio/audio.h" 38 #include "hw/i386/pc.h" 39 #include "hw/pci/pci.h" 40 #include "hw/audio/audio.h" 41 #include "sysemu/kvm.h" 42 #include "migration/migration.h" 43 #include "hw/i386/smbios.h" 44 #include "exec/address-spaces.h" 45 #include "hw/audio/pcspk.h" 46 #include "migration/page_cache.h" 47 #include "qemu/config-file.h" 48 #include "qmp-commands.h" 49 #include "trace.h" 50 #include "exec/cpu-all.h" 51 #include "hw/acpi/acpi.h" 52 53 #ifdef DEBUG_ARCH_INIT 54 #define DPRINTF(fmt, ...) \ 55 do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) 56 #else 57 #define DPRINTF(fmt, ...) \ 58 do { } while (0) 59 #endif 60 61 #ifdef TARGET_SPARC 62 int graphic_width = 1024; 63 int graphic_height = 768; 64 int graphic_depth = 8; 65 #else 66 int graphic_width = 800; 67 int graphic_height = 600; 68 int graphic_depth = 32; 69 #endif 70 71 72 #if defined(TARGET_ALPHA) 73 #define QEMU_ARCH QEMU_ARCH_ALPHA 74 #elif defined(TARGET_ARM) 75 #define QEMU_ARCH QEMU_ARCH_ARM 76 #elif defined(TARGET_CRIS) 77 #define QEMU_ARCH QEMU_ARCH_CRIS 78 #elif defined(TARGET_I386) 79 #define QEMU_ARCH QEMU_ARCH_I386 80 #elif defined(TARGET_M68K) 81 #define QEMU_ARCH QEMU_ARCH_M68K 82 #elif defined(TARGET_LM32) 83 #define QEMU_ARCH QEMU_ARCH_LM32 84 #elif defined(TARGET_MICROBLAZE) 85 #define QEMU_ARCH QEMU_ARCH_MICROBLAZE 86 #elif defined(TARGET_MIPS) 87 #define QEMU_ARCH QEMU_ARCH_MIPS 88 #elif defined(TARGET_MOXIE) 89 #define QEMU_ARCH QEMU_ARCH_MOXIE 90 #elif defined(TARGET_OPENRISC) 91 #define QEMU_ARCH QEMU_ARCH_OPENRISC 92 #elif defined(TARGET_PPC) 93 #define QEMU_ARCH QEMU_ARCH_PPC 94 #elif defined(TARGET_S390X) 95 #define QEMU_ARCH QEMU_ARCH_S390X 96 #elif defined(TARGET_SH4) 97 #define QEMU_ARCH QEMU_ARCH_SH4 98 #elif defined(TARGET_SPARC) 99 #define QEMU_ARCH QEMU_ARCH_SPARC 100 #elif defined(TARGET_XTENSA) 101 #define QEMU_ARCH QEMU_ARCH_XTENSA 102 #elif defined(TARGET_UNICORE32) 103 #define QEMU_ARCH QEMU_ARCH_UNICORE32 104 #endif 105 106 const uint32_t arch_type = QEMU_ARCH; 107 static bool mig_throttle_on; 108 static int dirty_rate_high_cnt; 109 static void check_guest_throttling(void); 110 111 /***********************************************************/ 112 /* ram save/restore */ 113 114 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ 115 #define RAM_SAVE_FLAG_COMPRESS 0x02 116 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 117 #define RAM_SAVE_FLAG_PAGE 0x08 118 #define RAM_SAVE_FLAG_EOS 0x10 119 #define RAM_SAVE_FLAG_CONTINUE 0x20 120 #define RAM_SAVE_FLAG_XBZRLE 0x40 121 /* 0x80 is reserved in migration.h start with 0x100 next */ 122 123 124 static struct defconfig_file { 125 const char *filename; 126 /* Indicates it is an user config file (disabled by -no-user-config) */ 127 bool userconfig; 128 } default_config_files[] = { 129 { CONFIG_QEMU_CONFDIR "/qemu.conf", true }, 130 { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true }, 131 { NULL }, /* end of list */ 132 }; 133 134 135 int qemu_read_default_config_files(bool userconfig) 136 { 137 int ret; 138 struct defconfig_file *f; 139 140 for (f = default_config_files; f->filename; f++) { 141 if (!userconfig && f->userconfig) { 142 continue; 143 } 144 ret = qemu_read_config_file(f->filename); 145 if (ret < 0 && ret != -ENOENT) { 146 return ret; 147 } 148 } 149 150 return 0; 151 } 152 153 static inline bool is_zero_range(uint8_t *p, uint64_t size) 154 { 155 return buffer_find_nonzero_offset(p, size) == size; 156 } 157 158 /* struct contains XBZRLE cache and a static page 159 used by the compression */ 160 static struct { 161 /* buffer used for XBZRLE encoding */ 162 uint8_t *encoded_buf; 163 /* buffer for storing page content */ 164 uint8_t *current_buf; 165 /* buffer used for XBZRLE decoding */ 166 uint8_t *decoded_buf; 167 /* Cache for XBZRLE */ 168 PageCache *cache; 169 } XBZRLE = { 170 .encoded_buf = NULL, 171 .current_buf = NULL, 172 .decoded_buf = NULL, 173 .cache = NULL, 174 }; 175 176 177 int64_t xbzrle_cache_resize(int64_t new_size) 178 { 179 if (XBZRLE.cache != NULL) { 180 return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * 181 TARGET_PAGE_SIZE; 182 } 183 return pow2floor(new_size); 184 } 185 186 /* accounting for migration statistics */ 187 typedef struct AccountingInfo { 188 uint64_t dup_pages; 189 uint64_t skipped_pages; 190 uint64_t norm_pages; 191 uint64_t iterations; 192 uint64_t xbzrle_bytes; 193 uint64_t xbzrle_pages; 194 uint64_t xbzrle_cache_miss; 195 uint64_t xbzrle_overflows; 196 } AccountingInfo; 197 198 static AccountingInfo acct_info; 199 200 static void acct_clear(void) 201 { 202 memset(&acct_info, 0, sizeof(acct_info)); 203 } 204 205 uint64_t dup_mig_bytes_transferred(void) 206 { 207 return acct_info.dup_pages * TARGET_PAGE_SIZE; 208 } 209 210 uint64_t dup_mig_pages_transferred(void) 211 { 212 return acct_info.dup_pages; 213 } 214 215 uint64_t skipped_mig_bytes_transferred(void) 216 { 217 return acct_info.skipped_pages * TARGET_PAGE_SIZE; 218 } 219 220 uint64_t skipped_mig_pages_transferred(void) 221 { 222 return acct_info.skipped_pages; 223 } 224 225 uint64_t norm_mig_bytes_transferred(void) 226 { 227 return acct_info.norm_pages * TARGET_PAGE_SIZE; 228 } 229 230 uint64_t norm_mig_pages_transferred(void) 231 { 232 return acct_info.norm_pages; 233 } 234 235 uint64_t xbzrle_mig_bytes_transferred(void) 236 { 237 return acct_info.xbzrle_bytes; 238 } 239 240 uint64_t xbzrle_mig_pages_transferred(void) 241 { 242 return acct_info.xbzrle_pages; 243 } 244 245 uint64_t xbzrle_mig_pages_cache_miss(void) 246 { 247 return acct_info.xbzrle_cache_miss; 248 } 249 250 uint64_t xbzrle_mig_pages_overflow(void) 251 { 252 return acct_info.xbzrle_overflows; 253 } 254 255 static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, 256 int cont, int flag) 257 { 258 size_t size; 259 260 qemu_put_be64(f, offset | cont | flag); 261 size = 8; 262 263 if (!cont) { 264 qemu_put_byte(f, strlen(block->idstr)); 265 qemu_put_buffer(f, (uint8_t *)block->idstr, 266 strlen(block->idstr)); 267 size += 1 + strlen(block->idstr); 268 } 269 return size; 270 } 271 272 #define ENCODING_FLAG_XBZRLE 0x1 273 274 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, 275 ram_addr_t current_addr, RAMBlock *block, 276 ram_addr_t offset, int cont, bool last_stage) 277 { 278 int encoded_len = 0, bytes_sent = -1; 279 uint8_t *prev_cached_page; 280 281 if (!cache_is_cached(XBZRLE.cache, current_addr)) { 282 if (!last_stage) { 283 cache_insert(XBZRLE.cache, current_addr, current_data); 284 } 285 acct_info.xbzrle_cache_miss++; 286 return -1; 287 } 288 289 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 290 291 /* save current buffer into memory */ 292 memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); 293 294 /* XBZRLE encoding (if there is no overflow) */ 295 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 296 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 297 TARGET_PAGE_SIZE); 298 if (encoded_len == 0) { 299 DPRINTF("Skipping unmodified page\n"); 300 return 0; 301 } else if (encoded_len == -1) { 302 DPRINTF("Overflow\n"); 303 acct_info.xbzrle_overflows++; 304 /* update data in the cache */ 305 memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); 306 return -1; 307 } 308 309 /* we need to update the data in the cache, in order to get the same data */ 310 if (!last_stage) { 311 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 312 } 313 314 /* Send XBZRLE based compressed page */ 315 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); 316 qemu_put_byte(f, ENCODING_FLAG_XBZRLE); 317 qemu_put_be16(f, encoded_len); 318 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); 319 bytes_sent += encoded_len + 1 + 2; 320 acct_info.xbzrle_pages++; 321 acct_info.xbzrle_bytes += bytes_sent; 322 323 return bytes_sent; 324 } 325 326 327 /* This is the last block that we have visited serching for dirty pages 328 */ 329 static RAMBlock *last_seen_block; 330 /* This is the last block from where we have sent data */ 331 static RAMBlock *last_sent_block; 332 static ram_addr_t last_offset; 333 static unsigned long *migration_bitmap; 334 static uint64_t migration_dirty_pages; 335 static uint32_t last_version; 336 static bool ram_bulk_stage; 337 338 static inline 339 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, 340 ram_addr_t start) 341 { 342 unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; 343 unsigned long nr = base + (start >> TARGET_PAGE_BITS); 344 uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); 345 unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); 346 347 unsigned long next; 348 349 if (ram_bulk_stage && nr > base) { 350 next = nr + 1; 351 } else { 352 next = find_next_bit(migration_bitmap, size, nr); 353 } 354 355 if (next < size) { 356 clear_bit(next, migration_bitmap); 357 migration_dirty_pages--; 358 } 359 return (next - base) << TARGET_PAGE_BITS; 360 } 361 362 static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, 363 ram_addr_t offset) 364 { 365 bool ret; 366 int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; 367 368 ret = test_and_set_bit(nr, migration_bitmap); 369 370 if (!ret) { 371 migration_dirty_pages++; 372 } 373 return ret; 374 } 375 376 /* Needs iothread lock! */ 377 378 static void migration_bitmap_sync(void) 379 { 380 RAMBlock *block; 381 ram_addr_t addr; 382 uint64_t num_dirty_pages_init = migration_dirty_pages; 383 MigrationState *s = migrate_get_current(); 384 static int64_t start_time; 385 static int64_t bytes_xfer_prev; 386 static int64_t num_dirty_pages_period; 387 int64_t end_time; 388 int64_t bytes_xfer_now; 389 390 if (!bytes_xfer_prev) { 391 bytes_xfer_prev = ram_bytes_transferred(); 392 } 393 394 if (!start_time) { 395 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 396 } 397 398 trace_migration_bitmap_sync_start(); 399 address_space_sync_dirty_bitmap(&address_space_memory); 400 401 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 402 for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { 403 if (memory_region_test_and_clear_dirty(block->mr, 404 addr, TARGET_PAGE_SIZE, 405 DIRTY_MEMORY_MIGRATION)) { 406 migration_bitmap_set_dirty(block->mr, addr); 407 } 408 } 409 } 410 trace_migration_bitmap_sync_end(migration_dirty_pages 411 - num_dirty_pages_init); 412 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; 413 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 414 415 /* more than 1 second = 1000 millisecons */ 416 if (end_time > start_time + 1000) { 417 if (migrate_auto_converge()) { 418 /* The following detection logic can be refined later. For now: 419 Check to see if the dirtied bytes is 50% more than the approx. 420 amount of bytes that just got transferred since the last time we 421 were in this routine. If that happens >N times (for now N==4) 422 we turn on the throttle down logic */ 423 bytes_xfer_now = ram_bytes_transferred(); 424 if (s->dirty_pages_rate && 425 (num_dirty_pages_period * TARGET_PAGE_SIZE > 426 (bytes_xfer_now - bytes_xfer_prev)/2) && 427 (dirty_rate_high_cnt++ > 4)) { 428 trace_migration_throttle(); 429 mig_throttle_on = true; 430 dirty_rate_high_cnt = 0; 431 } 432 bytes_xfer_prev = bytes_xfer_now; 433 } else { 434 mig_throttle_on = false; 435 } 436 s->dirty_pages_rate = num_dirty_pages_period * 1000 437 / (end_time - start_time); 438 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; 439 start_time = end_time; 440 num_dirty_pages_period = 0; 441 } 442 } 443 444 /* 445 * ram_save_block: Writes a page of memory to the stream f 446 * 447 * Returns: The number of bytes written. 448 * 0 means no dirty pages 449 */ 450 451 static int ram_save_block(QEMUFile *f, bool last_stage) 452 { 453 RAMBlock *block = last_seen_block; 454 ram_addr_t offset = last_offset; 455 bool complete_round = false; 456 int bytes_sent = 0; 457 MemoryRegion *mr; 458 ram_addr_t current_addr; 459 460 if (!block) 461 block = QTAILQ_FIRST(&ram_list.blocks); 462 463 while (true) { 464 mr = block->mr; 465 offset = migration_bitmap_find_and_reset_dirty(mr, offset); 466 if (complete_round && block == last_seen_block && 467 offset >= last_offset) { 468 break; 469 } 470 if (offset >= block->length) { 471 offset = 0; 472 block = QTAILQ_NEXT(block, next); 473 if (!block) { 474 block = QTAILQ_FIRST(&ram_list.blocks); 475 complete_round = true; 476 ram_bulk_stage = false; 477 } 478 } else { 479 int ret; 480 uint8_t *p; 481 int cont = (block == last_sent_block) ? 482 RAM_SAVE_FLAG_CONTINUE : 0; 483 484 p = memory_region_get_ram_ptr(mr) + offset; 485 486 /* In doubt sent page as normal */ 487 bytes_sent = -1; 488 ret = ram_control_save_page(f, block->offset, 489 offset, TARGET_PAGE_SIZE, &bytes_sent); 490 491 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 492 if (ret != RAM_SAVE_CONTROL_DELAYED) { 493 if (bytes_sent > 0) { 494 acct_info.norm_pages++; 495 } else if (bytes_sent == 0) { 496 acct_info.dup_pages++; 497 } 498 } 499 } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { 500 acct_info.dup_pages++; 501 bytes_sent = save_block_hdr(f, block, offset, cont, 502 RAM_SAVE_FLAG_COMPRESS); 503 qemu_put_byte(f, 0); 504 bytes_sent++; 505 } else if (!ram_bulk_stage && migrate_use_xbzrle()) { 506 current_addr = block->offset + offset; 507 bytes_sent = save_xbzrle_page(f, p, current_addr, block, 508 offset, cont, last_stage); 509 if (!last_stage) { 510 p = get_cached_data(XBZRLE.cache, current_addr); 511 } 512 } 513 514 /* XBZRLE overflow or normal page */ 515 if (bytes_sent == -1) { 516 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); 517 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); 518 bytes_sent += TARGET_PAGE_SIZE; 519 acct_info.norm_pages++; 520 } 521 522 /* if page is unmodified, continue to the next */ 523 if (bytes_sent > 0) { 524 last_sent_block = block; 525 break; 526 } 527 } 528 } 529 last_seen_block = block; 530 last_offset = offset; 531 532 return bytes_sent; 533 } 534 535 static uint64_t bytes_transferred; 536 537 void acct_update_position(QEMUFile *f, size_t size, bool zero) 538 { 539 uint64_t pages = size / TARGET_PAGE_SIZE; 540 if (zero) { 541 acct_info.dup_pages += pages; 542 } else { 543 acct_info.norm_pages += pages; 544 bytes_transferred += size; 545 qemu_update_position(f, size); 546 } 547 } 548 549 static ram_addr_t ram_save_remaining(void) 550 { 551 return migration_dirty_pages; 552 } 553 554 uint64_t ram_bytes_remaining(void) 555 { 556 return ram_save_remaining() * TARGET_PAGE_SIZE; 557 } 558 559 uint64_t ram_bytes_transferred(void) 560 { 561 return bytes_transferred; 562 } 563 564 uint64_t ram_bytes_total(void) 565 { 566 RAMBlock *block; 567 uint64_t total = 0; 568 569 QTAILQ_FOREACH(block, &ram_list.blocks, next) 570 total += block->length; 571 572 return total; 573 } 574 575 static void migration_end(void) 576 { 577 if (migration_bitmap) { 578 memory_global_dirty_log_stop(); 579 g_free(migration_bitmap); 580 migration_bitmap = NULL; 581 } 582 583 if (XBZRLE.cache) { 584 cache_fini(XBZRLE.cache); 585 g_free(XBZRLE.cache); 586 g_free(XBZRLE.encoded_buf); 587 g_free(XBZRLE.current_buf); 588 g_free(XBZRLE.decoded_buf); 589 XBZRLE.cache = NULL; 590 } 591 } 592 593 static void ram_migration_cancel(void *opaque) 594 { 595 migration_end(); 596 } 597 598 static void reset_ram_globals(void) 599 { 600 last_seen_block = NULL; 601 last_sent_block = NULL; 602 last_offset = 0; 603 last_version = ram_list.version; 604 ram_bulk_stage = true; 605 } 606 607 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 608 609 static int ram_save_setup(QEMUFile *f, void *opaque) 610 { 611 RAMBlock *block; 612 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; 613 614 migration_bitmap = bitmap_new(ram_pages); 615 bitmap_set(migration_bitmap, 0, ram_pages); 616 migration_dirty_pages = ram_pages; 617 mig_throttle_on = false; 618 dirty_rate_high_cnt = 0; 619 620 if (migrate_use_xbzrle()) { 621 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / 622 TARGET_PAGE_SIZE, 623 TARGET_PAGE_SIZE); 624 if (!XBZRLE.cache) { 625 DPRINTF("Error creating cache\n"); 626 return -1; 627 } 628 XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); 629 XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); 630 acct_clear(); 631 } 632 633 qemu_mutex_lock_iothread(); 634 qemu_mutex_lock_ramlist(); 635 bytes_transferred = 0; 636 reset_ram_globals(); 637 638 memory_global_dirty_log_start(); 639 migration_bitmap_sync(); 640 qemu_mutex_unlock_iothread(); 641 642 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 643 644 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 645 qemu_put_byte(f, strlen(block->idstr)); 646 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 647 qemu_put_be64(f, block->length); 648 } 649 650 qemu_mutex_unlock_ramlist(); 651 652 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 653 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 654 655 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 656 657 return 0; 658 } 659 660 static int ram_save_iterate(QEMUFile *f, void *opaque) 661 { 662 int ret; 663 int i; 664 int64_t t0; 665 int total_sent = 0; 666 667 qemu_mutex_lock_ramlist(); 668 669 if (ram_list.version != last_version) { 670 reset_ram_globals(); 671 } 672 673 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 674 675 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 676 i = 0; 677 while ((ret = qemu_file_rate_limit(f)) == 0) { 678 int bytes_sent; 679 680 bytes_sent = ram_save_block(f, false); 681 /* no more blocks to sent */ 682 if (bytes_sent == 0) { 683 break; 684 } 685 total_sent += bytes_sent; 686 acct_info.iterations++; 687 check_guest_throttling(); 688 /* we want to check in the 1st loop, just in case it was the 1st time 689 and we had to sync the dirty bitmap. 690 qemu_get_clock_ns() is a bit expensive, so we only check each some 691 iterations 692 */ 693 if ((i & 63) == 0) { 694 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 695 if (t1 > MAX_WAIT) { 696 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", 697 t1, i); 698 break; 699 } 700 } 701 i++; 702 } 703 704 qemu_mutex_unlock_ramlist(); 705 706 /* 707 * Must occur before EOS (or any QEMUFile operation) 708 * because of RDMA protocol. 709 */ 710 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 711 712 bytes_transferred += total_sent; 713 714 /* 715 * Do not count these 8 bytes into total_sent, so that we can 716 * return 0 if no page had been dirtied. 717 */ 718 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 719 bytes_transferred += 8; 720 721 ret = qemu_file_get_error(f); 722 if (ret < 0) { 723 return ret; 724 } 725 726 return total_sent; 727 } 728 729 static int ram_save_complete(QEMUFile *f, void *opaque) 730 { 731 qemu_mutex_lock_ramlist(); 732 migration_bitmap_sync(); 733 734 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 735 736 /* try transferring iterative blocks of memory */ 737 738 /* flush all remaining blocks regardless of rate limiting */ 739 while (true) { 740 int bytes_sent; 741 742 bytes_sent = ram_save_block(f, true); 743 /* no more blocks to sent */ 744 if (bytes_sent == 0) { 745 break; 746 } 747 bytes_transferred += bytes_sent; 748 } 749 750 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 751 migration_end(); 752 753 qemu_mutex_unlock_ramlist(); 754 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 755 756 return 0; 757 } 758 759 static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) 760 { 761 uint64_t remaining_size; 762 763 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 764 765 if (remaining_size < max_size) { 766 qemu_mutex_lock_iothread(); 767 migration_bitmap_sync(); 768 qemu_mutex_unlock_iothread(); 769 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; 770 } 771 return remaining_size; 772 } 773 774 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 775 { 776 int ret, rc = 0; 777 unsigned int xh_len; 778 int xh_flags; 779 780 if (!XBZRLE.decoded_buf) { 781 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 782 } 783 784 /* extract RLE header */ 785 xh_flags = qemu_get_byte(f); 786 xh_len = qemu_get_be16(f); 787 788 if (xh_flags != ENCODING_FLAG_XBZRLE) { 789 fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); 790 return -1; 791 } 792 793 if (xh_len > TARGET_PAGE_SIZE) { 794 fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); 795 return -1; 796 } 797 /* load data and decode */ 798 qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); 799 800 /* decode RLE */ 801 ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, 802 TARGET_PAGE_SIZE); 803 if (ret == -1) { 804 fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); 805 rc = -1; 806 } else if (ret > TARGET_PAGE_SIZE) { 807 fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", 808 ret, TARGET_PAGE_SIZE); 809 abort(); 810 } 811 812 return rc; 813 } 814 815 static inline void *host_from_stream_offset(QEMUFile *f, 816 ram_addr_t offset, 817 int flags) 818 { 819 static RAMBlock *block = NULL; 820 char id[256]; 821 uint8_t len; 822 823 if (flags & RAM_SAVE_FLAG_CONTINUE) { 824 if (!block) { 825 fprintf(stderr, "Ack, bad migration stream!\n"); 826 return NULL; 827 } 828 829 return memory_region_get_ram_ptr(block->mr) + offset; 830 } 831 832 len = qemu_get_byte(f); 833 qemu_get_buffer(f, (uint8_t *)id, len); 834 id[len] = 0; 835 836 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 837 if (!strncmp(id, block->idstr, sizeof(id))) 838 return memory_region_get_ram_ptr(block->mr) + offset; 839 } 840 841 fprintf(stderr, "Can't find block %s!\n", id); 842 return NULL; 843 } 844 845 /* 846 * If a page (or a whole RDMA chunk) has been 847 * determined to be zero, then zap it. 848 */ 849 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 850 { 851 if (ch != 0 || !is_zero_range(host, size)) { 852 memset(host, ch, size); 853 } 854 } 855 856 static int ram_load(QEMUFile *f, void *opaque, int version_id) 857 { 858 ram_addr_t addr; 859 int flags, ret = 0; 860 int error; 861 static uint64_t seq_iter; 862 863 seq_iter++; 864 865 if (version_id < 4 || version_id > 4) { 866 return -EINVAL; 867 } 868 869 do { 870 addr = qemu_get_be64(f); 871 872 flags = addr & ~TARGET_PAGE_MASK; 873 addr &= TARGET_PAGE_MASK; 874 875 if (flags & RAM_SAVE_FLAG_MEM_SIZE) { 876 if (version_id == 4) { 877 /* Synchronize RAM block list */ 878 char id[256]; 879 ram_addr_t length; 880 ram_addr_t total_ram_bytes = addr; 881 882 while (total_ram_bytes) { 883 RAMBlock *block; 884 uint8_t len; 885 886 len = qemu_get_byte(f); 887 qemu_get_buffer(f, (uint8_t *)id, len); 888 id[len] = 0; 889 length = qemu_get_be64(f); 890 891 QTAILQ_FOREACH(block, &ram_list.blocks, next) { 892 if (!strncmp(id, block->idstr, sizeof(id))) { 893 if (block->length != length) { 894 fprintf(stderr, 895 "Length mismatch: %s: " RAM_ADDR_FMT 896 " in != " RAM_ADDR_FMT "\n", id, length, 897 block->length); 898 ret = -EINVAL; 899 goto done; 900 } 901 break; 902 } 903 } 904 905 if (!block) { 906 fprintf(stderr, "Unknown ramblock \"%s\", cannot " 907 "accept migration\n", id); 908 ret = -EINVAL; 909 goto done; 910 } 911 912 total_ram_bytes -= length; 913 } 914 } 915 } 916 917 if (flags & RAM_SAVE_FLAG_COMPRESS) { 918 void *host; 919 uint8_t ch; 920 921 host = host_from_stream_offset(f, addr, flags); 922 if (!host) { 923 return -EINVAL; 924 } 925 926 ch = qemu_get_byte(f); 927 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 928 } else if (flags & RAM_SAVE_FLAG_PAGE) { 929 void *host; 930 931 host = host_from_stream_offset(f, addr, flags); 932 if (!host) { 933 return -EINVAL; 934 } 935 936 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 937 } else if (flags & RAM_SAVE_FLAG_XBZRLE) { 938 void *host = host_from_stream_offset(f, addr, flags); 939 if (!host) { 940 return -EINVAL; 941 } 942 943 if (load_xbzrle(f, addr, host) < 0) { 944 ret = -EINVAL; 945 goto done; 946 } 947 } else if (flags & RAM_SAVE_FLAG_HOOK) { 948 ram_control_load_hook(f, flags); 949 } 950 error = qemu_file_get_error(f); 951 if (error) { 952 ret = error; 953 goto done; 954 } 955 } while (!(flags & RAM_SAVE_FLAG_EOS)); 956 957 done: 958 DPRINTF("Completed load of VM with exit code %d seq iteration " 959 "%" PRIu64 "\n", ret, seq_iter); 960 return ret; 961 } 962 963 SaveVMHandlers savevm_ram_handlers = { 964 .save_live_setup = ram_save_setup, 965 .save_live_iterate = ram_save_iterate, 966 .save_live_complete = ram_save_complete, 967 .save_live_pending = ram_save_pending, 968 .load_state = ram_load, 969 .cancel = ram_migration_cancel, 970 }; 971 972 struct soundhw { 973 const char *name; 974 const char *descr; 975 int enabled; 976 int isa; 977 union { 978 int (*init_isa) (ISABus *bus); 979 int (*init_pci) (PCIBus *bus); 980 } init; 981 }; 982 983 static struct soundhw soundhw[9]; 984 static int soundhw_count; 985 986 void isa_register_soundhw(const char *name, const char *descr, 987 int (*init_isa)(ISABus *bus)) 988 { 989 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 990 soundhw[soundhw_count].name = name; 991 soundhw[soundhw_count].descr = descr; 992 soundhw[soundhw_count].isa = 1; 993 soundhw[soundhw_count].init.init_isa = init_isa; 994 soundhw_count++; 995 } 996 997 void pci_register_soundhw(const char *name, const char *descr, 998 int (*init_pci)(PCIBus *bus)) 999 { 1000 assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); 1001 soundhw[soundhw_count].name = name; 1002 soundhw[soundhw_count].descr = descr; 1003 soundhw[soundhw_count].isa = 0; 1004 soundhw[soundhw_count].init.init_pci = init_pci; 1005 soundhw_count++; 1006 } 1007 1008 void select_soundhw(const char *optarg) 1009 { 1010 struct soundhw *c; 1011 1012 if (is_help_option(optarg)) { 1013 show_valid_cards: 1014 1015 if (soundhw_count) { 1016 printf("Valid sound card names (comma separated):\n"); 1017 for (c = soundhw; c->name; ++c) { 1018 printf ("%-11s %s\n", c->name, c->descr); 1019 } 1020 printf("\n-soundhw all will enable all of the above\n"); 1021 } else { 1022 printf("Machine has no user-selectable audio hardware " 1023 "(it may or may not have always-present audio hardware).\n"); 1024 } 1025 exit(!is_help_option(optarg)); 1026 } 1027 else { 1028 size_t l; 1029 const char *p; 1030 char *e; 1031 int bad_card = 0; 1032 1033 if (!strcmp(optarg, "all")) { 1034 for (c = soundhw; c->name; ++c) { 1035 c->enabled = 1; 1036 } 1037 return; 1038 } 1039 1040 p = optarg; 1041 while (*p) { 1042 e = strchr(p, ','); 1043 l = !e ? strlen(p) : (size_t) (e - p); 1044 1045 for (c = soundhw; c->name; ++c) { 1046 if (!strncmp(c->name, p, l) && !c->name[l]) { 1047 c->enabled = 1; 1048 break; 1049 } 1050 } 1051 1052 if (!c->name) { 1053 if (l > 80) { 1054 fprintf(stderr, 1055 "Unknown sound card name (too big to show)\n"); 1056 } 1057 else { 1058 fprintf(stderr, "Unknown sound card name `%.*s'\n", 1059 (int) l, p); 1060 } 1061 bad_card = 1; 1062 } 1063 p += l + (e != NULL); 1064 } 1065 1066 if (bad_card) { 1067 goto show_valid_cards; 1068 } 1069 } 1070 } 1071 1072 void audio_init(void) 1073 { 1074 struct soundhw *c; 1075 ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL); 1076 PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL); 1077 1078 for (c = soundhw; c->name; ++c) { 1079 if (c->enabled) { 1080 if (c->isa) { 1081 if (!isa_bus) { 1082 fprintf(stderr, "ISA bus not available for %s\n", c->name); 1083 exit(1); 1084 } 1085 c->init.init_isa(isa_bus); 1086 } else { 1087 if (!pci_bus) { 1088 fprintf(stderr, "PCI bus not available for %s\n", c->name); 1089 exit(1); 1090 } 1091 c->init.init_pci(pci_bus); 1092 } 1093 } 1094 } 1095 } 1096 1097 int qemu_uuid_parse(const char *str, uint8_t *uuid) 1098 { 1099 int ret; 1100 1101 if (strlen(str) != 36) { 1102 return -1; 1103 } 1104 1105 ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3], 1106 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9], 1107 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], 1108 &uuid[15]); 1109 1110 if (ret != 16) { 1111 return -1; 1112 } 1113 return 0; 1114 } 1115 1116 void do_acpitable_option(const QemuOpts *opts) 1117 { 1118 #ifdef TARGET_I386 1119 Error *err = NULL; 1120 1121 acpi_table_add(opts, &err); 1122 if (err) { 1123 error_report("Wrong acpi table provided: %s", 1124 error_get_pretty(err)); 1125 error_free(err); 1126 exit(1); 1127 } 1128 #endif 1129 } 1130 1131 void do_smbios_option(QemuOpts *opts) 1132 { 1133 #ifdef TARGET_I386 1134 smbios_entry_add(opts); 1135 #endif 1136 } 1137 1138 void cpudef_init(void) 1139 { 1140 #if defined(cpudef_setup) 1141 cpudef_setup(); /* parse cpu definitions in target config file */ 1142 #endif 1143 } 1144 1145 int tcg_available(void) 1146 { 1147 return 1; 1148 } 1149 1150 int kvm_available(void) 1151 { 1152 #ifdef CONFIG_KVM 1153 return 1; 1154 #else 1155 return 0; 1156 #endif 1157 } 1158 1159 int xen_available(void) 1160 { 1161 #ifdef CONFIG_XEN 1162 return 1; 1163 #else 1164 return 0; 1165 #endif 1166 } 1167 1168 1169 TargetInfo *qmp_query_target(Error **errp) 1170 { 1171 TargetInfo *info = g_malloc0(sizeof(*info)); 1172 1173 info->arch = g_strdup(TARGET_NAME); 1174 1175 return info; 1176 } 1177 1178 /* Stub function that's gets run on the vcpu when its brought out of the 1179 VM to run inside qemu via async_run_on_cpu()*/ 1180 static void mig_sleep_cpu(void *opq) 1181 { 1182 qemu_mutex_unlock_iothread(); 1183 g_usleep(30*1000); 1184 qemu_mutex_lock_iothread(); 1185 } 1186 1187 /* To reduce the dirty rate explicitly disallow the VCPUs from spending 1188 much time in the VM. The migration thread will try to catchup. 1189 Workload will experience a performance drop. 1190 */ 1191 static void mig_throttle_guest_down(void) 1192 { 1193 CPUState *cpu; 1194 1195 qemu_mutex_lock_iothread(); 1196 CPU_FOREACH(cpu) { 1197 async_run_on_cpu(cpu, mig_sleep_cpu, NULL); 1198 } 1199 qemu_mutex_unlock_iothread(); 1200 } 1201 1202 static void check_guest_throttling(void) 1203 { 1204 static int64_t t0; 1205 int64_t t1; 1206 1207 if (!mig_throttle_on) { 1208 return; 1209 } 1210 1211 if (!t0) { 1212 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1213 return; 1214 } 1215 1216 t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 1217 1218 /* If it has been more than 40 ms since the last time the guest 1219 * was throttled then do it again. 1220 */ 1221 if (40 < (t1-t0)/1000000) { 1222 mig_throttle_guest_down(); 1223 t0 = t1; 1224 } 1225 } 1226