/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"

#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif


#if defined(TARGET_ALPHA)
#define QEMU_ARCH QEMU_ARCH_ALPHA
#elif defined(TARGET_ARM)
#define QEMU_ARCH QEMU_ARCH_ARM
#elif defined(TARGET_CRIS)
#define QEMU_ARCH QEMU_ARCH_CRIS
#elif defined(TARGET_I386)
#define QEMU_ARCH QEMU_ARCH_I386
#elif defined(TARGET_M68K)
#define QEMU_ARCH QEMU_ARCH_M68K
#elif defined(TARGET_LM32)
#define QEMU_ARCH QEMU_ARCH_LM32
#elif defined(TARGET_MICROBLAZE)
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
#define QEMU_ARCH QEMU_ARCH_PPC
#elif defined(TARGET_S390X)
#define QEMU_ARCH QEMU_ARCH_S390X
#elif defined(TARGET_SH4)
#define QEMU_ARCH QEMU_ARCH_SH4
#elif defined(TARGET_SPARC)
#define QEMU_ARCH QEMU_ARCH_SPARC
#elif defined(TARGET_XTENSA)
#define QEMU_ARCH QEMU_ARCH_XTENSA
#elif defined(TARGET_UNICORE32)
#define QEMU_ARCH QEMU_ARCH_UNICORE32
#endif

const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */

static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
} default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
    { NULL }, /* end of list */
};

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

int qemu_read_default_config_files(bool userconfig)
{
    int ret;
    struct defconfig_file *f;

    for (f = default_config_files; f->filename; f++) {
        if (!userconfig && f->userconfig) {
            continue;
        }
        ret = qemu_read_config_file(f->filename);
        if (ret < 0 && ret != -ENOENT) {
            return ret;
        }
    }

    return 0;
}

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE */
    PageCache *cache;
} XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
    .cache = NULL,
};
/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

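/* Resize the XBZRLE page cache.  new_size is in bytes and must be at least
 * one target page.  Returns the resulting cache size in bytes, or -1 on
 * error; if no cache exists yet, only the rounded (power-of-two) size is
 * returned.
 */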
int64_t xbzrle_cache_resize(int64_t new_size)
{
    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    if (XBZRLE.cache != NULL) {
        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
            TARGET_PAGE_SIZE;
    }
    return pow2floor(new_size);
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

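/* Write the header for one RAM page: the page offset within its RAMBlock
 * OR'ed with the cont/flag bits, followed by the block's idstr unless
 * RAM_SAVE_FLAG_CONTINUE is set.  Returns the number of header bytes
 * written.
 */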
static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                             int cont, int flag)
{
    size_t size;

    qemu_put_be64(f, offset | cont | flag);
    size = 8;

    if (!cont) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
}

#define ENCODING_FLAG_XBZRLE 0x1

static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, int cont, bool last_stage)
{
    int encoded_len = 0, bytes_sent = -1;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
                return -1;
            }
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_sent += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_sent;

    return bytes_sent;
}

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);

    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

    if (!ret) {
        migration_dirty_pages++;
    }
    return ret;
}

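/* Fold the dirty bits accumulated in
 * ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] for [start, start + length)
 * into migration_bitmap and update migration_dirty_pages.  When the range
 * starts on a bitmap word boundary the bits are merged a whole word at a
 * time, otherwise we fall back to testing and resetting one page at a time.
 */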
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];

        for (k = page; k < page + nr; k++) {
            if (src[k]) {
                unsigned long new_dirty;
                new_dirty = ~migration_bitmap[k];
                migration_bitmap[k] |= src[k];
                new_dirty &= src[k];
                migration_dirty_pages += ctpopl(new_dirty);
                src[k] = 0;
            }
        }
    } else {
        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_get_dirty(start + addr,
                                              TARGET_PAGE_SIZE,
                                              DIRTY_MEMORY_MIGRATION)) {
                cpu_physical_memory_reset_dirty(start + addr,
                                                TARGET_PAGE_SIZE,
                                                DIRTY_MEMORY_MIGRATION);
                migration_bitmap_set_dirty(start + addr);
            }
        }
    }
}


/* Needs iothread lock! */

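/* Re-scan the dirty log for every RAM block, fold the result into
 * migration_bitmap, and (roughly once a second) update the dirty-page rate
 * statistics used by auto-converge throttling.
 */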
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
    static int64_t bytes_xfer_prev;
    static int64_t num_dirty_pages_period;
    int64_t end_time;
    int64_t bytes_xfer_now;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens >N times (for now N==4)
               we turn on the throttle down logic */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                 (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
}

/*
 * ram_save_block: Writes a page of memory to the stream f
 *
 * Returns:  The number of bytes written.
 *           0 means no dirty pages
 */

static int ram_save_block(QEMUFile *f, bool last_stage)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int bytes_sent = 0;
    MemoryRegion *mr;
    ram_addr_t current_addr;

    if (!block)
        block = QTAILQ_FIRST(&ram_list.blocks);

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->length) {
            offset = 0;
            block = QTAILQ_NEXT(block, next);
            if (!block) {
                block = QTAILQ_FIRST(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
            }
        } else {
            int ret;
            uint8_t *p;
            bool send_async = true;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

            p = memory_region_get_ram_ptr(mr) + offset;

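            /* Give the registered save hook (the RDMA path) a chance to
             * transfer this page itself; RAM_SAVE_CONTROL_NOT_SUPP means
             * no hook took it and we fall through to the zero-page, XBZRLE
             * and normal-page paths below.
             */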
            /* When in doubt, send the page as a normal page */
            bytes_sent = -1;
            ret = ram_control_save_page(f, block->offset,
                                        offset, TARGET_PAGE_SIZE, &bytes_sent);

            current_addr = block->offset + offset;
            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
                        acct_info.norm_pages++;
                    } else if (bytes_sent == 0) {
                        acct_info.dup_pages++;
                    }
                }
            } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
                acct_info.dup_pages++;
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
                 * page would be stale
                 */
                xbzrle_cache_zero_page(current_addr);
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
                    /* We must send exactly what's in the xbzrle cache
                     * even if the page wasn't xbzrle compressed, so that
                     * it's right next time.
                     */
                    p = get_cached_data(XBZRLE.cache, current_addr);

                    /* Can't send this cached data async, since the cache page
                     * might get updated before it gets to the wire
                     */
                    send_async = false;
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                if (send_async) {
                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                } else {
                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                }
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }

            /* if page is unmodified, continue to the next */
            if (bytes_sent > 0) {
                last_sent_block = block;
                break;
            }
        }
    }
    last_seen_block = block;
    last_offset = offset;

    return bytes_sent;
}

static uint64_t bytes_transferred;

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        total += block->length;

    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

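/* Prepare the first migration pass: mark all RAM pages dirty, set up the
 * optional XBZRLE cache and its buffers, start dirty logging, and write
 * the RAM block list (idstr and length per block) to the stream.
 */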
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    migration_bitmap = bitmap_new(ram_pages);
    bitmap_set(migration_bitmap, 0, ram_pages);
    migration_dirty_pages = ram_pages;
    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            DPRINTF("Error creating cache\n");
            return -1;
        }

        /* We prefer not to abort if there is no memory */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            DPRINTF("Error allocating encoded_buf\n");
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            DPRINTF("Error allocating current_buf\n");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            return -1;
        }

        acct_clear();
    }

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    bytes_transferred = 0;
    reset_ram_globals();

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->length);
    }

    qemu_mutex_unlock_ramlist();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

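/* One pass of the iterative stage: keep sending dirty pages until the
 * bandwidth limit kicks in, no dirty pages are left, or this call has taken
 * more than MAX_WAIT ms.  Returns the number of page bytes sent (0 if
 * nothing was dirty) or a negative error code; the trailing EOS marker is
 * not counted.
 */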
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int total_sent = 0;

    qemu_mutex_lock_ramlist();

    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, false);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        total_sent += bytes_sent;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check once every
           few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }

    qemu_mutex_unlock_ramlist();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    bytes_transferred += total_sent;

    /*
     * Do not count these 8 bytes into total_sent, so that we can
     * return 0 if no page had been dirtied.
     */
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return total_sent;
}

static int ram_save_complete(QEMUFile *f, void *opaque)
{
    qemu_mutex_lock_ramlist();
    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, true);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        bytes_transferred += bytes_sent;
    }

    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    qemu_mutex_unlock_ramlist();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        migration_bitmap_sync();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    int ret, rc = 0;
    unsigned int xh_len;
    int xh_flags;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
    ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
        rc = -1;
    } else if (ret > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
                ret, TARGET_PAGE_SIZE);
        abort();
    }

    return rc;
}

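/* Translate a (RAMBlock idstr, offset) pair from the stream into a host
 * pointer.  With RAM_SAVE_FLAG_CONTINUE the idstr is omitted and the block
 * from the previous call (kept in a static variable) is reused; otherwise
 * the idstr is read and looked up in ram_list.  Returns NULL if the stream
 * names an unknown block.
 */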
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            fprintf(stderr, "Ack, bad migration stream!\n");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id)))
            return memory_region_get_ram_ptr(block->mr) + offset;
    }

    fprintf(stderr, "Can't find block %s!\n", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    ram_addr_t addr;
    int flags, ret = 0;
    int error;
    static uint64_t seq_iter;

    seq_iter++;

    if (version_id < 4 || version_id > 4) {
        return -EINVAL;
    }

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
            if (version_id == 4) {
                /* Synchronize RAM block list */
                char id[256];
                ram_addr_t length;
                ram_addr_t total_ram_bytes = addr;

                while (total_ram_bytes) {
                    RAMBlock *block;
                    uint8_t len;

                    len = qemu_get_byte(f);
                    qemu_get_buffer(f, (uint8_t *)id, len);
                    id[len] = 0;
                    length = qemu_get_be64(f);

                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                        if (!strncmp(id, block->idstr, sizeof(id))) {
                            if (block->length != length) {
                                fprintf(stderr,
                                        "Length mismatch: %s: " RAM_ADDR_FMT
                                        " in != " RAM_ADDR_FMT "\n", id, length,
                                        block->length);
                                ret = -EINVAL;
                                goto done;
                            }
                            break;
                        }
                    }

                    if (!block) {
                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                "accept migration\n", id);
                        ret = -EINVAL;
                        goto done;
                    }

                    total_ram_bytes -= length;
                }
            }
        }

        if (flags & RAM_SAVE_FLAG_COMPRESS) {
            void *host;
            uint8_t ch;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
            void *host;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
            void *host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            if (load_xbzrle(f, addr, host) < 0) {
                ret = -EINVAL;
                goto done;
            }
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
            ram_control_load_hook(f, flags);
        }
        error = qemu_file_get_error(f);
        if (error) {
            ret = error;
            goto done;
        }
    } while (!(flags & RAM_SAVE_FLAG_EOS));

done:
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

struct soundhw {
    const char *name;
    const char *descr;
    int enabled;
    int isa;
    union {
        int (*init_isa) (ISABus *bus);
        int (*init_pci) (PCIBus *bus);
    } init;
};

static struct soundhw soundhw[9];
static int soundhw_count;

void isa_register_soundhw(const char *name, const char *descr,
                          int (*init_isa)(ISABus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 1;
    soundhw[soundhw_count].init.init_isa = init_isa;
    soundhw_count++;
}

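/* Counterpart of isa_register_soundhw() for sound cards that sit on a
 * PCI bus.
 */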
void pci_register_soundhw(const char *name, const char *descr,
                          int (*init_pci)(PCIBus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 0;
    soundhw[soundhw_count].init.init_pci = init_pci;
    soundhw_count++;
}

void select_soundhw(const char *optarg)
{
    struct soundhw *c;

    if (is_help_option(optarg)) {
    show_valid_cards:

        if (soundhw_count) {
            printf("Valid sound card names (comma separated):\n");
            for (c = soundhw; c->name; ++c) {
                printf("%-11s %s\n", c->name, c->descr);
            }
            printf("\n-soundhw all will enable all of the above\n");
        } else {
            printf("Machine has no user-selectable audio hardware "
                   "(it may or may not have always-present audio hardware).\n");
        }
        exit(!is_help_option(optarg));
    }
    else {
        size_t l;
        const char *p;
        char *e;
        int bad_card = 0;

        if (!strcmp(optarg, "all")) {
            for (c = soundhw; c->name; ++c) {
                c->enabled = 1;
            }
            return;
        }

        p = optarg;
        while (*p) {
            e = strchr(p, ',');
            l = !e ? strlen(p) : (size_t) (e - p);

            for (c = soundhw; c->name; ++c) {
                if (!strncmp(c->name, p, l) && !c->name[l]) {
                    c->enabled = 1;
                    break;
                }
            }

            if (!c->name) {
                if (l > 80) {
                    fprintf(stderr,
                            "Unknown sound card name (too big to show)\n");
                }
                else {
                    fprintf(stderr, "Unknown sound card name `%.*s'\n",
                            (int) l, p);
                }
                bad_card = 1;
            }
            p += l + (e != NULL);
        }

        if (bad_card) {
            goto show_valid_cards;
        }
    }
}

void audio_init(void)
{
    struct soundhw *c;
    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);

    for (c = soundhw; c->name; ++c) {
        if (c->enabled) {
            if (c->isa) {
                if (!isa_bus) {
                    fprintf(stderr, "ISA bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_isa(isa_bus);
            } else {
                if (!pci_bus) {
                    fprintf(stderr, "PCI bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_pci(pci_bus);
            }
        }
    }
}

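/* Parse a canonical 36-character UUID string into the 16-byte binary form.
 * Returns 0 on success, -1 if the string has the wrong length or does not
 * scan with UUID_FMT.
 */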
int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
    int ret;

    if (strlen(str) != 36) {
        return -1;
    }

    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
                 &uuid[15]);

    if (ret != 16) {
        return -1;
    }
    return 0;
}

void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
    Error *err = NULL;

    acpi_table_add(opts, &err);
    if (err) {
        error_report("Wrong acpi table provided: %s",
                     error_get_pretty(err));
        error_free(err);
        exit(1);
    }
#endif
}

void do_smbios_option(QemuOpts *opts)
{
#ifdef TARGET_I386
    smbios_entry_add(opts);
#endif
}

void cpudef_init(void)
{
#if defined(cpudef_setup)
    cpudef_setup(); /* parse cpu definitions in target config file */
#endif
}

int tcg_available(void)
{
    return 1;
}

int kvm_available(void)
{
#ifdef CONFIG_KVM
    return 1;
#else
    return 0;
#endif
}

int xen_available(void)
{
#ifdef CONFIG_XEN
    return 1;
#else
    return 0;
#endif
}


TargetInfo *qmp_query_target(Error **errp)
{
    TargetInfo *info = g_malloc0(sizeof(*info));

    info->arch = g_strdup(TARGET_NAME);

    return info;
}

/* Stub function that gets run on the vcpu when it's brought out of the
   VM to run inside qemu via async_run_on_cpu()*/
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}

/* To reduce the dirty rate explicitly disallow the VCPUs from spending
   much time in the VM. The migration thread will try to catch up.
   Workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
    CPUState *cpu;

    qemu_mutex_lock_iothread();
    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
    }
    qemu_mutex_unlock_iothread();
}

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        return;
    }

    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1-t0)/1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}