1 /* 2 * Virtio Balloon Device 3 * 4 * Copyright IBM, Corp. 2008 5 * Copyright (C) 2011 Red Hat, Inc. 6 * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com> 7 * 8 * Authors: 9 * Anthony Liguori <aliguori@us.ibm.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2. See 12 * the COPYING file in the top-level directory. 13 * 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qemu/iov.h" 18 #include "qemu/module.h" 19 #include "qemu/timer.h" 20 #include "hw/virtio/virtio.h" 21 #include "hw/mem/pc-dimm.h" 22 #include "sysemu/balloon.h" 23 #include "hw/virtio/virtio-balloon.h" 24 #include "exec/address-spaces.h" 25 #include "qapi/error.h" 26 #include "qapi/qapi-events-misc.h" 27 #include "qapi/visitor.h" 28 #include "trace.h" 29 #include "qemu/error-report.h" 30 #include "migration/misc.h" 31 32 #include "hw/virtio/virtio-bus.h" 33 #include "hw/virtio/virtio-access.h" 34 35 #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) 36 37 typedef struct PartiallyBalloonedPage { 38 ram_addr_t base_gpa; 39 long subpages; 40 unsigned long *bitmap; 41 } PartiallyBalloonedPage; 42 43 static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) 44 { 45 if (!pbp) { 46 return; 47 } 48 g_free(pbp->bitmap); 49 g_free(pbp); 50 } 51 52 static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, 53 long subpages) 54 { 55 PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); 56 57 pbp->base_gpa = base_gpa; 58 pbp->subpages = subpages; 59 pbp->bitmap = bitmap_new(subpages); 60 61 return pbp; 62 } 63 64 static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, 65 ram_addr_t base_gpa, long subpages) 66 { 67 return pbp->subpages == subpages && pbp->base_gpa == base_gpa; 68 } 69 70 static void balloon_inflate_page(VirtIOBalloon *balloon, 71 MemoryRegion *mr, hwaddr mr_offset, 72 PartiallyBalloonedPage **pbp) 73 { 74 void *addr = memory_region_get_ram_ptr(mr) + mr_offset; 75 ram_addr_t rb_offset, rb_aligned_offset, base_gpa; 76 RAMBlock *rb; 77 size_t rb_page_size; 78 int subpages; 79 80 /* XXX is there a better way to get to the RAMBlock than via a 81 * host address? */ 82 rb = qemu_ram_block_from_host(addr, false, &rb_offset); 83 rb_page_size = qemu_ram_pagesize(rb); 84 85 if (rb_page_size == BALLOON_PAGE_SIZE) { 86 /* Easy case */ 87 88 ram_block_discard_range(rb, rb_offset, rb_page_size); 89 /* We ignore errors from ram_block_discard_range(), because it 90 * has already reported them, and failing to discard a balloon 91 * page is not fatal */ 92 return; 93 } 94 95 /* Hard case 96 * 97 * We've put a piece of a larger host page into the balloon - we 98 * need to keep track until we have a whole host page to 99 * discard 100 */ 101 warn_report_once( 102 "Balloon used with backing page size > 4kiB, this may not be reliable"); 103 104 rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); 105 subpages = rb_page_size / BALLOON_PAGE_SIZE; 106 base_gpa = memory_region_get_ram_addr(mr) + mr_offset - 107 (rb_offset - rb_aligned_offset); 108 109 if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) { 110 /* We've partially ballooned part of a host page, but now 111 * we're trying to balloon part of a different one. Too hard, 112 * give up on the old partial page */ 113 virtio_balloon_pbp_free(*pbp); 114 *pbp = NULL; 115 } 116 117 if (!*pbp) { 118 *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages); 119 } 120 121 set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, 122 (*pbp)->bitmap); 123 124 if (bitmap_full((*pbp)->bitmap, subpages)) { 125 /* We've accumulated a full host page, we can actually discard 126 * it now */ 127 128 ram_block_discard_range(rb, rb_aligned_offset, rb_page_size); 129 /* We ignore errors from ram_block_discard_range(), because it 130 * has already reported them, and failing to discard a balloon 131 * page is not fatal */ 132 virtio_balloon_pbp_free(*pbp); 133 *pbp = NULL; 134 } 135 } 136 137 static void balloon_deflate_page(VirtIOBalloon *balloon, 138 MemoryRegion *mr, hwaddr mr_offset) 139 { 140 void *addr = memory_region_get_ram_ptr(mr) + mr_offset; 141 ram_addr_t rb_offset; 142 RAMBlock *rb; 143 size_t rb_page_size; 144 void *host_addr; 145 int ret; 146 147 /* XXX is there a better way to get to the RAMBlock than via a 148 * host address? */ 149 rb = qemu_ram_block_from_host(addr, false, &rb_offset); 150 rb_page_size = qemu_ram_pagesize(rb); 151 152 host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); 153 154 /* When a page is deflated, we hint the whole host page it lives 155 * on, since we can't do anything smaller */ 156 ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED); 157 if (ret != 0) { 158 warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s", 159 strerror(errno)); 160 /* Otherwise ignore, failing to page hint shouldn't be fatal */ 161 } 162 } 163 164 static const char *balloon_stat_names[] = { 165 [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in", 166 [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out", 167 [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults", 168 [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults", 169 [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", 170 [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", 171 [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory", 172 [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches", 173 [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc", 174 [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail", 175 [VIRTIO_BALLOON_S_NR] = NULL 176 }; 177 178 /* 179 * reset_stats - Mark all items in the stats array as unset 180 * 181 * This function needs to be called at device initialization and before 182 * updating to a set of newly-generated stats. This will ensure that no 183 * stale values stick around in case the guest reports a subset of the supported 184 * statistics. 185 */ 186 static inline void reset_stats(VirtIOBalloon *dev) 187 { 188 int i; 189 for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); 190 } 191 192 static bool balloon_stats_supported(const VirtIOBalloon *s) 193 { 194 VirtIODevice *vdev = VIRTIO_DEVICE(s); 195 return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ); 196 } 197 198 static bool balloon_stats_enabled(const VirtIOBalloon *s) 199 { 200 return s->stats_poll_interval > 0; 201 } 202 203 static void balloon_stats_destroy_timer(VirtIOBalloon *s) 204 { 205 if (balloon_stats_enabled(s)) { 206 timer_del(s->stats_timer); 207 timer_free(s->stats_timer); 208 s->stats_timer = NULL; 209 s->stats_poll_interval = 0; 210 } 211 } 212 213 static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs) 214 { 215 timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000); 216 } 217 218 static void balloon_stats_poll_cb(void *opaque) 219 { 220 VirtIOBalloon *s = opaque; 221 VirtIODevice *vdev = VIRTIO_DEVICE(s); 222 223 if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) { 224 /* re-schedule */ 225 balloon_stats_change_timer(s, s->stats_poll_interval); 226 return; 227 } 228 229 virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset); 230 virtio_notify(vdev, s->svq); 231 g_free(s->stats_vq_elem); 232 s->stats_vq_elem = NULL; 233 } 234 235 static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name, 236 void *opaque, Error **errp) 237 { 238 Error *err = NULL; 239 VirtIOBalloon *s = opaque; 240 int i; 241 242 visit_start_struct(v, name, NULL, 0, &err); 243 if (err) { 244 goto out; 245 } 246 visit_type_int(v, "last-update", &s->stats_last_update, &err); 247 if (err) { 248 goto out_end; 249 } 250 251 visit_start_struct(v, "stats", NULL, 0, &err); 252 if (err) { 253 goto out_end; 254 } 255 for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) { 256 visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err); 257 if (err) { 258 goto out_nested; 259 } 260 } 261 visit_check_struct(v, &err); 262 out_nested: 263 visit_end_struct(v, NULL); 264 265 if (!err) { 266 visit_check_struct(v, &err); 267 } 268 out_end: 269 visit_end_struct(v, NULL); 270 out: 271 error_propagate(errp, err); 272 } 273 274 static void balloon_stats_get_poll_interval(Object *obj, Visitor *v, 275 const char *name, void *opaque, 276 Error **errp) 277 { 278 VirtIOBalloon *s = opaque; 279 visit_type_int(v, name, &s->stats_poll_interval, errp); 280 } 281 282 static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, 283 const char *name, void *opaque, 284 Error **errp) 285 { 286 VirtIOBalloon *s = opaque; 287 Error *local_err = NULL; 288 int64_t value; 289 290 visit_type_int(v, name, &value, &local_err); 291 if (local_err) { 292 error_propagate(errp, local_err); 293 return; 294 } 295 296 if (value < 0) { 297 error_setg(errp, "timer value must be greater than zero"); 298 return; 299 } 300 301 if (value > UINT32_MAX) { 302 error_setg(errp, "timer value is too big"); 303 return; 304 } 305 306 if (value == s->stats_poll_interval) { 307 return; 308 } 309 310 if (value == 0) { 311 /* timer=0 disables the timer */ 312 balloon_stats_destroy_timer(s); 313 return; 314 } 315 316 if (balloon_stats_enabled(s)) { 317 /* timer interval change */ 318 s->stats_poll_interval = value; 319 balloon_stats_change_timer(s, value); 320 return; 321 } 322 323 /* create a new timer */ 324 g_assert(s->stats_timer == NULL); 325 s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s); 326 s->stats_poll_interval = value; 327 balloon_stats_change_timer(s, 0); 328 } 329 330 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) 331 { 332 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 333 PartiallyBalloonedPage *pbp = NULL; 334 VirtQueueElement *elem; 335 MemoryRegionSection section; 336 337 for (;;) { 338 size_t offset = 0; 339 uint32_t pfn; 340 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 341 if (!elem) { 342 break; 343 } 344 345 while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { 346 unsigned int p = virtio_ldl_p(vdev, &pfn); 347 hwaddr pa; 348 349 pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT; 350 offset += 4; 351 352 section = memory_region_find(get_system_memory(), pa, 353 BALLOON_PAGE_SIZE); 354 if (!section.mr) { 355 trace_virtio_balloon_bad_addr(pa); 356 continue; 357 } 358 if (!memory_region_is_ram(section.mr) || 359 memory_region_is_rom(section.mr) || 360 memory_region_is_romd(section.mr)) { 361 trace_virtio_balloon_bad_addr(pa); 362 memory_region_unref(section.mr); 363 continue; 364 } 365 366 trace_virtio_balloon_handle_output(memory_region_name(section.mr), 367 pa); 368 if (!qemu_balloon_is_inhibited()) { 369 if (vq == s->ivq) { 370 balloon_inflate_page(s, section.mr, 371 section.offset_within_region, &pbp); 372 } else if (vq == s->dvq) { 373 balloon_deflate_page(s, section.mr, section.offset_within_region); 374 } else { 375 g_assert_not_reached(); 376 } 377 } 378 memory_region_unref(section.mr); 379 } 380 381 virtqueue_push(vq, elem, offset); 382 virtio_notify(vdev, vq); 383 g_free(elem); 384 } 385 386 virtio_balloon_pbp_free(pbp); 387 } 388 389 static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) 390 { 391 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 392 VirtQueueElement *elem; 393 VirtIOBalloonStat stat; 394 size_t offset = 0; 395 qemu_timeval tv; 396 397 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 398 if (!elem) { 399 goto out; 400 } 401 402 if (s->stats_vq_elem != NULL) { 403 /* This should never happen if the driver follows the spec. */ 404 virtqueue_push(vq, s->stats_vq_elem, 0); 405 virtio_notify(vdev, vq); 406 g_free(s->stats_vq_elem); 407 } 408 409 s->stats_vq_elem = elem; 410 411 /* Initialize the stats to get rid of any stale values. This is only 412 * needed to handle the case where a guest supports fewer stats than it 413 * used to (ie. it has booted into an old kernel). 414 */ 415 reset_stats(s); 416 417 while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat)) 418 == sizeof(stat)) { 419 uint16_t tag = virtio_tswap16(vdev, stat.tag); 420 uint64_t val = virtio_tswap64(vdev, stat.val); 421 422 offset += sizeof(stat); 423 if (tag < VIRTIO_BALLOON_S_NR) 424 s->stats[tag] = val; 425 } 426 s->stats_vq_offset = offset; 427 428 if (qemu_gettimeofday(&tv) < 0) { 429 warn_report("%s: failed to get time of day", __func__); 430 goto out; 431 } 432 433 s->stats_last_update = tv.tv_sec; 434 435 out: 436 if (balloon_stats_enabled(s)) { 437 balloon_stats_change_timer(s, s->stats_poll_interval); 438 } 439 } 440 441 static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev, 442 VirtQueue *vq) 443 { 444 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 445 qemu_bh_schedule(s->free_page_bh); 446 } 447 448 static bool get_free_page_hints(VirtIOBalloon *dev) 449 { 450 VirtQueueElement *elem; 451 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 452 VirtQueue *vq = dev->free_page_vq; 453 bool ret = true; 454 455 while (dev->block_iothread) { 456 qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); 457 } 458 459 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 460 if (!elem) { 461 return false; 462 } 463 464 if (elem->out_num) { 465 uint32_t id; 466 size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0, 467 &id, sizeof(id)); 468 469 virtio_tswap32s(vdev, &id); 470 if (unlikely(size != sizeof(id))) { 471 virtio_error(vdev, "received an incorrect cmd id"); 472 ret = false; 473 goto out; 474 } 475 if (id == dev->free_page_report_cmd_id) { 476 dev->free_page_report_status = FREE_PAGE_REPORT_S_START; 477 } else { 478 /* 479 * Stop the optimization only when it has started. This 480 * avoids a stale stop sign for the previous command. 481 */ 482 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { 483 dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP; 484 } 485 } 486 } 487 488 if (elem->in_num) { 489 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { 490 qemu_guest_free_page_hint(elem->in_sg[0].iov_base, 491 elem->in_sg[0].iov_len); 492 } 493 } 494 495 out: 496 virtqueue_push(vq, elem, 1); 497 g_free(elem); 498 return ret; 499 } 500 501 static void virtio_ballloon_get_free_page_hints(void *opaque) 502 { 503 VirtIOBalloon *dev = opaque; 504 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 505 VirtQueue *vq = dev->free_page_vq; 506 bool continue_to_get_hints; 507 508 do { 509 qemu_mutex_lock(&dev->free_page_lock); 510 virtio_queue_set_notification(vq, 0); 511 continue_to_get_hints = get_free_page_hints(dev); 512 qemu_mutex_unlock(&dev->free_page_lock); 513 virtio_notify(vdev, vq); 514 /* 515 * Start to poll the vq once the reporting started. Otherwise, continue 516 * only when there are entries on the vq, which need to be given back. 517 */ 518 } while (continue_to_get_hints || 519 dev->free_page_report_status == FREE_PAGE_REPORT_S_START); 520 virtio_queue_set_notification(vq, 1); 521 } 522 523 static bool virtio_balloon_free_page_support(void *opaque) 524 { 525 VirtIOBalloon *s = opaque; 526 VirtIODevice *vdev = VIRTIO_DEVICE(s); 527 528 return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT); 529 } 530 531 static void virtio_balloon_free_page_start(VirtIOBalloon *s) 532 { 533 VirtIODevice *vdev = VIRTIO_DEVICE(s); 534 535 /* For the stop and copy phase, we don't need to start the optimization */ 536 if (!vdev->vm_running) { 537 return; 538 } 539 540 if (s->free_page_report_cmd_id == UINT_MAX) { 541 s->free_page_report_cmd_id = 542 VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; 543 } else { 544 s->free_page_report_cmd_id++; 545 } 546 547 s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED; 548 virtio_notify_config(vdev); 549 } 550 551 static void virtio_balloon_free_page_stop(VirtIOBalloon *s) 552 { 553 VirtIODevice *vdev = VIRTIO_DEVICE(s); 554 555 if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) { 556 /* 557 * The lock also guarantees us that the 558 * virtio_ballloon_get_free_page_hints exits after the 559 * free_page_report_status is set to S_STOP. 560 */ 561 qemu_mutex_lock(&s->free_page_lock); 562 /* 563 * The guest hasn't done the reporting, so host sends a notification 564 * to the guest to actively stop the reporting. 565 */ 566 s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; 567 qemu_mutex_unlock(&s->free_page_lock); 568 virtio_notify_config(vdev); 569 } 570 } 571 572 static void virtio_balloon_free_page_done(VirtIOBalloon *s) 573 { 574 VirtIODevice *vdev = VIRTIO_DEVICE(s); 575 576 s->free_page_report_status = FREE_PAGE_REPORT_S_DONE; 577 virtio_notify_config(vdev); 578 } 579 580 static int 581 virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data) 582 { 583 VirtIOBalloon *dev = container_of(n, VirtIOBalloon, 584 free_page_report_notify); 585 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 586 PrecopyNotifyData *pnd = data; 587 588 if (!virtio_balloon_free_page_support(dev)) { 589 /* 590 * This is an optimization provided to migration, so just return 0 to 591 * have the normal migration process not affected when this feature is 592 * not supported. 593 */ 594 return 0; 595 } 596 597 switch (pnd->reason) { 598 case PRECOPY_NOTIFY_SETUP: 599 precopy_enable_free_page_optimization(); 600 break; 601 case PRECOPY_NOTIFY_COMPLETE: 602 case PRECOPY_NOTIFY_CLEANUP: 603 case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: 604 virtio_balloon_free_page_stop(dev); 605 break; 606 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: 607 if (vdev->vm_running) { 608 virtio_balloon_free_page_start(dev); 609 } else { 610 virtio_balloon_free_page_done(dev); 611 } 612 break; 613 default: 614 virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason); 615 } 616 617 return 0; 618 } 619 620 static size_t virtio_balloon_config_size(VirtIOBalloon *s) 621 { 622 uint64_t features = s->host_features; 623 624 if (s->qemu_4_0_config_size) { 625 return sizeof(struct virtio_balloon_config); 626 } 627 if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { 628 return sizeof(struct virtio_balloon_config); 629 } 630 if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 631 return offsetof(struct virtio_balloon_config, poison_val); 632 } 633 return offsetof(struct virtio_balloon_config, free_page_report_cmd_id); 634 } 635 636 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) 637 { 638 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 639 struct virtio_balloon_config config = {}; 640 641 config.num_pages = cpu_to_le32(dev->num_pages); 642 config.actual = cpu_to_le32(dev->actual); 643 644 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { 645 config.free_page_report_cmd_id = 646 cpu_to_le32(dev->free_page_report_cmd_id); 647 } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) { 648 config.free_page_report_cmd_id = 649 cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP); 650 } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) { 651 config.free_page_report_cmd_id = 652 cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE); 653 } 654 655 trace_virtio_balloon_get_config(config.num_pages, config.actual); 656 memcpy(config_data, &config, virtio_balloon_config_size(dev)); 657 } 658 659 static int build_dimm_list(Object *obj, void *opaque) 660 { 661 GSList **list = opaque; 662 663 if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { 664 DeviceState *dev = DEVICE(obj); 665 if (dev->realized) { /* only realized DIMMs matter */ 666 *list = g_slist_prepend(*list, dev); 667 } 668 } 669 670 object_child_foreach(obj, build_dimm_list, opaque); 671 return 0; 672 } 673 674 static ram_addr_t get_current_ram_size(void) 675 { 676 GSList *list = NULL, *item; 677 ram_addr_t size = ram_size; 678 679 build_dimm_list(qdev_get_machine(), &list); 680 for (item = list; item; item = g_slist_next(item)) { 681 Object *obj = OBJECT(item->data); 682 if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) { 683 size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, 684 &error_abort); 685 } 686 } 687 g_slist_free(list); 688 689 return size; 690 } 691 692 static void virtio_balloon_set_config(VirtIODevice *vdev, 693 const uint8_t *config_data) 694 { 695 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 696 struct virtio_balloon_config config; 697 uint32_t oldactual = dev->actual; 698 ram_addr_t vm_ram_size = get_current_ram_size(); 699 700 memcpy(&config, config_data, virtio_balloon_config_size(dev)); 701 dev->actual = le32_to_cpu(config.actual); 702 if (dev->actual != oldactual) { 703 qapi_event_send_balloon_change(vm_ram_size - 704 ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); 705 } 706 trace_virtio_balloon_set_config(dev->actual, oldactual); 707 } 708 709 static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, 710 Error **errp) 711 { 712 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 713 f |= dev->host_features; 714 virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); 715 716 return f; 717 } 718 719 static void virtio_balloon_stat(void *opaque, BalloonInfo *info) 720 { 721 VirtIOBalloon *dev = opaque; 722 info->actual = get_current_ram_size() - ((uint64_t) dev->actual << 723 VIRTIO_BALLOON_PFN_SHIFT); 724 } 725 726 static void virtio_balloon_to_target(void *opaque, ram_addr_t target) 727 { 728 VirtIOBalloon *dev = VIRTIO_BALLOON(opaque); 729 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 730 ram_addr_t vm_ram_size = get_current_ram_size(); 731 732 if (target > vm_ram_size) { 733 target = vm_ram_size; 734 } 735 if (target) { 736 dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT; 737 virtio_notify_config(vdev); 738 } 739 trace_virtio_balloon_to_target(target, dev->num_pages); 740 } 741 742 static int virtio_balloon_post_load_device(void *opaque, int version_id) 743 { 744 VirtIOBalloon *s = VIRTIO_BALLOON(opaque); 745 746 if (balloon_stats_enabled(s)) { 747 balloon_stats_change_timer(s, s->stats_poll_interval); 748 } 749 return 0; 750 } 751 752 static const VMStateDescription vmstate_virtio_balloon_free_page_report = { 753 .name = "virtio-balloon-device/free-page-report", 754 .version_id = 1, 755 .minimum_version_id = 1, 756 .needed = virtio_balloon_free_page_support, 757 .fields = (VMStateField[]) { 758 VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon), 759 VMSTATE_UINT32(free_page_report_status, VirtIOBalloon), 760 VMSTATE_END_OF_LIST() 761 } 762 }; 763 764 static const VMStateDescription vmstate_virtio_balloon_device = { 765 .name = "virtio-balloon-device", 766 .version_id = 1, 767 .minimum_version_id = 1, 768 .post_load = virtio_balloon_post_load_device, 769 .fields = (VMStateField[]) { 770 VMSTATE_UINT32(num_pages, VirtIOBalloon), 771 VMSTATE_UINT32(actual, VirtIOBalloon), 772 VMSTATE_END_OF_LIST() 773 }, 774 .subsections = (const VMStateDescription * []) { 775 &vmstate_virtio_balloon_free_page_report, 776 NULL 777 } 778 }; 779 780 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) 781 { 782 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 783 VirtIOBalloon *s = VIRTIO_BALLOON(dev); 784 int ret; 785 786 virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 787 virtio_balloon_config_size(s)); 788 789 ret = qemu_add_balloon_handler(virtio_balloon_to_target, 790 virtio_balloon_stat, s); 791 792 if (ret < 0) { 793 error_setg(errp, "Only one balloon device is supported"); 794 virtio_cleanup(vdev); 795 return; 796 } 797 798 s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); 799 s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); 800 s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); 801 802 if (virtio_has_feature(s->host_features, 803 VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 804 s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, 805 virtio_balloon_handle_free_page_vq); 806 s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; 807 s->free_page_report_cmd_id = 808 VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; 809 s->free_page_report_notify.notify = 810 virtio_balloon_free_page_report_notify; 811 precopy_add_notifier(&s->free_page_report_notify); 812 if (s->iothread) { 813 object_ref(OBJECT(s->iothread)); 814 s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), 815 virtio_ballloon_get_free_page_hints, s); 816 qemu_mutex_init(&s->free_page_lock); 817 qemu_cond_init(&s->free_page_cond); 818 s->block_iothread = false; 819 } else { 820 /* Simply disable this feature if the iothread wasn't created. */ 821 s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); 822 virtio_error(vdev, "iothread is missing"); 823 } 824 } 825 reset_stats(s); 826 } 827 828 static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) 829 { 830 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 831 VirtIOBalloon *s = VIRTIO_BALLOON(dev); 832 833 if (virtio_balloon_free_page_support(s)) { 834 qemu_bh_delete(s->free_page_bh); 835 virtio_balloon_free_page_stop(s); 836 precopy_remove_notifier(&s->free_page_report_notify); 837 } 838 balloon_stats_destroy_timer(s); 839 qemu_remove_balloon_handler(s); 840 virtio_cleanup(vdev); 841 } 842 843 static void virtio_balloon_device_reset(VirtIODevice *vdev) 844 { 845 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 846 847 if (virtio_balloon_free_page_support(s)) { 848 virtio_balloon_free_page_stop(s); 849 } 850 851 if (s->stats_vq_elem != NULL) { 852 virtqueue_unpop(s->svq, s->stats_vq_elem, 0); 853 g_free(s->stats_vq_elem); 854 s->stats_vq_elem = NULL; 855 } 856 } 857 858 static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) 859 { 860 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 861 862 if (!s->stats_vq_elem && vdev->vm_running && 863 (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) { 864 /* poll stats queue for the element we have discarded when the VM 865 * was stopped */ 866 virtio_balloon_receive_stats(vdev, s->svq); 867 } 868 869 if (virtio_balloon_free_page_support(s)) { 870 /* 871 * The VM is woken up and the iothread was blocked, so signal it to 872 * continue. 873 */ 874 if (vdev->vm_running && s->block_iothread) { 875 qemu_mutex_lock(&s->free_page_lock); 876 s->block_iothread = false; 877 qemu_cond_signal(&s->free_page_cond); 878 qemu_mutex_unlock(&s->free_page_lock); 879 } 880 881 /* The VM is stopped, block the iothread. */ 882 if (!vdev->vm_running) { 883 qemu_mutex_lock(&s->free_page_lock); 884 s->block_iothread = true; 885 qemu_mutex_unlock(&s->free_page_lock); 886 } 887 } 888 } 889 890 static void virtio_balloon_instance_init(Object *obj) 891 { 892 VirtIOBalloon *s = VIRTIO_BALLOON(obj); 893 894 object_property_add(obj, "guest-stats", "guest statistics", 895 balloon_stats_get_all, NULL, NULL, s, NULL); 896 897 object_property_add(obj, "guest-stats-polling-interval", "int", 898 balloon_stats_get_poll_interval, 899 balloon_stats_set_poll_interval, 900 NULL, s, NULL); 901 } 902 903 static const VMStateDescription vmstate_virtio_balloon = { 904 .name = "virtio-balloon", 905 .minimum_version_id = 1, 906 .version_id = 1, 907 .fields = (VMStateField[]) { 908 VMSTATE_VIRTIO_DEVICE, 909 VMSTATE_END_OF_LIST() 910 }, 911 }; 912 913 static Property virtio_balloon_properties[] = { 914 DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, 915 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), 916 DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, 917 VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), 918 /* QEMU 4.0 accidentally changed the config size even when free-page-hint 919 * is disabled, resulting in QEMU 3.1 migration incompatibility. This 920 * property retains this quirk for QEMU 4.1 machine types. 921 */ 922 DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon, 923 qemu_4_0_config_size, false), 924 DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD, 925 IOThread *), 926 DEFINE_PROP_END_OF_LIST(), 927 }; 928 929 static void virtio_balloon_class_init(ObjectClass *klass, void *data) 930 { 931 DeviceClass *dc = DEVICE_CLASS(klass); 932 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 933 934 dc->props = virtio_balloon_properties; 935 dc->vmsd = &vmstate_virtio_balloon; 936 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 937 vdc->realize = virtio_balloon_device_realize; 938 vdc->unrealize = virtio_balloon_device_unrealize; 939 vdc->reset = virtio_balloon_device_reset; 940 vdc->get_config = virtio_balloon_get_config; 941 vdc->set_config = virtio_balloon_set_config; 942 vdc->get_features = virtio_balloon_get_features; 943 vdc->set_status = virtio_balloon_set_status; 944 vdc->vmsd = &vmstate_virtio_balloon_device; 945 } 946 947 static const TypeInfo virtio_balloon_info = { 948 .name = TYPE_VIRTIO_BALLOON, 949 .parent = TYPE_VIRTIO_DEVICE, 950 .instance_size = sizeof(VirtIOBalloon), 951 .instance_init = virtio_balloon_instance_init, 952 .class_init = virtio_balloon_class_init, 953 }; 954 955 static void virtio_register_types(void) 956 { 957 type_register_static(&virtio_balloon_info); 958 } 959 960 type_init(virtio_register_types) 961