/*
 * Virtio Balloon Device
 *
 * Copyright IBM, Corp. 2008
 * Copyright (C) 2011 Red Hat, Inc.
 * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio.h"
#include "hw/mem/pc-dimm.h"
#include "sysemu/balloon.h"
#include "hw/virtio/virtio-balloon.h"
#include "exec/address-spaces.h"
#include "qapi/error.h"
#include "qapi/qapi-events-misc.h"
#include "qapi/visitor.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "migration/misc.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

/* Size of one balloon page as seen by the guest (4 KiB per the virtio spec). */
#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)

/*
 * Tracks a host page larger than BALLOON_PAGE_SIZE that has only been
 * partially ballooned so far: 'bitmap' holds one bit per balloon-sized
 * subpage of the host page starting at 'base' within RAMBlock 'rb'.
 * (Flexible array member; allocated with the bitmap appended.)
 */
struct PartiallyBalloonedPage {
    RAMBlock *rb;
    ram_addr_t base;
    unsigned long bitmap[];
};

/*
 * Handle one page the guest put into the balloon.  If the backing host
 * page size equals BALLOON_PAGE_SIZE the page is discarded right away;
 * otherwise subpage bits are accumulated in balloon->pbp until the whole
 * host page is ballooned and can be discarded at once.
 */
static void balloon_inflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    int subpages;
    ram_addr_t ram_offset, host_page_base;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);
    /* Align down to the start of the containing host page. */
    host_page_base = ram_offset & ~(rb_page_size - 1);

    if (rb_page_size == BALLOON_PAGE_SIZE) {
        /* Easy case */

        ram_block_discard_range(rb, ram_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        return;
    }

    /* Hard case
     *
     * We've put a piece of a larger host page into the balloon - we
     * need to keep track until we have a whole host page to
     * discard
     */
    warn_report_once(
"Balloon used with backing page size > 4kiB, this may not be reliable");

    subpages = rb_page_size / BALLOON_PAGE_SIZE;

    if (balloon->pbp
        && (rb != balloon->pbp->rb
            || host_page_base != balloon->pbp->base)) {
        /* We've partially ballooned part of a host page, but now
         * we're trying to balloon part of a different one.  Too hard,
         * give up on the old partial page */
        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    if (!balloon->pbp) {
        /* Starting on a new host page */
        size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long);
        balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen);
        balloon->pbp->rb = rb;
        balloon->pbp->base = host_page_base;
    }

    /* Mark this balloon-sized subpage of the tracked host page. */
    set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE,
            balloon->pbp->bitmap);

    if (bitmap_full(balloon->pbp->bitmap, subpages)) {
        /* We've accumulated a full host page, we can actually discard
         * it now */

        ram_block_discard_range(rb, balloon->pbp->base, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */

        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }
}

/*
 * Handle one page the guest took back out of the balloon: drop any
 * partial-inflate tracking and hint the whole containing host page back
 * in with MADV_WILLNEED (we can't hint anything smaller).
 */
static void balloon_deflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr
offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    ram_addr_t ram_offset;
    void *host_addr;
    int ret;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    if (balloon->pbp) {
        /* Let's play safe and always reset the pbp on deflation requests. */
        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    /* Align down to the start of the containing host page. */
    host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));

    /* When a page is deflated, we hint the whole host page it lives
     * on, since we can't do anything smaller */
    ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
    if (ret != 0) {
        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
                    strerror(errno));
        /* Otherwise ignore, failing to page hint shouldn't be fatal */
    }
}

/* QOM property name for each guest-reported statistic, indexed by its
 * VIRTIO_BALLOON_S_* tag.  The final NULL entry terminates the table. */
static const char *balloon_stat_names[] = {
    [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
    [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
    [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
    [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
    [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
    [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
    [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
    [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
    [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
    [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
    [VIRTIO_BALLOON_S_NR] = NULL
};

/*
 * reset_stats - Mark all items in the stats array as unset (-1)
 *
 * This function needs to be called at device initialization and before
 * updating to a set of newly-generated stats.  This will ensure that no
 * stale values stick around in case the guest reports a subset of the supported
 * statistics.
 */
static inline void reset_stats(VirtIOBalloon *dev)
{
    int i;
    for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
}

/* True if the guest negotiated the stats virtqueue feature. */
static bool balloon_stats_supported(const VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}

/* Stats polling is active iff a positive poll interval has been set. */
static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
    return s->stats_poll_interval > 0;
}

/* Tear down the stats polling timer (if any) and disable polling. */
static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
    if (balloon_stats_enabled(s)) {
        timer_del(s->stats_timer);
        timer_free(s->stats_timer);
        s->stats_timer = NULL;
        s->stats_poll_interval = 0;
    }
}

/* (Re)arm the stats polling timer to fire 'secs' seconds from now. */
static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
{
    timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
}

/*
 * Timer callback: request a stats refresh by completing the buffer the
 * guest queued on the stats virtqueue.  If no element is pending (or the
 * feature isn't negotiated) just re-arm the timer and try again later.
 */
static void balloon_stats_poll_cb(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
        /* re-schedule */
        balloon_stats_change_timer(s, s->stats_poll_interval);
        return;
    }

    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
    virtio_notify(vdev, s->svq);
    g_free(s->stats_vq_elem);
    s->stats_vq_elem = NULL;
}

/*
 * QOM getter for "guest-stats": emits a struct containing 'last-update'
 * (seconds since epoch of the last guest report) and a nested 'stats'
 * struct.  Statistics the guest did not report read as -1 (see
 * reset_stats()).
 */
static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    Error *err = NULL;
    VirtIOBalloon *s = opaque;
    int i;

    visit_start_struct(v, name, NULL, 0, &err);
    if (err) {
        goto out;
    }
    visit_type_int(v, "last-update", &s->stats_last_update, &err);
    if (err) {
        goto out_end;
    }

    visit_start_struct(v, "stats", NULL, 0, &err);
    if (err) {
        goto out_end;
    }
    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
        visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
        if (err) {
            goto out_nested;
        }
    }
visit_check_struct(v, &err); 245 out_nested: 246 visit_end_struct(v, NULL); 247 248 if (!err) { 249 visit_check_struct(v, &err); 250 } 251 out_end: 252 visit_end_struct(v, NULL); 253 out: 254 error_propagate(errp, err); 255 } 256 257 static void balloon_stats_get_poll_interval(Object *obj, Visitor *v, 258 const char *name, void *opaque, 259 Error **errp) 260 { 261 VirtIOBalloon *s = opaque; 262 visit_type_int(v, name, &s->stats_poll_interval, errp); 263 } 264 265 static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, 266 const char *name, void *opaque, 267 Error **errp) 268 { 269 VirtIOBalloon *s = opaque; 270 Error *local_err = NULL; 271 int64_t value; 272 273 visit_type_int(v, name, &value, &local_err); 274 if (local_err) { 275 error_propagate(errp, local_err); 276 return; 277 } 278 279 if (value < 0) { 280 error_setg(errp, "timer value must be greater than zero"); 281 return; 282 } 283 284 if (value > UINT32_MAX) { 285 error_setg(errp, "timer value is too big"); 286 return; 287 } 288 289 if (value == s->stats_poll_interval) { 290 return; 291 } 292 293 if (value == 0) { 294 /* timer=0 disables the timer */ 295 balloon_stats_destroy_timer(s); 296 return; 297 } 298 299 if (balloon_stats_enabled(s)) { 300 /* timer interval change */ 301 s->stats_poll_interval = value; 302 balloon_stats_change_timer(s, value); 303 return; 304 } 305 306 /* create a new timer */ 307 g_assert(s->stats_timer == NULL); 308 s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s); 309 s->stats_poll_interval = value; 310 balloon_stats_change_timer(s, 0); 311 } 312 313 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) 314 { 315 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); 316 VirtQueueElement *elem; 317 MemoryRegionSection section; 318 319 for (;;) { 320 size_t offset = 0; 321 uint32_t pfn; 322 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 323 if (!elem) { 324 return; 325 } 326 327 while (iov_to_buf(elem->out_sg, elem->out_num, 
offset, &pfn, 4) == 4) {
            unsigned int p = virtio_ldl_p(vdev, &pfn);
            hwaddr pa;

            pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
            offset += 4;

            /* Look up the 4 KiB balloon page in guest physical memory;
             * skip (with a trace) anything that isn't plain RAM. */
            section = memory_region_find(get_system_memory(), pa,
                                         BALLOON_PAGE_SIZE);
            if (!section.mr) {
                trace_virtio_balloon_bad_addr(pa);
                continue;
            }
            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                trace_virtio_balloon_bad_addr(pa);
                memory_region_unref(section.mr);
                continue;
            }

            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
                                               pa);
            if (!qemu_balloon_is_inhibited()) {
                if (vq == s->ivq) {
                    balloon_inflate_page(s, section.mr,
                                         section.offset_within_region);
                } else if (vq == s->dvq) {
                    balloon_deflate_page(s, section.mr, section.offset_within_region);
                } else {
                    g_assert_not_reached();
                }
            }
            memory_region_unref(section.mr);
        }

        virtqueue_push(vq, elem, offset);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}

/*
 * Stats virtqueue handler: the guest pushes a buffer of
 * VirtIOBalloonStat {tag, val} entries.  Record them, then keep the
 * element pending so balloon_stats_poll_cb() can complete it later to
 * ask the guest for a fresh report.
 */
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    VirtIOBalloonStat stat;
    size_t offset = 0;
    qemu_timeval tv;

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        goto out;
    }

    if (s->stats_vq_elem != NULL) {
        /* This should never happen if the driver follows the spec. */
        virtqueue_push(vq, s->stats_vq_elem, 0);
        virtio_notify(vdev, vq);
        g_free(s->stats_vq_elem);
    }

    s->stats_vq_elem = elem;

    /* Initialize the stats to get rid of any stale values.  This is only
     * needed to handle the case where a guest supports fewer stats than it
     * used to (ie. it has booted into an old kernel).
     */
    reset_stats(s);

    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
           == sizeof(stat)) {
        uint16_t tag = virtio_tswap16(vdev, stat.tag);
        uint64_t val = virtio_tswap64(vdev, stat.val);

        offset += sizeof(stat);
        /* Silently drop tags this QEMU doesn't know about. */
        if (tag < VIRTIO_BALLOON_S_NR)
            s->stats[tag] = val;
    }
    s->stats_vq_offset = offset;

    if (qemu_gettimeofday(&tv) < 0) {
        warn_report("%s: failed to get time of day", __func__);
        goto out;
    }

    s->stats_last_update = tv.tv_sec;

out:
    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
}

/* Free-page-hint virtqueue kick: defer processing to the iothread BH. */
static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
                                               VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    qemu_bh_schedule(s->free_page_bh);
}

/*
 * Pop and process one element from the free-page-hint virtqueue.
 * Called with free_page_lock held (it may drop it while waiting on
 * free_page_cond).  Returns true if an element was consumed, so the
 * caller should keep polling.
 */
static bool get_free_page_hints(VirtIOBalloon *dev)
{
    VirtQueueElement *elem;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool ret = true;

    while (dev->block_iothread) {
        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
    }

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        return false;
    }

    if (elem->out_num) {
        uint32_t id;
        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                 &id, sizeof(id));

        virtio_tswap32s(vdev, &id);
        if (unlikely(size != sizeof(id))) {
            virtio_error(vdev, "received an incorrect cmd id");
            ret = false;
            goto out;
        }
        if (id == dev->free_page_report_cmd_id) {
            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
        } else {
            /*
             * Stop the optimization only when it has started.  This
             * avoids a stale stop sign for the previous command.
461 */ 462 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { 463 dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP; 464 } 465 } 466 } 467 468 if (elem->in_num) { 469 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { 470 qemu_guest_free_page_hint(elem->in_sg[0].iov_base, 471 elem->in_sg[0].iov_len); 472 } 473 } 474 475 out: 476 virtqueue_push(vq, elem, 1); 477 g_free(elem); 478 return ret; 479 } 480 481 static void virtio_ballloon_get_free_page_hints(void *opaque) 482 { 483 VirtIOBalloon *dev = opaque; 484 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 485 VirtQueue *vq = dev->free_page_vq; 486 bool continue_to_get_hints; 487 488 do { 489 qemu_mutex_lock(&dev->free_page_lock); 490 virtio_queue_set_notification(vq, 0); 491 continue_to_get_hints = get_free_page_hints(dev); 492 qemu_mutex_unlock(&dev->free_page_lock); 493 virtio_notify(vdev, vq); 494 /* 495 * Start to poll the vq once the reporting started. Otherwise, continue 496 * only when there are entries on the vq, which need to be given back. 
497 */ 498 } while (continue_to_get_hints || 499 dev->free_page_report_status == FREE_PAGE_REPORT_S_START); 500 virtio_queue_set_notification(vq, 1); 501 } 502 503 static bool virtio_balloon_free_page_support(void *opaque) 504 { 505 VirtIOBalloon *s = opaque; 506 VirtIODevice *vdev = VIRTIO_DEVICE(s); 507 508 return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT); 509 } 510 511 static void virtio_balloon_free_page_start(VirtIOBalloon *s) 512 { 513 VirtIODevice *vdev = VIRTIO_DEVICE(s); 514 515 /* For the stop and copy phase, we don't need to start the optimization */ 516 if (!vdev->vm_running) { 517 return; 518 } 519 520 if (s->free_page_report_cmd_id == UINT_MAX) { 521 s->free_page_report_cmd_id = 522 VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; 523 } else { 524 s->free_page_report_cmd_id++; 525 } 526 527 s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED; 528 virtio_notify_config(vdev); 529 } 530 531 static void virtio_balloon_free_page_stop(VirtIOBalloon *s) 532 { 533 VirtIODevice *vdev = VIRTIO_DEVICE(s); 534 535 if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) { 536 /* 537 * The lock also guarantees us that the 538 * virtio_ballloon_get_free_page_hints exits after the 539 * free_page_report_status is set to S_STOP. 540 */ 541 qemu_mutex_lock(&s->free_page_lock); 542 /* 543 * The guest hasn't done the reporting, so host sends a notification 544 * to the guest to actively stop the reporting. 
545 */ 546 s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; 547 qemu_mutex_unlock(&s->free_page_lock); 548 virtio_notify_config(vdev); 549 } 550 } 551 552 static void virtio_balloon_free_page_done(VirtIOBalloon *s) 553 { 554 VirtIODevice *vdev = VIRTIO_DEVICE(s); 555 556 s->free_page_report_status = FREE_PAGE_REPORT_S_DONE; 557 virtio_notify_config(vdev); 558 } 559 560 static int 561 virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data) 562 { 563 VirtIOBalloon *dev = container_of(n, VirtIOBalloon, 564 free_page_report_notify); 565 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 566 PrecopyNotifyData *pnd = data; 567 568 if (!virtio_balloon_free_page_support(dev)) { 569 /* 570 * This is an optimization provided to migration, so just return 0 to 571 * have the normal migration process not affected when this feature is 572 * not supported. 573 */ 574 return 0; 575 } 576 577 switch (pnd->reason) { 578 case PRECOPY_NOTIFY_SETUP: 579 precopy_enable_free_page_optimization(); 580 break; 581 case PRECOPY_NOTIFY_COMPLETE: 582 case PRECOPY_NOTIFY_CLEANUP: 583 case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: 584 virtio_balloon_free_page_stop(dev); 585 break; 586 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: 587 if (vdev->vm_running) { 588 virtio_balloon_free_page_start(dev); 589 } else { 590 virtio_balloon_free_page_done(dev); 591 } 592 break; 593 default: 594 virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason); 595 } 596 597 return 0; 598 } 599 600 static size_t virtio_balloon_config_size(VirtIOBalloon *s) 601 { 602 uint64_t features = s->host_features; 603 604 if (s->qemu_4_0_config_size) { 605 return sizeof(struct virtio_balloon_config); 606 } 607 if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { 608 return sizeof(struct virtio_balloon_config); 609 } 610 if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 611 return offsetof(struct virtio_balloon_config, poison_val); 612 } 613 return offsetof(struct virtio_balloon_config, 
free_page_report_cmd_id); 614 } 615 616 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) 617 { 618 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 619 struct virtio_balloon_config config = {}; 620 621 config.num_pages = cpu_to_le32(dev->num_pages); 622 config.actual = cpu_to_le32(dev->actual); 623 624 if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { 625 config.free_page_report_cmd_id = 626 cpu_to_le32(dev->free_page_report_cmd_id); 627 } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) { 628 config.free_page_report_cmd_id = 629 cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP); 630 } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) { 631 config.free_page_report_cmd_id = 632 cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE); 633 } 634 635 trace_virtio_balloon_get_config(config.num_pages, config.actual); 636 memcpy(config_data, &config, virtio_balloon_config_size(dev)); 637 } 638 639 static int build_dimm_list(Object *obj, void *opaque) 640 { 641 GSList **list = opaque; 642 643 if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { 644 DeviceState *dev = DEVICE(obj); 645 if (dev->realized) { /* only realized DIMMs matter */ 646 *list = g_slist_prepend(*list, dev); 647 } 648 } 649 650 object_child_foreach(obj, build_dimm_list, opaque); 651 return 0; 652 } 653 654 static ram_addr_t get_current_ram_size(void) 655 { 656 GSList *list = NULL, *item; 657 ram_addr_t size = ram_size; 658 659 build_dimm_list(qdev_get_machine(), &list); 660 for (item = list; item; item = g_slist_next(item)) { 661 Object *obj = OBJECT(item->data); 662 if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) { 663 size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, 664 &error_abort); 665 } 666 } 667 g_slist_free(list); 668 669 return size; 670 } 671 672 static void virtio_balloon_set_config(VirtIODevice *vdev, 673 const uint8_t *config_data) 674 { 675 VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 676 struct virtio_balloon_config config; 677 uint32_t 
oldactual = dev->actual;
    ram_addr_t vm_ram_size = get_current_ram_size();

    memcpy(&config, config_data, virtio_balloon_config_size(dev));
    dev->actual = le32_to_cpu(config.actual);
    if (dev->actual != oldactual) {
        qapi_event_send_balloon_change(vm_ram_size -
                        ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
    }
    trace_virtio_balloon_set_config(dev->actual, oldactual);
}

/* Offer device features; the stats virtqueue is always advertised. */
static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
                                            Error **errp)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    f |= dev->host_features;
    virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);

    return f;
}

/* Balloon API callback: report the guest's current memory amount. */
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
                                             VIRTIO_BALLOON_PFN_SHIFT);
}

/*
 * Balloon API callback: set a new balloon target.  'target' is the
 * desired guest memory size; num_pages becomes the amount to balloon
 * out, in BALLOON_PAGE_SIZE units, published via the config space.
 */
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    ram_addr_t vm_ram_size = get_current_ram_size();

    if (target > vm_ram_size) {
        target = vm_ram_size;
    }
    if (target) {
        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
        virtio_notify_config(vdev);
    }
    trace_virtio_balloon_to_target(target, dev->num_pages);
}

/* Restart stats polling after migration, if it was enabled. */
static int virtio_balloon_post_load_device(void *opaque, int version_id)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(opaque);

    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
    return 0;
}

/* Subsection migrated only when free-page-hint is negotiated. */
static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
    .name = "virtio-balloon-device/free-page-report",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_balloon_free_page_support,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_balloon_device = {
    .name = "virtio-balloon-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = virtio_balloon_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(num_pages, VirtIOBalloon),
        VMSTATE_UINT32(actual, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_balloon_free_page_report,
        NULL
    }
};

/*
 * Realize: register with the balloon API, create the virtqueues and,
 * when free-page-hint is enabled, the iothread bottom half plus its
 * synchronization primitives.
 */
static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
    int ret;

    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
                virtio_balloon_config_size(s));

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);

    if (ret < 0) {
        error_setg(errp, "Only one balloon device is supported");
        virtio_cleanup(vdev);
        return;
    }

    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    if (virtio_has_feature(s->host_features,
                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
                                           virtio_balloon_handle_free_page_vq);
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        s->free_page_report_cmd_id =
                          VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
        s->free_page_report_notify.notify =
                                       virtio_balloon_free_page_report_notify;
        precopy_add_notifier(&s->free_page_report_notify);
        if (s->iothread) {
            object_ref(OBJECT(s->iothread));
            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                       virtio_ballloon_get_free_page_hints, s);
qemu_mutex_init(&s->free_page_lock);
            qemu_cond_init(&s->free_page_cond);
            s->block_iothread = false;
        } else {
            /* Simply disable this feature if the iothread wasn't created. */
            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
            virtio_error(vdev, "iothread is missing");
        }
    }
    reset_stats(s);
}

/* Unrealize: tear down what realize set up. */
static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);

    if (virtio_balloon_free_page_support(s)) {
        qemu_bh_delete(s->free_page_bh);
        virtio_balloon_free_page_stop(s);
        precopy_remove_notifier(&s->free_page_report_notify);
    }
    balloon_stats_destroy_timer(s);
    qemu_remove_balloon_handler(s);
    virtio_cleanup(vdev);
}

/* Device reset: stop free-page hinting and drop any pending stats element. */
static void virtio_balloon_device_reset(VirtIODevice *vdev)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (virtio_balloon_free_page_support(s)) {
        virtio_balloon_free_page_stop(s);
    }

    if (s->stats_vq_elem != NULL) {
        virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
        g_free(s->stats_vq_elem);
        s->stats_vq_elem = NULL;
    }
}

/*
 * Status change: re-poll the stats queue once the driver is up, and
 * block/unblock the free-page iothread when the VM stops/starts.
 */
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (!s->stats_vq_elem && vdev->vm_running &&
        (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
        /* poll stats queue for the element we have discarded when the VM
         * was stopped */
        virtio_balloon_receive_stats(vdev, s->svq);
    }

    if (virtio_balloon_free_page_support(s)) {
        /*
         * The VM is woken up and the iothread was blocked, so signal it to
         * continue.
         */
        if (vdev->vm_running && s->block_iothread) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = false;
            qemu_cond_signal(&s->free_page_cond);
            qemu_mutex_unlock(&s->free_page_lock);
        }

        /* The VM is stopped, block the iothread. */
        if (!vdev->vm_running) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = true;
            qemu_mutex_unlock(&s->free_page_lock);
        }
    }
}

/* Instance init: expose the guest-stats QOM properties. */
static void virtio_balloon_instance_init(Object *obj)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(obj);

    object_property_add(obj, "guest-stats", "guest statistics",
                        balloon_stats_get_all, NULL, NULL, s, NULL);

    object_property_add(obj, "guest-stats-polling-interval", "int",
                        balloon_stats_get_poll_interval,
                        balloon_stats_set_poll_interval,
                        NULL, s, NULL);
}

static const VMStateDescription vmstate_virtio_balloon = {
    .name = "virtio-balloon",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_balloon_properties[] = {
    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
    /* QEMU 4.0 accidentally changed the config size even when free-page-hint
     * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
     * property retains this quirk for QEMU 4.1 machine types.
     */
    DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
                     qemu_4_0_config_size, false),
    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
                     IOThread *),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_balloon_properties;
    dc->vmsd = &vmstate_virtio_balloon;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_balloon_device_realize;
    vdc->unrealize = virtio_balloon_device_unrealize;
    vdc->reset = virtio_balloon_device_reset;
    vdc->get_config = virtio_balloon_get_config;
    vdc->set_config = virtio_balloon_set_config;
    vdc->get_features = virtio_balloon_get_features;
    vdc->set_status = virtio_balloon_set_status;
    vdc->vmsd = &vmstate_virtio_balloon_device;
}

static const TypeInfo virtio_balloon_info = {
    .name = TYPE_VIRTIO_BALLOON,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBalloon),
    .instance_init = virtio_balloon_instance_init,
    .class_init = virtio_balloon_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_balloon_info);
}

type_init(virtio_register_types)