/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "hv-balloon-internal.h"

#include "system/address-spaces.h"
#include "exec/cpu-common.h"
#include "system/ramblock.h"
#include "hw/boards.h"
#include "hw/hyperv/dynmem-proto.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/hyperv/vmbus.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/pc-dimm.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "monitor/qdev.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-events-machine.h"
#include "qapi/qapi-types-machine.h"
#include "qobject/qdict.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/units.h"
#include "qemu/timer.h"
#include "system/balloon.h"
#include "system/hostmem.h"
#include "system/reset.h"
#include "hv-balloon-our_range_memslots.h"
#include "hv-balloon-page_range_tree.h"
#include "trace.h"

#define HV_BALLOON_ADDR_PROP "addr"
#define HV_BALLOON_MEMDEV_PROP "memdev"
#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502"

/*
 * Some Windows versions (at least Server 2019) will crash with various
 * error codes when receiving DM protocol requests (at least
 * DM_MEM_HOT_ADD_REQUEST) immediately after boot.
 *
 * It looks like Hyper-V from Server 2016 uses a 50-second after-boot
 * delay, probably to work around this issue, so we'll use this value, too.
 */
#define HV_BALLOON_POST_INIT_WAIT (50 * 1000)

#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB)
#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE)

#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB)

#define HV_BALLOON_HR_CHUNK_PAGES 585728
/*
 * ^ that's the maximum number of pages
 * that Windows returns in one hot remove response
 *
 * If the number requested is too high Windows will no longer honor
 * these requests
 */

typedef enum State {
    /* not a real state */
    S_NO_CHANGE = 0,

    S_WAIT_RESET,
    S_POST_RESET_CLOSED,

    /* init flow */
    S_VERSION,
    S_CAPS,
    S_POST_INIT_WAIT,

    S_IDLE,

    /* balloon op flow */
    S_BALLOON_POSTING,
    S_BALLOON_RB_WAIT,
    S_BALLOON_REPLY_WAIT,

    /* unballoon + hot add ops flow */
    S_UNBALLOON_POSTING,
    S_UNBALLOON_RB_WAIT,
    S_UNBALLOON_REPLY_WAIT,
    S_HOT_ADD_SETUP,
    S_HOT_ADD_RB_WAIT,
    S_HOT_ADD_POSTING,
    S_HOT_ADD_REPLY_WAIT,
} State;
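
/*
 * Overview of the state machine implemented by the handlers below:
 *
 * guest connection: S_POST_RESET_CLOSED -> S_VERSION -> S_CAPS ->
 *                   S_POST_INIT_WAIT -> (post-init timer) -> S_IDLE
 *
 * shrinking the guest: S_IDLE -> S_BALLOON_RB_WAIT -> S_BALLOON_POSTING ->
 *                      S_BALLOON_REPLY_WAIT -> S_IDLE
 *
 * growing the guest: S_IDLE -> S_UNBALLOON_* and/or S_HOT_ADD_* -> S_IDLE
 *
 * The *_RB_WAIT states wait for ring buffer space, the *_POSTING states
 * send the request to the guest and the *_REPLY_WAIT states wait for its
 * reply.
 */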

typedef struct StateDesc {
    State state;
    const char *desc;
} StateDesc;

typedef struct HvBalloon {
    VMBusDevice parent;
    State state;

    union dm_version version;
    union dm_caps caps;

    QEMUTimer post_init_timer;

    unsigned int trans_id;

    struct {
        bool enabled;
        bool received;
        uint64_t committed;
        uint64_t available;
    } status_report;

    /* Guest target size */
    uint64_t target;
    bool target_changed;

    /* Current (un)balloon / hot-add operation parameters */
    union {
        uint64_t balloon_diff;

        struct {
            uint64_t unballoon_diff;
            uint64_t hot_add_diff;
        };

        struct {
            PageRange hot_add_range;
            uint64_t ha_current_count;
        };
    };

    OurRangeMemslots *our_range;

    /* Count of memslots covering our memory */
    unsigned int memslot_count;

    /* Nominal size of each memslot (the last one might be smaller) */
    uint64_t memslot_size;

    /* Non-ours removed memory */
    PageRangeTree removed_guest, removed_both;

    /* Grand totals of removed memory (both ours and non-ours) */
    uint64_t removed_guest_ctr, removed_both_ctr;

    /* MEMORY_DEVICE props */
    uint64_t addr;
    HostMemoryBackend *hostmem;
    MemoryRegion *mr;
} HvBalloon;

OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, \
                                          HV_BALLOON, VMBUS_DEVICE, \
                                          { TYPE_MEMORY_DEVICE }, { })

#define HV_BALLOON_SET_STATE(hvb, news)             \
    do {                                            \
        assert(news != S_NO_CHANGE);                \
        hv_balloon_state_set(hvb, news, # news);    \
    } while (0)

#define HV_BALLOON_STATE_DESC_SET(stdesc, news) \
    _hv_balloon_state_desc_set(stdesc, news, # news)

#define HV_BALLOON_STATE_DESC_INIT \
    {                              \
        .state = S_NO_CHANGE,      \
    }

typedef struct HvBalloonReq {
    VMBusChanReq vmreq;
} HvBalloonReq;

/* total our memory includes parts currently removed from the guest */
static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon)
{
    if (!balloon->our_range) {
        return 0;
    }

    return balloon->our_range->range.added;
}

/* TODO: unify the code below with virtio-balloon and cache the value */
static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **list = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dev = DEVICE(obj);
        if (dev->realized) { /* only realized DIMMs matter */
            *list = g_slist_prepend(*list, dev);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);
    return 0;
}

static ram_addr_t get_current_ram_size(void)
{
    GSList *list = NULL, *item;
    ram_addr_t size = current_machine->ram_size;

    build_dimm_list(qdev_get_machine(), &list);
    for (item = list; item; item = g_slist_next(item)) {
        Object *obj = OBJECT(item->data);
        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM))
            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
                                            &error_abort);
    }
    g_slist_free(list);

    return size;
}

/* total RAM includes memory currently removed from the guest */
static uint64_t hv_balloon_total_ram(HvBalloon *balloon)
{
    ram_addr_t ram_size = get_current_ram_size();
    uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT;
    uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon);

    assert(ram_size_pages > 0);

    return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages);
}

/*
 * calculating the total RAM size is a slow operation,
 * avoid it as much as possible
 */
static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon,
                                            uint64_t ram_size_pages)
{
    uint64_t total_removed;

    total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr,
                                     balloon->removed_both_ctr);

    /* possible if guest returns pages outside actual RAM */
    if (total_removed > ram_size_pages) {
        total_removed = ram_size_pages;
    }

    return total_removed;
}

/* Returns whether the state has actually changed */
static bool hv_balloon_state_set(HvBalloon *balloon,
                                 State newst, const char *newststr)
{
    if (newst == S_NO_CHANGE || balloon->state == newst) {
        return false;
    }

    balloon->state = newst;
    trace_hv_balloon_state_change(newststr);
    return true;
}

static void _hv_balloon_state_desc_set(StateDesc *stdesc,
                                       State newst, const char *newststr)
{
    /* state setting is only permitted on a freshly init desc */
    assert(stdesc->state == S_NO_CHANGE);

    assert(newst != S_NO_CHANGE);

    stdesc->state = newst;
    stdesc->desc = newststr;
}

static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon)
{
    return vmbus_device_channel(&balloon->parent, 0);
}

static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon)
{
    VMBusChannel *chan;

    chan = hv_balloon_get_channel_maybe(balloon);
    assert(chan != NULL);
    return chan;
}

static ssize_t hv_balloon_send_packet(VMBusChannel *chan,
                                      struct dm_message *msg)
{
    int ret;

    ret = vmbus_channel_reserve(chan, 0, msg->hdr.size);
    if (ret < 0) {
        return ret;
    }

    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                              NULL, 0, msg, msg->hdr.size, false,
                              msg->hdr.trans_id);
}
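
/*
 * Pick the source tree for the next unballoon (give-back) request, in
 * order of preference: pages the guest returned from memory outside our
 * range (removed_guest, then removed_both), then pages returned from the
 * hot-added range itself.  Outputs the chosen tree, the matching
 * grand-total counter and whether the tree belongs to our range.
 */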
static bool hv_balloon_unballoon_get_source(HvBalloon *balloon,
                                            PageRangeTree *dtree,
                                            uint64_t **dctr,
                                            bool *is_our_range)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);

    /* Try the boot memory first */
    if (g_tree_nnodes(balloon->removed_guest.t) > 0) {
        *dtree = balloon->removed_guest;
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = false;
    } else if (g_tree_nnodes(balloon->removed_both.t) > 0) {
        *dtree = balloon->removed_both;
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = false;
    } else if (!our_range) {
        return false;
    } else if (!our_range_is_removed_tree_empty(our_range, false)) {
        *dtree = our_range_get_removed_tree(our_range, false);
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = true;
    } else if (!our_range_is_removed_tree_empty(our_range, true)) {
        *dtree = our_range_get_removed_tree(our_range, true);
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = true;
    } else {
        return false;
    }

    return true;
}

static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_unballoon_request *ur;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);

    assert(balloon->state == S_UNBALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ur_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING);
}

static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    PageRangeTree dtree;
    uint64_t *dctr;
    bool our_range;
    g_autofree struct dm_unballoon_request *ur = NULL;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
    PageRange range;
    bool bret;
    ssize_t ret;

    assert(balloon->state == S_UNBALLOON_POSTING);
    assert(balloon->unballoon_diff > 0);

    if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) {
        error_report("trying to unballoon but nothing seems to be ballooned");
        /*
         * there is little we can do as we might have already
         * sent the guest a partial request we can't cancel
         */
        return;
    }

    assert(balloon->our_range || !our_range);
    assert(dtree.t);
    assert(dctr);

    ur = g_malloc0(ur_size);
    ur->hdr.type = DM_UNBALLOON_REQUEST;
    ur->hdr.size = ur_size;
    ur->hdr.trans_id = balloon->trans_id;

    bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff,
                                                      HV_BALLOON_HA_CHUNK_PAGES));
    assert(bret);
    /* TODO: madvise? */

    *dctr -= range.count;
    balloon->unballoon_diff -= range.count;

    ur->range_count = 1;
    ur->range_array[0].finfo.start_page = range.start;
    ur->range_array[0].finfo.page_cnt = range.count;
    ur->more_pages = balloon->unballoon_diff > 0;

    trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id,
                                        range.count, range.start,
                                        balloon->unballoon_diff);

    if (ur->more_pages) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT);
    }

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ur, ur_size, false,
                             ur->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting unballoon msg, expect problems",
                     ret);
    }
}
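
/*
 * Lazily create the memslot-backed memory range ("our range") covering the
 * memory backend.  Fails if the guest has already returned pages that fall
 * within the range before it was created, since that would leave the
 * removed-page accounting inconsistent.
 */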
static bool hv_balloon_our_range_ensure(HvBalloon *balloon)
{
    uint64_t align;
    MemoryRegion *hostmem_mr;
    g_autoptr(OurRangeMemslots) our_range_memslots = NULL;
    OurRange *our_range;

    if (balloon->our_range) {
        return true;
    }

    if (!balloon->hostmem) {
        return false;
    }

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB;
    assert(QEMU_IS_ALIGNED(balloon->addr, align));

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);

    our_range_memslots = hvb_our_range_memslots_new(balloon->addr,
                                                    balloon->mr, hostmem_mr,
                                                    OBJECT(balloon),
                                                    balloon->memslot_count,
                                                    balloon->memslot_size);
    our_range = OUR_RANGE(our_range_memslots);

    if (hvb_page_range_tree_intree_any(balloon->removed_guest,
                                       our_range->range.start,
                                       our_range->range.count) ||
        hvb_page_range_tree_intree_any(balloon->removed_both,
                                       our_range->range.start,
                                       our_range->range.count)) {
        error_report("some parts of the memory backend were already returned by the guest. this should not happen, please reboot the guest and try again");
        return false;
    }

    trace_hv_balloon_our_range_add(our_range->range.count,
                                   our_range->range.start);

    balloon->our_range = g_steal_pointer(&our_range_memslots);
    return true;
}
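
/*
 * Compute the next range to hot add: it starts right after the part of our
 * range that was already added and is limited both by the remaining size
 * of the backend and by the remaining hot-add deficit, rounded down to the
 * guest-required alignment.
 */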
static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc)
{
    /* need to make copy since it is in union with hot_add_range */
    uint64_t hot_add_diff = balloon->hot_add_diff;
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t align, our_range_remaining;
    OurRange *our_range;

    assert(balloon->state == S_HOT_ADD_SETUP);
    assert(hot_add_diff > 0);

    if (!hv_balloon_our_range_ensure(balloon)) {
        goto ret_idle;
    }

    our_range = OUR_RANGE(balloon->our_range);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);

    /* Absolute GPA in pages */
    hot_add_range->start = our_range_get_remaining_start(our_range);
    assert(QEMU_IS_ALIGNED(hot_add_range->start, align));

    our_range_remaining = our_range_get_remaining_size(our_range);
    hot_add_range->count = MIN(our_range_remaining, hot_add_diff);
    hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align);
    if (hot_add_range->count == 0) {
        goto ret_idle;
    }

    hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range,
                                                    hot_add_range->count);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
    return;

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}

static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_hot_add_with_region *ha;
    size_t ha_size = sizeof(*ha);

    assert(balloon->state == S_HOT_ADD_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ha_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING);
}

static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t *current_count = &balloon->ha_current_count;
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    g_autofree struct dm_hot_add_with_region *ha = NULL;
    size_t ha_size = sizeof(*ha);
    union dm_mem_page_range *ha_region;
    uint64_t align, chunk_max_size;
    ssize_t ret;

    assert(balloon->state == S_HOT_ADD_POSTING);
    assert(hot_add_range->count > 0);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);
    if (align >= HV_BALLOON_HA_CHUNK_PAGES) {
        /*
         * If the required alignment is higher than the chunk size we let it
         * override that size.
         */
        chunk_max_size = align;
    } else {
        chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align);
    }
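
    /*
     * Example (assuming the 4 KiB protocol page size): the default 2 GiB
     * chunk is 524288 pages and a 128 MiB guest alignment is 32768 pages,
     * so the chunk stays at 524288 pages; only an alignment of 2 GiB or
     * more would override the chunk size entirely.
     */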

    /*
     * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(),
     * then it is either reduced by subtracting aligned current_count or
     * further hot-adds are prevented by marking the whole remaining our range
     * as unusable in hv_balloon_handle_hot_add_response().
     */
    *current_count = MIN(hot_add_range->count, chunk_max_size);

    ha = g_malloc0(ha_size);
    ha_region = &ha->region;
    ha->hdr.type = DM_MEM_HOT_ADD_REQUEST;
    ha->hdr.size = ha_size;
    ha->hdr.trans_id = balloon->trans_id;

    ha->range.finfo.start_page = hot_add_range->start;
    ha->range.finfo.page_cnt = *current_count;
    ha_region->finfo.start_page = hot_add_range->start;
    ha_region->finfo.page_cnt = ha->range.finfo.page_cnt;

    trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id,
                                      *current_count, hot_add_range->start);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ha, ha_size, false,
                             ha->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting hot add msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT);
}

static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    size_t bl_size = sizeof(struct dm_balloon);

    assert(balloon->state == S_BALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, bl_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING);
}

static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_balloon bl;
    size_t bl_size = sizeof(bl);
    ssize_t ret;

    assert(balloon->state == S_BALLOON_POSTING);
    assert(balloon->balloon_diff > 0);

    memset(&bl, 0, sizeof(bl));
    bl.hdr.type = DM_BALLOON_REQUEST;
    bl.hdr.size = bl_size;
    bl.hdr.trans_id = balloon->trans_id;
    bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES);

    trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages,
                                      balloon->balloon_diff);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, &bl, bl_size, false,
                             bl.hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting balloon msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT);
}
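
/*
 * Translate the absolute guest target into the pending operation amounts.
 * Worked example: with ram_size_pages = 1000 and total_removed = 100 the
 * guest currently sees 900 pages; a target of 1050 gives target_diff = 150,
 * of which unballoon_diff = 100 (return everything previously removed) and,
 * if the guest can hot-add, hot_add_diff = 50.
 */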
static void hv_balloon_idle_state_process_target(HvBalloon *balloon,
                                                 StateDesc *stdesc)
{
    bool can_balloon = balloon->caps.cap_bits.balloon;
    uint64_t ram_size_pages, total_removed;

    ram_size_pages = hv_balloon_total_ram(balloon);
    total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages);

    /*
     * we need to cache the values computed from the balloon target value when
     * starting the adjustment procedure in case someone changes the target when
     * the procedure is in progress
     */
    if (balloon->target > ram_size_pages - total_removed) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;
        uint64_t target_diff = balloon->target -
            (ram_size_pages - total_removed);

        balloon->unballoon_diff = MIN(target_diff, total_removed);

        if (can_hot_add) {
            balloon->hot_add_diff = target_diff - balloon->unballoon_diff;
        } else {
            balloon->hot_add_diff = 0;
        }

        if (balloon->unballoon_diff > 0) {
            assert(can_balloon);
            HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
        } else if (balloon->hot_add_diff > 0) {
            HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
        }
    } else if (can_balloon &&
               balloon->target < ram_size_pages - total_removed) {
        balloon->balloon_diff = ram_size_pages - total_removed -
            balloon->target;
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    }
}

static void hv_balloon_idle_state(HvBalloon *balloon,
                                  StateDesc *stdesc)
{
    assert(balloon->state == S_IDLE);

    if (balloon->target_changed) {
        balloon->target_changed = false;
        hv_balloon_idle_state_process_target(balloon, stdesc);
        return;
    }
}

static const struct {
    void (*handler)(HvBalloon *balloon, StateDesc *stdesc);
} state_handlers[] = {
    [S_IDLE].handler = hv_balloon_idle_state,
    [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting,
    [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait,
    [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting,
    [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait,
    [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup,
    [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait,
    [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting,
};

static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc)
{
    if (balloon->state >= ARRAY_SIZE(state_handlers) ||
        !state_handlers[balloon->state].handler) {
        return;
    }

    state_handlers[balloon->state].handler(balloon, stdesc);
}

static void hv_balloon_remove_response_insert_range(PageRangeTree tree,
                                                    const PageRange *range,
                                                    uint64_t *ctr1,
                                                    uint64_t *ctr2,
                                                    uint64_t *ctr3)
{
    uint64_t dupcount, effcount;

    if (range->count == 0) {
        return;
    }

    dupcount = 0;
    hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount);

    assert(dupcount <= range->count);
    effcount = range->count - dupcount;

    *ctr1 += effcount;
    *ctr2 += effcount;
    if (ctr3) {
        *ctr3 += effcount;
    }
}
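
/*
 * Account one guest-returned page range, splitting it into up to three
 * parts: the hole before our range, the intersection with our range
 * (tracked in per-range trees so those pages can be hot-added again later)
 * and the remainder after our range, which is handled like the hole.
 */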
static void hv_balloon_remove_response_handle_range(HvBalloon *balloon,
                                                    PageRange *range,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);
    PageRangeTree globaltree =
        both ? balloon->removed_both : balloon->removed_guest;
    uint64_t *globalctr =
        both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr;
    PageRange rangeeff;

    if (range->count == 0) {
        return;
    }

    trace_hv_balloon_remove_response(range->count, range->start, both);

    if (our_range) {
        /* Includes the not-yet-hot-added and unusable parts. */
        rangeeff = our_range->range;
    } else {
        rangeeff.start = rangeeff.count = 0;
    }

    if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) {
        PageRangeTree ourtree = our_range_get_removed_tree(our_range, both);
        PageRange rangehole, rangecommon;
        uint64_t ourremoved = 0;

        /* process the hole before our range, if it exists */
        page_range_part_before(range, rangeeff.start, &rangehole);
        hv_balloon_remove_response_insert_range(globaltree, &rangehole,
                                                globalctr, removedctr, NULL);
        if (rangehole.count > 0) {
            trace_hv_balloon_remove_response_hole(rangehole.count,
                                                  rangehole.start,
                                                  range->count, range->start,
                                                  rangeeff.start, both);
        }

        /* process our part */
        page_range_intersect(range, rangeeff.start, rangeeff.count,
                             &rangecommon);
        hv_balloon_remove_response_insert_range(ourtree, &rangecommon,
                                                globalctr, removedctr,
                                                &ourremoved);
        if (rangecommon.count > 0) {
            trace_hv_balloon_remove_response_common(rangecommon.count,
                                                    rangecommon.start,
                                                    range->count, range->start,
                                                    rangeeff.count,
                                                    rangeeff.start, ourremoved,
                                                    both);
        }

        /* calculate what's left after our range */
        rangecommon = *range;
        page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count,
                              range);
    }

    /* process the remainder of the range that lies after our range */
    if (range->count > 0) {
        hv_balloon_remove_response_insert_range(globaltree, range,
                                                globalctr, removedctr, NULL);
        trace_hv_balloon_remove_response_remainder(range->count, range->start,
                                                   both);
        range->count = 0;
    }
}

static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon,
                                                    PageRange *range,
                                                    uint64_t start,
                                                    uint64_t count,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    assert(count > 0);

    /*
     * if there is an existing range that the new range can't be joined to,
     * dump it into the tree(s)
     */
    if (range->count > 0 && !page_range_joinable(range, start, count)) {
        hv_balloon_remove_response_handle_range(balloon, range, both,
                                                removedctr);
    }

    if (range->count == 0) {
        range->start = start;
        range->count = count;
    } else if (page_range_joinable_left(range, start, count)) {
        range->start = start;
        range->count += count;
    } else { /* page_range_joinable_right() */
        range->count += count;
    }
}
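
/*
 * g_tree_foreach() callback: discard the host memory backing one range of
 * guest-returned pages via ram_block_discard_range(), skipping (with a
 * warning) pages whose RAM block uses a page size other than the 4 KiB
 * protocol page size, since these cannot be discarded page-by-page.
 */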
static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key,
                                                        gpointer value,
                                                        gpointer data)
{
    PageRange *range = value;
    uint64_t pageoff;

    for (pageoff = 0; pageoff < range->count; ) {
        uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE;
        void *addr;
        RAMBlock *rb;
        ram_addr_t rb_offset;
        size_t rb_page_size;
        size_t discard_size;

        assert(addr_64 <= UINTPTR_MAX);
        addr = (void *)((uintptr_t)addr_64);
        rb = qemu_ram_block_from_host(addr, false, &rb_offset);
        rb_page_size = qemu_ram_pagesize(rb);

        if (rb_page_size != HV_BALLOON_PAGE_SIZE) {
            /* TODO: these should end in "removed_guest" */
            warn_report("guest reported removed page backed by unsupported page size %zu",
                        rb_page_size);
            pageoff++;
            continue;
        }

        discard_size = MIN(range->count - pageoff,
                           (rb->max_length - rb_offset) /
                           HV_BALLOON_PAGE_SIZE);
        discard_size = MAX(discard_size, 1);

        if (ram_block_discard_range(rb, rb_offset, discard_size *
                                    HV_BALLOON_PAGE_SIZE) != 0) {
            warn_report("guest reported removed page failed discard");
        }

        pageoff += discard_size;
    }

    return false;
}

static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree)
{
    g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, NULL);
}

static int hv_balloon_handle_remove_section(PageRangeTree tree,
                                            const MemoryRegionSection *section,
                                            uint64_t count)
{
    void *addr = memory_region_get_ram_ptr(section->mr) +
        section->offset_within_region;
    uint64_t addr_page;

    assert(count > 0);

    if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) {
        warn_report("guest reported removed pages at an unaligned host addr %p",
                    addr);
        return -EINVAL;
    }

    addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE;
    hvb_page_range_tree_insert(tree, addr_page, count, NULL);

    return 0;
}
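
/*
 * Walk all page ranges in a balloon response, classifying each page as
 * either backed by ordinary RAM (accounted as removed from both the guest
 * and the host, and queued for host-side discard) or not (accounted as
 * removed from the guest only), then update the pending balloon_diff.
 */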
static void hv_balloon_handle_remove_ranges(HvBalloon *balloon,
                                            union dm_mem_page_range ranges[],
                                            uint32_t count)
{
    uint64_t removedcnt;
    PageRangeTree removed_host_addr;
    PageRange range_guest, range_both;

    hvb_page_range_tree_init(&removed_host_addr);
    range_guest.count = range_both.count = removedcnt = 0;
    for (unsigned int ctr = 0; ctr < count; ctr++) {
        union dm_mem_page_range *mr = &ranges[ctr];
        hwaddr pa;
        MemoryRegionSection section;

        for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) {
            int ret;
            uint64_t pageno = mr->finfo.start_page + offset;
            uint64_t pagecnt = 1;

            pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT;
            section = memory_region_find(get_system_memory(), pa,
                                         (mr->finfo.page_cnt - offset) *
                                         HV_BALLOON_PAGE_SIZE);
            if (!section.mr) {
                warn_report("guest reported removed page %"PRIu64" not found in RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE;
            if (pagecnt <= 0) {
                warn_report("guest reported removed page %"PRIu64" in a section smaller than page size",
                            pageno);
                pagecnt = 1; /* skip the whole page */
                ret = -EINVAL;
                goto finish_page;
            }

            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                warn_report("guest reported removed page %"PRIu64" in a section that is not ordinary RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            ret = hv_balloon_handle_remove_section(removed_host_addr, &section,
                                                   pagecnt);

        finish_page:
            if (ret == 0) {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_both,
                                                        pageno, pagecnt,
                                                        true, &removedcnt);
            } else {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_guest,
                                                        pageno, pagecnt,
                                                        false, &removedcnt);
            }

            if (section.mr) {
                memory_region_unref(section.mr);
            }

            offset += pagecnt;
        }
    }

    hv_balloon_remove_response_handle_range(balloon, &range_both, true,
                                            &removedcnt);
    hv_balloon_remove_response_handle_range(balloon, &range_guest, false,
                                            &removedcnt);

    hv_balloon_handle_remove_host_addr_tree(removed_host_addr);
    hvb_page_range_tree_destroy(&removed_host_addr);

    if (removedcnt > balloon->balloon_diff) {
        warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")",
                    removedcnt, balloon->balloon_diff);
        balloon->balloon_diff = 0;
    } else {
        balloon->balloon_diff -= removedcnt;
    }
}

static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize,
                                       const char *msgname)
{
    VMBusChanReq *vmreq = &req->vmreq;
    uint32_t msglen = vmreq->msglen;

    if (msglen >= minsize) {
        return true;
    }

    warn_report("%s message too short (%u vs %zu), ignoring", msgname,
                (unsigned int)msglen, minsize);
    return false;
}

static void hv_balloon_handle_version_request(HvBalloon *balloon,
                                              HvBalloonReq *req,
                                              StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_version_request *msgVr = vmreq->msg;
    struct dm_version_response respVr;

    if (balloon->state != S_VERSION) {
        warn_report("unexpected DM_VERSION_REQUEST in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr),
                                    "DM_VERSION_REQUEST")) {
        return;
    }

    trace_hv_balloon_incoming_version(msgVr->version.major_version,
                                      msgVr->version.minor_version);

    memset(&respVr, 0, sizeof(respVr));
    respVr.hdr.type = DM_VERSION_RESPONSE;
    respVr.hdr.size = sizeof(respVr);
    respVr.hdr.trans_id = msgVr->hdr.trans_id;
    respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 &&
        msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3;

    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr);

    if (respVr.is_accepted) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS);
    }
}

static void hv_balloon_handle_caps_report(HvBalloon *balloon,
                                          HvBalloonReq *req,
                                          StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_capabilities *msgCap = vmreq->msg;
    struct dm_capabilities_resp_msg respCap;

    if (balloon->state != S_CAPS) {
        warn_report("unexpected DM_CAPABILITIES_REPORT in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap),
                                    "DM_CAPABILITIES_REPORT")) {
        return;
    }

    trace_hv_balloon_incoming_caps(msgCap->caps.caps);
    balloon->caps = msgCap->caps;

    memset(&respCap, 0, sizeof(respCap));
    respCap.hdr.type = DM_CAPABILITIES_RESPONSE;
    respCap.hdr.size = sizeof(respCap);
    respCap.hdr.trans_id = msgCap->hdr.trans_id;
    respCap.is_accepted = 1;
    respCap.hot_remove = 1;
    respCap.suppress_pressure_reports = !balloon->status_report.enabled;
    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap);

    timer_mod(&balloon->post_init_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
              HV_BALLOON_POST_INIT_WAIT);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT);
}

static void hv_balloon_handle_status_report(HvBalloon *balloon,
                                            HvBalloonReq *req)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_status *msgStatus = vmreq->msg;

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus),
                                    "DM_STATUS_REPORT")) {
        return;
    }

    if (!balloon->status_report.enabled) {
        return;
    }

    balloon->status_report.committed = msgStatus->num_committed;
    balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.available = msgStatus->num_avail;
    balloon->status_report.available *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.received = true;

    qapi_event_send_hv_balloon_status_report(balloon->status_report.committed,
                                             balloon->status_report.available);
}
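
/*
 * Implements the query-hv-balloon-status-report QMP command, e.g.:
 *   -> { "execute": "query-hv-balloon-status-report" }
 *   <- { "return": { "committed": 819200000, "available": 3276800000 } }
 * (sample values); it fails unless the "status-report" device property is
 * enabled and the guest has already sent at least one DM_STATUS_REPORT.
 */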
HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
{
    HvBalloon *balloon;
    HvBalloonInfo *info;

    balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL));
    if (!balloon) {
        error_setg(errp, "no %s device present", TYPE_HV_BALLOON);
        return NULL;
    }

    if (!balloon->status_report.enabled) {
        error_setg(errp, "guest memory status reporting not enabled");
        return NULL;
    }

    if (!balloon->status_report.received) {
        error_setg(errp, "no guest memory status report received yet");
        return NULL;
    }

    info = g_malloc0(sizeof(*info));
    info->committed = balloon->status_report.committed;
    info->available = balloon->status_report.available;
    return info;
}

static void hv_balloon_handle_unballoon_response(HvBalloon *balloon,
                                                 HvBalloonReq *req,
                                                 StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_unballoon_response *msgUrR = vmreq->msg;

    if (balloon->state != S_UNBALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR),
                                    "DM_UNBALLOON_RESPONSE"))
        return;

    trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id);

    balloon->trans_id++;

    if (balloon->hot_add_diff > 0) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;

        assert(can_hot_add);
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_hot_add_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_hot_add_response *msgHaR = vmreq->msg;
    OurRange *our_range;

    if (balloon->state != S_HOT_ADD_REPLY_WAIT) {
        warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    assert(balloon->our_range);
    our_range = OUR_RANGE(balloon->our_range);

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR),
                                    "DM_HOT_ADD_RESPONSE"))
        return;

    trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result,
                                      msgHaR->page_count);

    balloon->trans_id++;

    if (msgHaR->result) {
        if (msgHaR->page_count > balloon->ha_current_count) {
            warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")",
                        msgHaR->page_count, balloon->ha_current_count);
            msgHaR->page_count = balloon->ha_current_count;
        }

        hvb_our_range_mark_added(our_range, msgHaR->page_count);
        hot_add_range->start += msgHaR->page_count;
        hot_add_range->count -= msgHaR->page_count;
    }

    if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) {
        /*
         * the current planned range was only partially hot-added, take note
         * how much of it remains and don't attempt any further hot adds
         */
        our_range_mark_remaining_unusable(our_range);

        goto ret_idle;
    }

    /* any pages remaining to hot-add in our range? */
    if (hot_add_range->count > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
        return;
    }

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}
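
/*
 * A DM_BALLOON_RESPONSE lists the ranges the guest actually gave up; when
 * more_pages is set, further responses belonging to the same transaction
 * will follow, so only the final response ends the transaction and posts
 * the next balloon request chunk if a deficit still remains.
 */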
static void hv_balloon_handle_balloon_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_balloon_response *msgBR = vmreq->msg;

    if (balloon->state != S_BALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_BALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR),
                                    "DM_BALLOON_RESPONSE"))
        return;

    trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count,
                                      msgBR->more_pages);

    if (vmreq->msglen < sizeof(*msgBR) +
        (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) {
        warn_report("DM_BALLOON_RESPONSE too short for the range count");
        return;
    }

    if (msgBR->range_count == 0) {
        /* The guest is already at its minimum size */
        balloon->balloon_diff = 0;
        goto ret_end_trans;
    } else {
        hv_balloon_handle_remove_ranges(balloon,
                                        msgBR->range_array,
                                        msgBR->range_count);
    }

    /* More responses expected? */
    if (msgBR->more_pages) {
        return;
    }

ret_end_trans:
    balloon->trans_id++;

    if (balloon->balloon_diff > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req,
                                     StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_message *msg = vmreq->msg;

    if (vmreq->msglen < sizeof(msg->hdr)) {
        return;
    }

    switch (msg->hdr.type) {
    case DM_VERSION_REQUEST:
        hv_balloon_handle_version_request(balloon, req, stdesc);
        break;

    case DM_CAPABILITIES_REPORT:
        hv_balloon_handle_caps_report(balloon, req, stdesc);
        break;

    case DM_STATUS_REPORT:
        hv_balloon_handle_status_report(balloon, req);
        break;

    case DM_MEM_HOT_ADD_RESPONSE:
        hv_balloon_handle_hot_add_response(balloon, req, stdesc);
        break;

    case DM_UNBALLOON_RESPONSE:
        hv_balloon_handle_unballoon_response(balloon, req, stdesc);
        break;

    case DM_BALLOON_RESPONSE:
        hv_balloon_handle_balloon_response(balloon, req, stdesc);
        break;

    default:
        warn_report("unknown DM message %u", msg->hdr.type);
        break;
    }
}

static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan;
    HvBalloonReq *req;

    if (balloon->state == S_WAIT_RESET ||
        balloon->state == S_POST_RESET_CLOSED) {
        return false;
    }

    chan = hv_balloon_get_channel(balloon);
    if (vmbus_channel_recv_start(chan)) {
        return false;
    }

    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
        hv_balloon_handle_packet(balloon, req, stdesc);
        vmbus_free_req(req);
        vmbus_channel_recv_pop(chan);

        if (stdesc->state != S_NO_CHANGE) {
            break;
        }
    }

    return vmbus_channel_recv_done(chan) > 0;
}

/* old state handler -> new state transition (potential) */
static bool hv_balloon_event_loop_state(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;

    hv_balloon_handle_state(balloon, &state_new);
    return hv_balloon_state_set(balloon, state_new.state, state_new.desc);
}

/* VMBus message -> new state transition (potential) */
static bool hv_balloon_event_loop_recv(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;
    bool any_recv, state_changed;

    any_recv = hv_balloon_recv_channel(balloon, &state_new);
    state_changed = hv_balloon_state_set(balloon,
                                         state_new.state, state_new.desc);

    return state_changed || any_recv;
}
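
/*
 * Keep alternating between the state handler and the channel receive
 * handler until neither of them changes the state or receives anything,
 * that is, until the device becomes quiescent.
 */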
static void hv_balloon_event_loop(HvBalloon *balloon)
{
    bool state_repeat, recv_repeat;

    do {
        state_repeat = hv_balloon_event_loop_state(balloon);
        recv_repeat = hv_balloon_event_loop_recv(balloon);
    } while (state_repeat || recv_repeat);
}

static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    hv_balloon_event_loop(balloon);
}

static void hv_balloon_stat(void *opaque, BalloonInfo *info)
{
    HvBalloon *balloon = opaque;
    info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr)
        << HV_BALLOON_PFN_SHIFT;
}

static void hv_balloon_to_target(void *opaque, ram_addr_t target)
{
    HvBalloon *balloon = opaque;
    uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT;

    if (!target_pages) {
        return;
    }

    /*
     * always set target_changed, even with unchanged target, as the user
     * might be asking us to try reaching it again
     */
    balloon->target = target_pages;
    balloon->target_changed = true;

    hv_balloon_event_loop(balloon);
}

static int hv_balloon_vmdev_open_channel(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    if (balloon->state != S_POST_RESET_CLOSED) {
        warn_report("guest trying to open a DM channel in invalid %d state",
                    balloon->state);
        return -EINVAL;
    }

    HV_BALLOON_SET_STATE(balloon, S_VERSION);
    hv_balloon_event_loop(balloon);

    return 0;
}

static void hv_balloon_vmdev_close_channel(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    timer_del(&balloon->post_init_timer);

    /* Don't report stale data */
    balloon->status_report.received = false;

    HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET);
    hv_balloon_event_loop(balloon);
}

static void hv_balloon_post_init_timer(void *opaque)
{
    HvBalloon *balloon = opaque;

    if (balloon->state != S_POST_INIT_WAIT) {
        return;
    }

    HV_BALLOON_SET_STATE(balloon, S_IDLE);
    hv_balloon_event_loop(balloon);
}

static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon)
{
    g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free);
}

static void hv_balloon_system_reset(void *opaque)
{
    HvBalloon *balloon = HV_BALLOON(opaque);

    hv_balloon_system_reset_unrealize_common(balloon);
}

static void hv_balloon_ensure_mr(HvBalloon *balloon)
{
    MemoryRegion *hostmem_mr;

    assert(balloon->hostmem);

    if (balloon->mr) {
        return;
    }

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);

    balloon->mr = g_new0(MemoryRegion, 1);
    memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
                       memory_region_size(hostmem_mr));
    balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}

static void hv_balloon_free_mr(HvBalloon *balloon)
{
    if (!balloon->mr) {
        return;
    }

    object_unparent(OBJECT(balloon->mr));
    g_clear_pointer(&balloon->mr, g_free);
}
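
/*
 * Illustrative command line usage (the backend name "mem1" and the size are
 * examples only); a vmbus-bridge device must also be present:
 *   -device vmbus-bridge
 *   -object memory-backend-ram,id=mem1,size=8G
 *   -device hv-balloon,id=hvb,memdev=mem1
 * The "memdev" property is optional: without it the device can still
 * balloon the guest down, it just cannot hot-add memory.
 */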
static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
{
    ERRP_GUARD();
    HvBalloon *balloon = HV_BALLOON(vdev);
    int ret;

    balloon->state = S_WAIT_RESET;

    ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat,
                                   balloon);
    if (ret < 0) {
        /* This also protects against having multiple hv-balloon instances */
        error_setg(errp, "Only one balloon device is supported");
        return;
    }

    if (balloon->hostmem) {
        if (host_memory_backend_is_mapped(balloon->hostmem)) {
            Object *obj = OBJECT(balloon->hostmem);

            error_setg(errp, "'%s' property specifies a busy memdev: %s",
                       HV_BALLOON_MEMDEV_PROP,
                       object_get_canonical_path_component(obj));
            goto out_balloon_handler;
        }

        hv_balloon_ensure_mr(balloon);

        /* This is rather unlikely to happen, but let's still check for it. */
        if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr),
                             HV_BALLOON_PAGE_SIZE)) {
            error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64,
                       HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE);
            goto out_balloon_handler;
        }

        host_memory_backend_set_mapped(balloon->hostmem, true);
        vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem),
                             DEVICE(balloon));
    } else if (balloon->addr) {
        error_setg(errp, "'%s' property must not be set without a memdev",
                   HV_BALLOON_MEMDEV_PROP);
        goto out_balloon_handler;
    }

    timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL,
                  hv_balloon_post_init_timer, balloon);

    qemu_register_reset(hv_balloon_system_reset, balloon);

    return;

out_balloon_handler:
    qemu_remove_balloon_handler(balloon);
}

/*
 * VMBus device reset has to be implemented in case the guest decides to
 * disconnect and reconnect to the VMBus without rebooting the whole system.
 *
 * However, the hot-added memory can't be removed here as Windows keeps on using
 * it until the system is restarted, even after disconnecting from the VMBus.
 */
static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
{
    HvBalloon *balloon = HV_BALLOON(vdev);

    if (balloon->state == S_POST_RESET_CLOSED) {
        return;
    }

    if (balloon->our_range) {
        hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range));
    }

    hvb_page_range_tree_destroy(&balloon->removed_guest);
    hvb_page_range_tree_destroy(&balloon->removed_both);
    hvb_page_range_tree_init(&balloon->removed_guest);
    hvb_page_range_tree_init(&balloon->removed_both);

    balloon->trans_id = 0;
    balloon->removed_guest_ctr = 0;
    balloon->removed_both_ctr = 0;

    HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED);
    hv_balloon_event_loop(balloon);
}

/*
 * Clean up things that were (possibly) allocated pre-realization, for example
 * from memory_device_pre_plug(), so we don't leak them if the device doesn't
 * actually get realized in the end.
 */
static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
{
    hv_balloon_free_mr(balloon);
    balloon->addr = 0;

    balloon->memslot_count = 0;
}

static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev)
{
    HvBalloon *balloon = HV_BALLOON(vdev);

    qemu_unregister_reset(hv_balloon_system_reset, balloon);

    hv_balloon_system_reset_unrealize_common(balloon);

    qemu_remove_balloon_handler(balloon);

    if (balloon->hostmem) {
        vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem),
                               DEVICE(balloon));
        host_memory_backend_set_mapped(balloon->hostmem, false);
    }

    hvb_page_range_tree_destroy(&balloon->removed_guest);
    hvb_page_range_tree_destroy(&balloon->removed_both);

    hv_balloon_unrealize_finalize_common(balloon);
}

static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md)
{
    return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP,
                                    &error_abort);
}

static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr,
                                   Error **errp)
{
    object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp);
}

static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
                                                     Error **errp)
{
    HvBalloon *balloon = HV_BALLOON(md);

    if (!balloon->hostmem) {
        return NULL;
    }

    hv_balloon_ensure_mr(balloon);

    return balloon->mr;
}

static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
{
    /*
     * The VM can indicate an alignment up to 32 GiB. Memory device core can
     * usually only handle/guarantee 1 GiB alignment. The user will have to
     * specify a larger maxmem eventually.
     *
     * The memory device core will warn the user in case maxmem might have to be
     * increased and will fail plugging the device if there is not sufficient
     * space after alignment.
     *
     * TODO: we could do the alignment ourselves in a slightly bigger region.
     * But this feels better, although the warning might be annoying. Maybe
     * we can optimize that in the future (e.g., with such a device on the
     * cmdline place/size the device memory region differently).
     */
    return 32 * GiB;
}

static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
                                           MemoryDeviceInfo *info)
{
    HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1);
    const HvBalloon *balloon = HV_BALLOON(md);
    DeviceState *dev = DEVICE(md);

    if (dev->id) {
        hi->id = g_strdup(dev->id);
    }

    if (balloon->hostmem) {
        hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem));
        hi->memaddr = balloon->addr;
        hi->has_memaddr = true;
        hi->max_size = memory_region_size(balloon->mr);
        /* TODO: expose current provided size or something else? */
    } else {
        hi->max_size = 0;
    }

    info->u.hv_balloon.data = hi;
    info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON;
}
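
/*
 * Worked example: a 3 GiB backend with a suggested limit of 8 memslots
 * gives memslot_size = QEMU_ALIGN_UP(3 GiB / 8, 128 MiB) = 384 MiB and
 * memslots = QEMU_ALIGN_UP(3 GiB, 384 MiB) / 384 MiB = 8; the slots are
 * then mapped one by one as hot-added memory grows into them.
 */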
static void hv_balloon_decide_memslots(MemoryDeviceState *md,
                                       unsigned int limit)
{
    HvBalloon *balloon = HV_BALLOON(md);
    MemoryRegion *hostmem_mr;
    uint64_t region_size, memslot_size, memslots;

    /* We're called exactly once, before realizing the device. */
    assert(!balloon->memslot_count);

    /* We should not be called if we don't have a memory backend */
    assert(balloon->hostmem);

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
    region_size = memory_region_size(hostmem_mr);

    assert(region_size > 0);
    memslot_size = QEMU_ALIGN_UP(region_size / limit,
                                 HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN);
    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;

    if (memslots > 1) {
        balloon->memslot_size = memslot_size;
    } else {
        balloon->memslot_size = region_size;
    }

    assert(memslots <= UINT_MAX);
    balloon->memslot_count = memslots;
}

static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md)
{
    const HvBalloon *balloon = HV_BALLOON(md);

    /* We're called after setting the suggested limit. */
    assert(balloon->memslot_count > 0);

    return balloon->memslot_count;
}

static void hv_balloon_init(Object *obj)
{
}

static void hv_balloon_finalize(Object *obj)
{
    HvBalloon *balloon = HV_BALLOON(obj);

    hv_balloon_unrealize_finalize_common(balloon);
}

static const Property hv_balloon_properties[] = {
    DEFINE_PROP_BOOL("status-report", HvBalloon,
                     status_report.enabled, false),

    /* MEMORY_DEVICE props */
    DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
    DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0),
};

static void hv_balloon_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);

    device_class_set_props(dc, hv_balloon_properties);
    qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid);
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);

    vdc->vmdev_realize = hv_balloon_vmdev_realize;
    vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize;
    vdc->vmdev_reset = hv_balloon_vmdev_reset;
    vdc->open_channel = hv_balloon_vmdev_open_channel;
    vdc->close_channel = hv_balloon_vmdev_close_channel;
    vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify;

    mdc->get_addr = hv_balloon_md_get_addr;
    mdc->set_addr = hv_balloon_md_set_addr;
    mdc->get_plugged_size = memory_device_get_region_size;
    mdc->get_memory_region = hv_balloon_md_get_memory_region;
    mdc->decide_memslots = hv_balloon_decide_memslots;
    mdc->get_memslots = hv_balloon_get_memslots;
    mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
    mdc->fill_device_info = hv_balloon_md_fill_device_info;
}