/*
 * Virtio Balloon Device
 *
 * Copyright IBM, Corp. 2008
 * Copyright (C) 2011 Red Hat, Inc.
 * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/module.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio.h"
#include "hw/mem/pc-dimm.h"
#include "sysemu/balloon.h"
#include "hw/virtio/virtio-balloon.h"
#include "exec/address-spaces.h"
#include "qapi/error.h"
#include "qapi/qapi-events-misc.h"
#include "qapi/visitor.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "migration/misc.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

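/*
 * The virtio-balloon protocol always operates on 4 KiB pages
 * (1 << VIRTIO_BALLOON_PFN_SHIFT), independent of the host page size.
 */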
#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)

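/*
 * Tracks inflate requests against a host page that is larger than
 * BALLOON_PAGE_SIZE: the bitmap records which 4 KiB subpages the guest
 * has ballooned so far, and the whole host page is only discarded once
 * every subpage has been seen.
 */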
struct PartiallyBalloonedPage {
    ram_addr_t base_gpa;
    long subpages;
    unsigned long *bitmap;
};

static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp)
{
    if (!pbp) {
        return;
    }
    g_free(pbp->bitmap);
    g_free(pbp);
}

static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa,
                                                        long subpages)
{
    PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1);

    pbp->base_gpa = base_gpa;
    pbp->subpages = subpages;
    pbp->bitmap = bitmap_new(subpages);

    return pbp;
}

static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp,
                                       ram_addr_t base_gpa, long subpages)
{
    return pbp->subpages == subpages && pbp->base_gpa == base_gpa;
}

static void balloon_inflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr mr_offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
    ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
    PartiallyBalloonedPage **pbp = &balloon->pbp;
    RAMBlock *rb;
    size_t rb_page_size;
    int subpages;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &rb_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    if (rb_page_size == BALLOON_PAGE_SIZE) {
        /* Easy case */

        ram_block_discard_range(rb, rb_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        return;
    }

    /* Hard case
     *
     * We've put a piece of a larger host page into the balloon - we
     * need to keep track until we have a whole host page to
     * discard
     */
    warn_report_once(
"Balloon used with backing page size > 4kiB, this may not be reliable");

    rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size);
    subpages = rb_page_size / BALLOON_PAGE_SIZE;
    base_gpa = memory_region_get_ram_addr(mr) + mr_offset -
               (rb_offset - rb_aligned_offset);

    if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) {
        /* We've partially ballooned part of a host page, but now
         * we're trying to balloon part of a different one.  Too hard,
         * give up on the old partial page */
        virtio_balloon_pbp_free(*pbp);
        *pbp = NULL;
    }

    if (!*pbp) {
        *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages);
    }

    set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE,
            (*pbp)->bitmap);

    if (bitmap_full((*pbp)->bitmap, subpages)) {
        /* We've accumulated a full host page, we can actually discard
         * it now */

        ram_block_discard_range(rb, rb_aligned_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        virtio_balloon_pbp_free(*pbp);
        *pbp = NULL;
    }
}

static void balloon_deflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr mr_offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
    ram_addr_t rb_offset;
    RAMBlock *rb;
    size_t rb_page_size;
    void *host_addr;
    int ret;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &rb_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    if (balloon->pbp) {
        /* Let's play safe and always reset the pbp on deflation requests. */
        virtio_balloon_pbp_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));

    /* When a page is deflated, we hint the whole host page it lives
     * on, since we can't do anything smaller */
    ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
    if (ret != 0) {
        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
                    strerror(errno));
        /* Otherwise ignore, failing to page hint shouldn't be fatal */
    }
}

static const char *balloon_stat_names[] = {
   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
   [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
   [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
   [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
   [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
   [VIRTIO_BALLOON_S_NR] = NULL
};

/*
 * reset_stats - Mark all items in the stats array as unset
 *
 * This function needs to be called at device initialization and before
 * updating to a set of newly-generated stats.  This will ensure that no
 * stale values stick around in case the guest reports a subset of the
 * supported statistics.
 */
static inline void reset_stats(VirtIOBalloon *dev)
{
    int i;
    for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
}

static bool balloon_stats_supported(const VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
}

static bool balloon_stats_enabled(const VirtIOBalloon *s)
{
    return s->stats_poll_interval > 0;
}

static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
    if (balloon_stats_enabled(s)) {
        timer_del(s->stats_timer);
        timer_free(s->stats_timer);
        s->stats_timer = NULL;
        s->stats_poll_interval = 0;
    }
}

static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
{
    timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
}

static void balloon_stats_poll_cb(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
        /* re-schedule */
        balloon_stats_change_timer(s, s->stats_poll_interval);
        return;
    }

    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
    virtio_notify(vdev, s->svq);
    g_free(s->stats_vq_elem);
    s->stats_vq_elem = NULL;
}

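/*
 * Getter for the "guest-stats" property: emits a struct containing
 * "last-update" (host time, in seconds, of the last stats refresh) and a
 * nested "stats" struct with one entry per balloon_stat_names[] tag.
 */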
static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    Error *err = NULL;
    VirtIOBalloon *s = opaque;
    int i;

    visit_start_struct(v, name, NULL, 0, &err);
    if (err) {
        goto out;
    }
    visit_type_int(v, "last-update", &s->stats_last_update, &err);
    if (err) {
        goto out_end;
    }

    visit_start_struct(v, "stats", NULL, 0, &err);
    if (err) {
        goto out_end;
    }
    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
        visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
        if (err) {
            goto out_nested;
        }
    }
    visit_check_struct(v, &err);
out_nested:
    visit_end_struct(v, NULL);

    if (!err) {
        visit_check_struct(v, &err);
    }
out_end:
    visit_end_struct(v, NULL);
out:
    error_propagate(errp, err);
}

static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
                                            const char *name, void *opaque,
                                            Error **errp)
{
    VirtIOBalloon *s = opaque;
    visit_type_int(v, name, &s->stats_poll_interval, errp);
}

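/*
 * Setter for the "guest-stats-polling-interval" property, in seconds.
 * A value of 0 disables polling; a positive value (re)arms the timer
 * that periodically returns the stats element to the guest.
 */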
static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
                                            const char *name, void *opaque,
                                            Error **errp)
{
    VirtIOBalloon *s = opaque;
    Error *local_err = NULL;
    int64_t value;

    visit_type_int(v, name, &value, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (value < 0) {
        error_setg(errp, "timer value must not be negative");
        return;
    }

    if (value > UINT32_MAX) {
        error_setg(errp, "timer value is too big");
        return;
    }

    if (value == s->stats_poll_interval) {
        return;
    }

    if (value == 0) {
        /* timer=0 disables the timer */
        balloon_stats_destroy_timer(s);
        return;
    }

    if (balloon_stats_enabled(s)) {
        /* timer interval change */
        s->stats_poll_interval = value;
        balloon_stats_change_timer(s, value);
        return;
    }

    /* create a new timer */
    g_assert(s->stats_timer == NULL);
    s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
    s->stats_poll_interval = value;
    balloon_stats_change_timer(s, 0);
}

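/*
 * Shared handler for the inflate (ivq) and deflate (dvq) queues.  Each
 * request is a sequence of 32-bit page frame numbers in virtio byte
 * order; every PFN names one BALLOON_PAGE_SIZE page of guest memory.
 */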
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    MemoryRegionSection section;

    for (;;) {
        size_t offset = 0;
        uint32_t pfn;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
            unsigned int p = virtio_ldl_p(vdev, &pfn);
            hwaddr pa;

            pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
            offset += 4;

            section = memory_region_find(get_system_memory(), pa,
                                         BALLOON_PAGE_SIZE);
            if (!section.mr) {
                trace_virtio_balloon_bad_addr(pa);
                continue;
            }
            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                trace_virtio_balloon_bad_addr(pa);
                memory_region_unref(section.mr);
                continue;
            }

            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
                                               pa);
            if (!qemu_balloon_is_inhibited()) {
                if (vq == s->ivq) {
                    balloon_inflate_page(s, section.mr,
                                         section.offset_within_region);
                } else if (vq == s->dvq) {
                    balloon_deflate_page(s, section.mr, section.offset_within_region);
                } else {
                    g_assert_not_reached();
                }
            }
            memory_region_unref(section.mr);
        }

        virtqueue_push(vq, elem, offset);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}

static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    VirtIOBalloonStat stat;
    size_t offset = 0;
    qemu_timeval tv;

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        goto out;
    }

    if (s->stats_vq_elem != NULL) {
        /* This should never happen if the driver follows the spec. */
        virtqueue_push(vq, s->stats_vq_elem, 0);
        virtio_notify(vdev, vq);
        g_free(s->stats_vq_elem);
    }

    s->stats_vq_elem = elem;

    /* Initialize the stats to get rid of any stale values.  This is only
     * needed to handle the case where a guest supports fewer stats than it
     * used to (i.e. it has booted into an old kernel).
     */
    reset_stats(s);

    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
           == sizeof(stat)) {
        uint16_t tag = virtio_tswap16(vdev, stat.tag);
        uint64_t val = virtio_tswap64(vdev, stat.val);

        offset += sizeof(stat);
        if (tag < VIRTIO_BALLOON_S_NR)
            s->stats[tag] = val;
    }
    s->stats_vq_offset = offset;

    if (qemu_gettimeofday(&tv) < 0) {
        warn_report("%s: failed to get time of day", __func__);
        goto out;
    }

    s->stats_last_update = tv.tv_sec;

out:
    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
}

static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
                                               VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    qemu_bh_schedule(s->free_page_bh);
}

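/*
 * Pop one element from the free page vq.  An out buffer carries the
 * command id the guest is answering; in buffers carry ranges of
 * guest-free memory, which are forwarded to the migration code via
 * qemu_guest_free_page_hint() while reporting is in the S_START state.
 * Returns false when the vq is empty or the command id is malformed.
 */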
static bool get_free_page_hints(VirtIOBalloon *dev)
{
    VirtQueueElement *elem;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool ret = true;

    while (dev->block_iothread) {
        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
    }

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        return false;
    }

    if (elem->out_num) {
        uint32_t id;
        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                 &id, sizeof(id));

        virtio_tswap32s(vdev, &id);
        if (unlikely(size != sizeof(id))) {
            virtio_error(vdev, "received an incorrect cmd id");
            ret = false;
            goto out;
        }
        if (id == dev->free_page_report_cmd_id) {
            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
        } else {
            /*
             * Stop the optimization only when it has started. This
             * avoids a stale stop sign for the previous command.
             */
            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
            }
        }
    }

    if (elem->in_num) {
        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
                                      elem->in_sg[0].iov_len);
        }
    }

out:
    virtqueue_push(vq, elem, 1);
    g_free(elem);
    return ret;
}

static void virtio_balloon_get_free_page_hints(void *opaque)
{
    VirtIOBalloon *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool continue_to_get_hints;

    do {
        qemu_mutex_lock(&dev->free_page_lock);
        virtio_queue_set_notification(vq, 0);
        continue_to_get_hints = get_free_page_hints(dev);
        qemu_mutex_unlock(&dev->free_page_lock);
        virtio_notify(vdev, vq);
        /*
         * Keep polling the vq once reporting has started.  Otherwise,
         * continue only while there are entries on the vq that need to be
         * given back.
         */
    } while (continue_to_get_hints ||
             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
    virtio_queue_set_notification(vq, 1);
}

static bool virtio_balloon_free_page_support(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
}

static void virtio_balloon_free_page_start(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    /* For the stop and copy phase, we don't need to start the optimization */
    if (!vdev->vm_running) {
        return;
    }

    if (s->free_page_report_cmd_id == UINT_MAX) {
        s->free_page_report_cmd_id =
                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
    } else {
        s->free_page_report_cmd_id++;
    }

    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
    virtio_notify_config(vdev);
}

static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
        /*
         * The lock also guarantees that virtio_balloon_get_free_page_hints()
         * exits after free_page_report_status is set to S_STOP.
         */
        qemu_mutex_lock(&s->free_page_lock);
        /*
         * The guest hasn't finished reporting, so the host sends a
         * notification to the guest to actively stop the reporting.
         */
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        qemu_mutex_unlock(&s->free_page_lock);
        virtio_notify_config(vdev);
    }
}

static void virtio_balloon_free_page_done(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
    virtio_notify_config(vdev);
}

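/*
 * Precopy migration notifier: enables the free page optimization at
 * setup, restarts hinting after each bitmap sync while the VM runs, and
 * stops it on completion, cleanup, or before a bitmap sync.
 */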
static int
virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
{
    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
                                      free_page_report_notify);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    PrecopyNotifyData *pnd = data;

    if (!virtio_balloon_free_page_support(dev)) {
        /*
         * This is an optimization provided to migration, so just return 0 to
         * have the normal migration process not affected when this feature is
         * not supported.
         */
        return 0;
    }

    switch (pnd->reason) {
    case PRECOPY_NOTIFY_SETUP:
        precopy_enable_free_page_optimization();
        break;
    case PRECOPY_NOTIFY_COMPLETE:
    case PRECOPY_NOTIFY_CLEANUP:
    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
        virtio_balloon_free_page_stop(dev);
        break;
    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
        if (vdev->vm_running) {
            virtio_balloon_free_page_start(dev);
        } else {
            virtio_balloon_free_page_done(dev);
        }
        break;
    default:
        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
    }

    return 0;
}

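/*
 * The config space layout grows with the negotiated features: the legacy
 * layout ends before free_page_report_cmd_id, FREE_PAGE_HINT adds the
 * free page command id field, and PAGE_POISON extends it to the full
 * structure.  The qemu-4-0-config-size property forces the full size
 * for compatibility with machines that always used it.
 */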
static size_t virtio_balloon_config_size(VirtIOBalloon *s)
{
    uint64_t features = s->host_features;

    if (s->qemu_4_0_config_size) {
        return sizeof(struct virtio_balloon_config);
    }
    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
        return sizeof(struct virtio_balloon_config);
    }
    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        return offsetof(struct virtio_balloon_config, poison_val);
    }
    return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
}

static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config = {};

    config.num_pages = cpu_to_le32(dev->num_pages);
    config.actual = cpu_to_le32(dev->actual);

    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(dev->free_page_report_cmd_id);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
    }

    trace_virtio_balloon_get_config(config.num_pages, config.actual);
    memcpy(config_data, &config, virtio_balloon_config_size(dev));
}

static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **list = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dev = DEVICE(obj);
        if (dev->realized) { /* only realized DIMMs matter */
            *list = g_slist_prepend(*list, dev);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);
    return 0;
}

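/*
 * Current guest RAM size: the base ram_size plus the size of every
 * realized PC DIMM that has been hotplugged into the machine.
 */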
static ram_addr_t get_current_ram_size(void)
{
    GSList *list = NULL, *item;
    ram_addr_t size = ram_size;

    build_dimm_list(qdev_get_machine(), &list);
    for (item = list; item; item = g_slist_next(item)) {
        Object *obj = OBJECT(item->data);
        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
                                            &error_abort);
        }
    }
    g_slist_free(list);

    return size;
}

static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config;
    uint32_t oldactual = dev->actual;
    ram_addr_t vm_ram_size = get_current_ram_size();

    memcpy(&config, config_data, virtio_balloon_config_size(dev));
    dev->actual = le32_to_cpu(config.actual);
    if (dev->actual != oldactual) {
        qapi_event_send_balloon_change(vm_ram_size -
                        ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
    }
    trace_virtio_balloon_set_config(dev->actual, oldactual);
}

static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
                                            Error **errp)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    f |= dev->host_features;
    virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);

    return f;
}

static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
                                             VIRTIO_BALLOON_PFN_SHIFT);
}

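/*
 * Balloon target handler: "target" is the amount of memory (in bytes)
 * the guest should be left with, so num_pages becomes the difference
 * between the current RAM size and the target, expressed in 4 KiB pages.
 * For example, shrinking a 4 GiB guest to 3 GiB yields
 * num_pages = (4 GiB - 3 GiB) >> 12 = 262144.
 */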
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    ram_addr_t vm_ram_size = get_current_ram_size();

    if (target > vm_ram_size) {
        target = vm_ram_size;
    }
    if (target) {
        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
        virtio_notify_config(vdev);
    }
    trace_virtio_balloon_to_target(target, dev->num_pages);
}

static int virtio_balloon_post_load_device(void *opaque, int version_id)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(opaque);

    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
    .name = "virtio-balloon-device/free-page-report",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_balloon_free_page_support,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_balloon_device = {
    .name = "virtio-balloon-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = virtio_balloon_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(num_pages, VirtIOBalloon),
        VMSTATE_UINT32(actual, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_balloon_free_page_report,
        NULL
    }
};

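/*
 * Realize: registers the balloon handlers and creates the inflate,
 * deflate and stats virtqueues.  When free-page-hint is enabled, a
 * fourth queue is added and its hints are drained from a bottom half
 * running in the configured iothread.
 */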
static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
    int ret;

    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
                virtio_balloon_config_size(s));

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);

    if (ret < 0) {
        error_setg(errp, "Only one balloon device is supported");
        virtio_cleanup(vdev);
        return;
    }

    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    if (virtio_has_feature(s->host_features,
                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
                                           virtio_balloon_handle_free_page_vq);
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        s->free_page_report_cmd_id =
                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
        s->free_page_report_notify.notify =
                                       virtio_balloon_free_page_report_notify;
        precopy_add_notifier(&s->free_page_report_notify);
        if (s->iothread) {
            object_ref(OBJECT(s->iothread));
            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                        virtio_balloon_get_free_page_hints, s);
            qemu_mutex_init(&s->free_page_lock);
            qemu_cond_init(&s->free_page_cond);
            s->block_iothread = false;
        } else {
            /* Simply disable this feature if the iothread wasn't created. */
            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
            virtio_error(vdev, "iothread is missing");
        }
    }
    reset_stats(s);
}

static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);

    if (virtio_balloon_free_page_support(s)) {
        qemu_bh_delete(s->free_page_bh);
        virtio_balloon_free_page_stop(s);
        precopy_remove_notifier(&s->free_page_report_notify);
    }
    balloon_stats_destroy_timer(s);
    qemu_remove_balloon_handler(s);
    virtio_cleanup(vdev);
}

static void virtio_balloon_device_reset(VirtIODevice *vdev)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (virtio_balloon_free_page_support(s)) {
        virtio_balloon_free_page_stop(s);
    }

    if (s->stats_vq_elem != NULL) {
        virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
        g_free(s->stats_vq_elem);
        s->stats_vq_elem = NULL;
    }
}

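/*
 * Status change handler: when the driver becomes ready while the VM is
 * running, poll the stats queue again for the element that was discarded
 * while the VM was stopped; for free-page-hint, block the hinting bottom
 * half while the VM is stopped and wake it up again when the VM resumes.
 */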
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (!s->stats_vq_elem && vdev->vm_running &&
        (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
        /* poll stats queue for the element we have discarded when the VM
         * was stopped */
        virtio_balloon_receive_stats(vdev, s->svq);
    }

    if (virtio_balloon_free_page_support(s)) {
        /*
         * The VM is woken up and the iothread was blocked, so signal it to
         * continue.
         */
        if (vdev->vm_running && s->block_iothread) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = false;
            qemu_cond_signal(&s->free_page_cond);
            qemu_mutex_unlock(&s->free_page_lock);
        }

        /* The VM is stopped, block the iothread. */
        if (!vdev->vm_running) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = true;
            qemu_mutex_unlock(&s->free_page_lock);
        }
    }
}

static void virtio_balloon_instance_init(Object *obj)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(obj);

    object_property_add(obj, "guest-stats", "guest statistics",
                        balloon_stats_get_all, NULL, NULL, s, NULL);

    object_property_add(obj, "guest-stats-polling-interval", "int",
                        balloon_stats_get_poll_interval,
                        balloon_stats_set_poll_interval,
                        NULL, s, NULL);
}

static const VMStateDescription vmstate_virtio_balloon = {
    .name = "virtio-balloon",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_balloon_properties[] = {
    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
    /* QEMU 4.0 accidentally changed the config size even when free-page-hint
     * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
     * property retains this quirk for QEMU 4.1 machine types.
     */
    DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
                     qemu_4_0_config_size, false),
    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
                     IOThread *),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_balloon_properties;
    dc->vmsd = &vmstate_virtio_balloon;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_balloon_device_realize;
    vdc->unrealize = virtio_balloon_device_unrealize;
    vdc->reset = virtio_balloon_device_reset;
    vdc->get_config = virtio_balloon_get_config;
    vdc->set_config = virtio_balloon_set_config;
    vdc->get_features = virtio_balloon_get_features;
    vdc->set_status = virtio_balloon_set_status;
    vdc->vmsd = &vmstate_virtio_balloon_device;
}

static const TypeInfo virtio_balloon_info = {
    .name = TYPE_VIRTIO_BALLOON,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBalloon),
    .instance_init = virtio_balloon_instance_init,
    .class_init = virtio_balloon_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_balloon_info);
}

type_init(virtio_register_types)