xref: /qemu/hw/virtio/virtio-balloon.c (revision 2ffc49eea1bbd454913a88a0ad872c2649b36950)
1 /*
2  * Virtio Balloon Device
3  *
4  * Copyright IBM, Corp. 2008
5  * Copyright (C) 2011 Red Hat, Inc.
6  * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
7  *
8  * Authors:
9  *  Anthony Liguori   <aliguori@us.ibm.com>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2.  See
12  * the COPYING file in the top-level directory.
13  *
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qemu/iov.h"
18 #include "qemu/module.h"
19 #include "qemu/timer.h"
20 #include "hw/virtio/virtio.h"
21 #include "hw/mem/pc-dimm.h"
22 #include "sysemu/balloon.h"
23 #include "hw/virtio/virtio-balloon.h"
24 #include "exec/address-spaces.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-events-misc.h"
27 #include "qapi/visitor.h"
28 #include "trace.h"
29 #include "qemu/error-report.h"
30 #include "migration/misc.h"
31 
32 #include "hw/virtio/virtio-bus.h"
33 #include "hw/virtio/virtio-access.h"
34 
/* Balloon page size: fixed at 4kiB by the virtio spec (VIRTIO_BALLOON_PFN_SHIFT). */
#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)

/*
 * Tracks a host page larger than BALLOON_PAGE_SIZE that has only been
 * partially ballooned so far.  One bit per balloon-page-sized subpage;
 * once every bit is set the whole host page can be discarded.
 */
struct PartiallyBalloonedPage {
    RAMBlock *rb;              /* RAM block containing the host page */
    ram_addr_t base;           /* offset of the host page within rb */
    unsigned long bitmap[];    /* one bit per ballooned 4kiB subpage */
};
42 
/*
 * Handle one guest-inflated balloon page: discard the backing host
 * memory so the host can reclaim it.  @offset is the page's offset
 * within memory region @mr.  When the host page size exceeds the 4kiB
 * balloon page, subpages are accumulated in balloon->pbp and the host
 * page is only discarded once it has been completely ballooned.
 */
static void balloon_inflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    int subpages;
    ram_addr_t ram_offset, host_page_base;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);
    /* Round down to the start of the host page containing this offset. */
    host_page_base = ram_offset & ~(rb_page_size - 1);

    if (rb_page_size == BALLOON_PAGE_SIZE) {
        /* Easy case */

        ram_block_discard_range(rb, ram_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        return;
    }

    /* Hard case
     *
     * We've put a piece of a larger host page into the balloon - we
     * need to keep track until we have a whole host page to
     * discard
     */
    warn_report_once(
"Balloon used with backing page size > 4kiB, this may not be reliable");

    subpages = rb_page_size / BALLOON_PAGE_SIZE;

    if (balloon->pbp
        && (rb != balloon->pbp->rb
            || host_page_base != balloon->pbp->base)) {
        /* We've partially ballooned part of a host page, but now
         * we're trying to balloon part of a different one.  Too hard,
         * give up on the old partial page */
        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    if (!balloon->pbp) {
        /* Starting on a new host page; the bitmap is sized to hold one
         * bit per balloon-page-sized subpage. */
        size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long);
        balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen);
        balloon->pbp->rb = rb;
        balloon->pbp->base = host_page_base;
    }

    /* Mark this 4kiB subpage as ballooned within the host page. */
    set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE,
            balloon->pbp->bitmap);

    if (bitmap_full(balloon->pbp->bitmap, subpages)) {
        /* We've accumulated a full host page, we can actually discard
         * it now */

        ram_block_discard_range(rb, balloon->pbp->base, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */

        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }
}
113 
/*
 * Handle one guest-deflated balloon page: hint to the host kernel that
 * the backing memory will be needed again.  Any partially-ballooned
 * host page state is dropped first, since the guest is reusing memory.
 */
static void balloon_deflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    ram_addr_t ram_offset;
    void *host_addr;
    int ret;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);

    if (balloon->pbp) {
        /* Let's play safe and always reset the pbp on deflation requests. */
        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    /* Align down to the start of the host page containing the address. */
    host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));

    /* When a page is deflated, we hint the whole host page it lives
     * on, since we can't do anything smaller */
    ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
    if (ret != 0) {
        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
                    strerror(errno));
        /* Otherwise ignore, failing to page hint shouldn't be fatal */
    }
}
146 
/*
 * QOM property names for each statistic the guest may report, indexed
 * by the VIRTIO_BALLOON_S_* tag.  Terminated by a NULL at S_NR.
 */
static const char *balloon_stat_names[] = {
   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
   [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
   [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
   [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
   [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
   [VIRTIO_BALLOON_S_NR] = NULL
};
160 
161 /*
162  * reset_stats - Mark all items in the stats array as unset
163  *
164  * This function needs to be called at device initialization and before
165  * updating to a set of newly-generated stats.  This will ensure that no
166  * stale values stick around in case the guest reports a subset of the supported
167  * statistics.
168  */
169 static inline void reset_stats(VirtIOBalloon *dev)
170 {
171     int i;
172     for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
173 }
174 
175 static bool balloon_stats_supported(const VirtIOBalloon *s)
176 {
177     VirtIODevice *vdev = VIRTIO_DEVICE(s);
178     return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
179 }
180 
181 static bool balloon_stats_enabled(const VirtIOBalloon *s)
182 {
183     return s->stats_poll_interval > 0;
184 }
185 
186 static void balloon_stats_destroy_timer(VirtIOBalloon *s)
187 {
188     if (balloon_stats_enabled(s)) {
189         timer_del(s->stats_timer);
190         timer_free(s->stats_timer);
191         s->stats_timer = NULL;
192         s->stats_poll_interval = 0;
193     }
194 }
195 
196 static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
197 {
198     timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
199 }
200 
/*
 * Stats timer callback: returns the guest's pending stats buffer, which
 * prompts the driver to refill the stats vq with fresh statistics.  If
 * no buffer is pending (or the stats vq feature isn't negotiated), just
 * re-arm the timer and try again after the polling interval.
 */
static void balloon_stats_poll_cb(void *opaque)
{
    VirtIOBalloon *s = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
        /* re-schedule */
        balloon_stats_change_timer(s, s->stats_poll_interval);
        return;
    }

    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
    virtio_notify(vdev, s->svq);
    g_free(s->stats_vq_elem);
    s->stats_vq_elem = NULL;
}
217 
/*
 * QOM getter for "guest-stats": emits a struct containing "last-update"
 * (seconds since the epoch of the last guest report) and a nested
 * "stats" struct with one entry per supported statistic (-1 when the
 * guest did not report that value; see reset_stats()).
 */
static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    Error *err = NULL;
    VirtIOBalloon *s = opaque;
    int i;

    visit_start_struct(v, name, NULL, 0, &err);
    if (err) {
        goto out;
    }
    visit_type_int(v, "last-update", &s->stats_last_update, &err);
    if (err) {
        goto out_end;
    }

    visit_start_struct(v, "stats", NULL, 0, &err);
    if (err) {
        goto out_end;
    }
    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
        visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
        if (err) {
            goto out_nested;
        }
    }
    /* Completeness check of the nested "stats" struct. */
    visit_check_struct(v, &err);
out_nested:
    /* end_struct must run even on error to balance start_struct. */
    visit_end_struct(v, NULL);

    if (!err) {
        /* Completeness check of the outer struct. */
        visit_check_struct(v, &err);
    }
out_end:
    visit_end_struct(v, NULL);
out:
    error_propagate(errp, err);
}
256 
257 static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
258                                             const char *name, void *opaque,
259                                             Error **errp)
260 {
261     VirtIOBalloon *s = opaque;
262     visit_type_int(v, name, &s->stats_poll_interval, errp);
263 }
264 
265 static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
266                                             const char *name, void *opaque,
267                                             Error **errp)
268 {
269     VirtIOBalloon *s = opaque;
270     Error *local_err = NULL;
271     int64_t value;
272 
273     visit_type_int(v, name, &value, &local_err);
274     if (local_err) {
275         error_propagate(errp, local_err);
276         return;
277     }
278 
279     if (value < 0) {
280         error_setg(errp, "timer value must be greater than zero");
281         return;
282     }
283 
284     if (value > UINT32_MAX) {
285         error_setg(errp, "timer value is too big");
286         return;
287     }
288 
289     if (value == s->stats_poll_interval) {
290         return;
291     }
292 
293     if (value == 0) {
294         /* timer=0 disables the timer */
295         balloon_stats_destroy_timer(s);
296         return;
297     }
298 
299     if (balloon_stats_enabled(s)) {
300         /* timer interval change */
301         s->stats_poll_interval = value;
302         balloon_stats_change_timer(s, value);
303         return;
304     }
305 
306     /* create a new timer */
307     g_assert(s->stats_timer == NULL);
308     s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
309     s->stats_poll_interval = value;
310     balloon_stats_change_timer(s, 0);
311 }
312 
/*
 * Handler for both the inflate (ivq) and deflate (dvq) virtqueues.
 * Each queue element carries an array of little-endian 32-bit guest
 * PFNs; each PFN names one 4kiB balloon page to inflate or deflate.
 * Bad addresses (unmapped, ROM, or non-RAM) are traced and skipped.
 */
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    MemoryRegionSection section;

    for (;;) {
        size_t offset = 0;
        uint32_t pfn;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        /* Consume the buffer 4 bytes (one PFN) at a time. */
        while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
            unsigned int p = virtio_ldl_p(vdev, &pfn);
            hwaddr pa;

            pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
            offset += 4;

            section = memory_region_find(get_system_memory(), pa,
                                         BALLOON_PAGE_SIZE);
            if (!section.mr) {
                trace_virtio_balloon_bad_addr(pa);
                continue;
            }
            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                trace_virtio_balloon_bad_addr(pa);
                memory_region_unref(section.mr);
                continue;
            }

            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
                                               pa);
            /* Skip the actual work while ballooning is inhibited
             * (e.g. during incoming migration). */
            if (!qemu_balloon_is_inhibited()) {
                if (vq == s->ivq) {
                    balloon_inflate_page(s, section.mr,
                                         section.offset_within_region);
                } else if (vq == s->dvq) {
                    balloon_deflate_page(s, section.mr, section.offset_within_region);
                } else {
                    g_assert_not_reached();
                }
            }
            memory_region_unref(section.mr);
        }

        virtqueue_push(vq, elem, offset);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}
368 
/*
 * Stats virtqueue handler: parses the guest's array of
 * VirtIOBalloonStat {tag, val} entries into s->stats.  The element is
 * NOT pushed back here — it is kept in s->stats_vq_elem and returned
 * by balloon_stats_poll_cb() to request the next report.
 */
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;
    VirtIOBalloonStat stat;
    size_t offset = 0;
    qemu_timeval tv;

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        goto out;
    }

    if (s->stats_vq_elem != NULL) {
        /* This should never happen if the driver follows the spec. */
        virtqueue_push(vq, s->stats_vq_elem, 0);
        virtio_notify(vdev, vq);
        g_free(s->stats_vq_elem);
    }

    s->stats_vq_elem = elem;

    /* Initialize the stats to get rid of any stale values.  This is only
     * needed to handle the case where a guest supports fewer stats than it
     * used to (ie. it has booted into an old kernel).
     */
    reset_stats(s);

    while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
           == sizeof(stat)) {
        uint16_t tag = virtio_tswap16(vdev, stat.tag);
        uint64_t val = virtio_tswap64(vdev, stat.val);

        offset += sizeof(stat);
        /* Silently ignore tags newer than this QEMU knows about. */
        if (tag < VIRTIO_BALLOON_S_NR)
            s->stats[tag] = val;
    }
    /* Remember how much we consumed, for the eventual virtqueue_push. */
    s->stats_vq_offset = offset;

    if (qemu_gettimeofday(&tv) < 0) {
        warn_report("%s: failed to get time of day", __func__);
        goto out;
    }

    s->stats_last_update = tv.tv_sec;

out:
    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
}
420 
/*
 * Free page vq notifications are deferred to a bottom half (created on
 * the iothread's AioContext in realize), so hints are consumed off the
 * vcpu thread.
 */
static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
                                               VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    qemu_bh_schedule(s->free_page_bh);
}
427 
/*
 * Pop and process one element from the free page vq.  Called with
 * dev->free_page_lock held (see virtio_ballloon_get_free_page_hints);
 * blocks on free_page_cond while the iothread is told to pause.
 *
 * out_sg carries a 32-bit command id: matching the current
 * free_page_report_cmd_id starts reporting, anything else stops it.
 * in_sg carries the actual free page hints, forwarded to migration.
 *
 * Returns true if an element was consumed successfully, false when the
 * vq is empty or the element was malformed.
 */
static bool get_free_page_hints(VirtIOBalloon *dev)
{
    VirtQueueElement *elem;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool ret = true;

    while (dev->block_iothread) {
        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
    }

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        return false;
    }

    if (elem->out_num) {
        uint32_t id;
        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                 &id, sizeof(id));

        virtio_tswap32s(vdev, &id);
        if (unlikely(size != sizeof(id))) {
            virtio_error(vdev, "received an incorrect cmd id");
            ret = false;
            goto out;
        }
        if (id == dev->free_page_report_cmd_id) {
            dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
        } else {
            /*
             * Stop the optimization only when it has started. This
             * avoids a stale stop sign for the previous command.
             */
            if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
                dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
            }
        }
    }

    if (elem->in_num) {
        /* Only forward hints while a report is in progress. */
        if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
            qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
                                      elem->in_sg[0].iov_len);
        }
    }

out:
    virtqueue_push(vq, elem, 1);
    g_free(elem);
    return ret;
}
480 
/*
 * Bottom-half body (runs in the iothread): drain free page hints from
 * the guest.  Guest notifications are disabled while draining and
 * re-enabled on exit.  NOTE(review): "ballloon" (triple l) is a typo in
 * the function name, kept because realize references it.
 */
static void virtio_ballloon_get_free_page_hints(void *opaque)
{
    VirtIOBalloon *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool continue_to_get_hints;

    do {
        qemu_mutex_lock(&dev->free_page_lock);
        virtio_queue_set_notification(vq, 0);
        continue_to_get_hints = get_free_page_hints(dev);
        qemu_mutex_unlock(&dev->free_page_lock);
        virtio_notify(vdev, vq);
      /*
       * Start to poll the vq once the reporting started. Otherwise, continue
       * only when there are entries on the vq, which need to be given back.
       */
    } while (continue_to_get_hints ||
             dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
    virtio_queue_set_notification(vq, 1);
}
502 
503 static bool virtio_balloon_free_page_support(void *opaque)
504 {
505     VirtIOBalloon *s = opaque;
506     VirtIODevice *vdev = VIRTIO_DEVICE(s);
507 
508     return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
509 }
510 
/*
 * Kick off a new round of free page reporting: bump the command id
 * (wrapping back to the minimum at UINT_MAX) and notify the guest via a
 * config update.  Skipped while the VM is not running, since the
 * stop-and-copy phase doesn't benefit from the optimization.
 */
static void virtio_balloon_free_page_start(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    /* For the stop and copy phase, we don't need to start the optimization */
    if (!vdev->vm_running) {
        return;
    }

    if (s->free_page_report_cmd_id == UINT_MAX) {
        s->free_page_report_cmd_id =
                       VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
    } else {
        s->free_page_report_cmd_id++;
    }

    s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
    virtio_notify_config(vdev);
}
530 
/*
 * Actively stop free page reporting: set the status to S_STOP under the
 * free_page_lock and notify the guest (which will read the STOP command
 * id from the config space).  No-op if already stopped.
 */
static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
        /*
         * The lock also guarantees us that the
         * virtio_ballloon_get_free_page_hints exits after the
         * free_page_report_status is set to S_STOP.
         */
        qemu_mutex_lock(&s->free_page_lock);
        /*
         * The guest hasn't done the reporting, so host sends a notification
         * to the guest to actively stop the reporting.
         */
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        qemu_mutex_unlock(&s->free_page_lock);
        virtio_notify_config(vdev);
    }
}
551 
/*
 * Mark the reporting round as done (migration finished while the VM was
 * stopped) and tell the guest via a config update.
 */
static void virtio_balloon_free_page_done(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
    virtio_notify_config(vdev);
}
559 
/*
 * Precopy migration notifier: drives the free page hint state machine
 * from migration phase transitions.  Always returns 0 so migration is
 * never blocked by this optimization.
 */
static int
virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
{
    VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
                                      free_page_report_notify);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    PrecopyNotifyData *pnd = data;

    if (!virtio_balloon_free_page_support(dev)) {
        /*
         * This is an optimization provided to migration, so just return 0 to
         * have the normal migration process not affected when this feature is
         * not supported.
         */
        return 0;
    }

    switch (pnd->reason) {
    case PRECOPY_NOTIFY_SETUP:
        precopy_enable_free_page_optimization();
        break;
    case PRECOPY_NOTIFY_COMPLETE:
    case PRECOPY_NOTIFY_CLEANUP:
    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
        virtio_balloon_free_page_stop(dev);
        break;
    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
        /* A fresh round only makes sense while the VM runs; otherwise
         * report the round as done. */
        if (vdev->vm_running) {
            virtio_balloon_free_page_start(dev);
        } else {
            virtio_balloon_free_page_done(dev);
        }
        break;
    default:
        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
    }

    return 0;
}
599 
600 static size_t virtio_balloon_config_size(VirtIOBalloon *s)
601 {
602     uint64_t features = s->host_features;
603 
604     if (s->qemu_4_0_config_size) {
605         return sizeof(struct virtio_balloon_config);
606     }
607     if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
608         return sizeof(struct virtio_balloon_config);
609     }
610     if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
611         return offsetof(struct virtio_balloon_config, poison_val);
612     }
613     return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
614 }
615 
/*
 * Fill the guest-visible config space: balloon target/actual page
 * counts and, when free page hinting is active, the command id the
 * guest should act on (a real id, STOP, or DONE).  Only the first
 * virtio_balloon_config_size() bytes are copied out.
 */
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config = {};

    config.num_pages = cpu_to_le32(dev->num_pages);
    config.actual = cpu_to_le32(dev->actual);

    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(dev->free_page_report_cmd_id);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
        config.free_page_report_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
    }

    trace_virtio_balloon_get_config(config.num_pages, config.actual);
    memcpy(config_data, &config, virtio_balloon_config_size(dev));
}
638 
639 static int build_dimm_list(Object *obj, void *opaque)
640 {
641     GSList **list = opaque;
642 
643     if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
644         DeviceState *dev = DEVICE(obj);
645         if (dev->realized) { /* only realized DIMMs matter */
646             *list = g_slist_prepend(*list, dev);
647         }
648     }
649 
650     object_child_foreach(obj, build_dimm_list, opaque);
651     return 0;
652 }
653 
654 static ram_addr_t get_current_ram_size(void)
655 {
656     GSList *list = NULL, *item;
657     ram_addr_t size = ram_size;
658 
659     build_dimm_list(qdev_get_machine(), &list);
660     for (item = list; item; item = g_slist_next(item)) {
661         Object *obj = OBJECT(item->data);
662         if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
663             size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
664                                             &error_abort);
665         }
666     }
667     g_slist_free(list);
668 
669     return size;
670 }
671 
/*
 * Guest wrote the config space: pick up the new "actual" (number of
 * pages currently in the balloon) and emit a BALLOON_CHANGE QMP event
 * with the resulting guest memory size when it changed.
 */
static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config;
    uint32_t oldactual = dev->actual;
    ram_addr_t vm_ram_size = get_current_ram_size();

    memcpy(&config, config_data, virtio_balloon_config_size(dev));
    dev->actual = le32_to_cpu(config.actual);
    if (dev->actual != oldactual) {
        qapi_event_send_balloon_change(vm_ram_size -
                        ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
    }
    trace_virtio_balloon_set_config(dev->actual, oldactual);
}
688 
689 static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
690                                             Error **errp)
691 {
692     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
693     f |= dev->host_features;
694     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
695 
696     return f;
697 }
698 
699 static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
700 {
701     VirtIOBalloon *dev = opaque;
702     info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
703                                              VIRTIO_BALLOON_PFN_SHIFT);
704 }
705 
/*
 * Balloon handler: set a new target guest memory size (bytes, clamped
 * to the VM's RAM size).  num_pages becomes the number of pages the
 * guest should give up.  NOTE(review): a target of 0 (after clamping)
 * leaves num_pages unchanged and skips the config notify — only the
 * trace fires; confirm this is the intended behavior.
 */
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    ram_addr_t vm_ram_size = get_current_ram_size();

    if (target > vm_ram_size) {
        target = vm_ram_size;
    }
    if (target) {
        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
        virtio_notify_config(vdev);
    }
    trace_virtio_balloon_to_target(target, dev->num_pages);
}
721 
722 static int virtio_balloon_post_load_device(void *opaque, int version_id)
723 {
724     VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
725 
726     if (balloon_stats_enabled(s)) {
727         balloon_stats_change_timer(s, s->stats_poll_interval);
728     }
729     return 0;
730 }
731 
/*
 * Migration subsection for free page hint state; only sent when the
 * feature was negotiated (see virtio_balloon_free_page_support).
 */
static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
    .name = "virtio-balloon-device/free-page-report",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_balloon_free_page_support,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    }
};
743 
/* Migration state for the device proper: balloon target and actual sizes. */
static const VMStateDescription vmstate_virtio_balloon_device = {
    .name = "virtio-balloon-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = virtio_balloon_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(num_pages, VirtIOBalloon),
        VMSTATE_UINT32(actual, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_balloon_free_page_report,
        NULL
    }
};
759 
/*
 * Realize: register the (single, global) balloon handler, create the
 * inflate/deflate/stats virtqueues and, when free page hinting is
 * enabled, the free page vq plus its iothread bottom half and the
 * precopy migration notifier.  Without an iothread the hint feature is
 * disabled and the device is marked broken.
 */
static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
    int ret;

    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
                virtio_balloon_config_size(s));

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);

    if (ret < 0) {
        error_setg(errp, "Only one balloon device is supported");
        virtio_cleanup(vdev);
        return;
    }

    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    if (virtio_has_feature(s->host_features,
                           VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
                                           virtio_balloon_handle_free_page_vq);
        s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
        s->free_page_report_cmd_id =
                           VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
        s->free_page_report_notify.notify =
                                       virtio_balloon_free_page_report_notify;
        precopy_add_notifier(&s->free_page_report_notify);
        if (s->iothread) {
            object_ref(OBJECT(s->iothread));
            /* Hints are drained by a bh on the iothread's AioContext. */
            s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                       virtio_ballloon_get_free_page_hints, s);
            qemu_mutex_init(&s->free_page_lock);
            qemu_cond_init(&s->free_page_cond);
            s->block_iothread = false;
        } else {
            /* Simply disable this feature if the iothread wasn't created. */
            s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
            virtio_error(vdev, "iothread is missing");
        }
    }
    reset_stats(s);
}
807 
/*
 * Unrealize: undo realize — delete the free page bh and notifier (when
 * the hint feature was negotiated), stop reporting, destroy the stats
 * timer, drop the global balloon handler and the virtio state.
 */
static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);

    if (virtio_balloon_free_page_support(s)) {
        qemu_bh_delete(s->free_page_bh);
        virtio_balloon_free_page_stop(s);
        precopy_remove_notifier(&s->free_page_report_notify);
    }
    balloon_stats_destroy_timer(s);
    qemu_remove_balloon_handler(s);
    virtio_cleanup(vdev);
}
822 
/*
 * Device reset: stop any in-flight free page reporting and return the
 * pending stats element to the queue (unpop) so the guest can reclaim
 * the buffer after reset.
 */
static void virtio_balloon_device_reset(VirtIODevice *vdev)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (virtio_balloon_free_page_support(s)) {
        virtio_balloon_free_page_stop(s);
    }

    if (s->stats_vq_elem != NULL) {
        virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
        g_free(s->stats_vq_elem);
        s->stats_vq_elem = NULL;
    }
}
837 
/*
 * Status change hook: when the driver comes up after a VM stop we may
 * have discarded a stats element — rewind the stats vq and re-receive
 * it.  Also gates the free-page iothread: blocked while the VM is
 * stopped, signalled to continue when it resumes.
 */
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (!s->stats_vq_elem && vdev->vm_running &&
        (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
        /* poll stats queue for the element we have discarded when the VM
         * was stopped */
        virtio_balloon_receive_stats(vdev, s->svq);
    }

    if (virtio_balloon_free_page_support(s)) {
        /*
         * The VM is woken up and the iothread was blocked, so signal it to
         * continue.
         */
        if (vdev->vm_running && s->block_iothread) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = false;
            qemu_cond_signal(&s->free_page_cond);
            qemu_mutex_unlock(&s->free_page_lock);
        }

        /* The VM is stopped, block the iothread. */
        if (!vdev->vm_running) {
            qemu_mutex_lock(&s->free_page_lock);
            s->block_iothread = true;
            qemu_mutex_unlock(&s->free_page_lock);
        }
    }
}
869 
/*
 * Instance init: expose the "guest-stats" (read-only struct) and
 * "guest-stats-polling-interval" (read/write int) QOM properties.
 */
static void virtio_balloon_instance_init(Object *obj)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(obj);

    object_property_add(obj, "guest-stats", "guest statistics",
                        balloon_stats_get_all, NULL, NULL, s, NULL);

    object_property_add(obj, "guest-stats-polling-interval", "int",
                        balloon_stats_get_poll_interval,
                        balloon_stats_set_poll_interval,
                        NULL, s, NULL);
}
882 
/* Outer migration wrapper; the device payload lives in the vdc->vmsd. */
static const VMStateDescription vmstate_virtio_balloon = {
    .name = "virtio-balloon",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
892 
/* User-configurable properties; feature bits default to disabled. */
static Property virtio_balloon_properties[] = {
    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
    /* QEMU 4.0 accidentally changed the config size even when free-page-hint
     * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
     * property retains this quirk for QEMU 4.1 machine types.
     */
    DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
                     qemu_4_0_config_size, false),
    /* iothread on which free page hints are drained (required for the
     * free-page-hint feature). */
    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
                     IOThread *),
    DEFINE_PROP_END_OF_LIST(),
};
908 
/* Class init: wire up device properties, migration state and virtio hooks. */
static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_balloon_properties;
    dc->vmsd = &vmstate_virtio_balloon;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_balloon_device_realize;
    vdc->unrealize = virtio_balloon_device_unrealize;
    vdc->reset = virtio_balloon_device_reset;
    vdc->get_config = virtio_balloon_get_config;
    vdc->set_config = virtio_balloon_set_config;
    vdc->get_features = virtio_balloon_get_features;
    vdc->set_status = virtio_balloon_set_status;
    /* Device-level migration payload (wrapped by vmstate_virtio_balloon). */
    vdc->vmsd = &vmstate_virtio_balloon_device;
}
926 
/* QOM type registration info for TYPE_VIRTIO_BALLOON. */
static const TypeInfo virtio_balloon_info = {
    .name = TYPE_VIRTIO_BALLOON,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBalloon),
    .instance_init = virtio_balloon_instance_init,
    .class_init = virtio_balloon_class_init,
};
934 
/* Register the virtio-balloon type with QOM at startup. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_balloon_info);
}

type_init(virtio_register_types)
941