xref: /qemu/hw/virtio/virtio.c (revision a7290a79fa262124916dab2bb75188cfd07faad6)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "exec/address-spaces.h"
19 #include "qemu/error-report.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 
/*
 * Default alignment between the consumer and producer parts of a vring
 * (the x86 page size).  It is used by transports such as PCI that give the
 * guest no means of telling the host which alignment to use.
 */
#define VIRTIO_PCI_VRING_ALIGN 4096
37 
/*
 * One entry of a split-ring descriptor table, as laid out in guest memory.
 * vring_split_desc_read() converts every field to host byte order.
 */
typedef struct VRingDesc {
    uint64_t addr;   /* address of the buffer (or of an indirect table) */
    uint32_t len;    /* length in bytes of what @addr points to */
    uint16_t flags;  /* VRING_DESC_F_* bits, e.g. NEXT and INDIRECT */
    uint16_t next;   /* index of the next descriptor when F_NEXT is set */
} VRingDesc;
45 
/*
 * One entry of a packed-ring descriptor table, as laid out in guest memory.
 */
typedef struct VRingPackedDesc {
    uint64_t addr;   /* 64-bit address field */
    uint32_t len;    /* 32-bit length field */
    uint16_t id;     /* filled with the element index when marking used */
    uint16_t flags;  /* carries the VRING_PACKED_DESC_F_AVAIL/USED bits */
} VRingPackedDesc;
52 
/*
 * Header of the split-ring "available" area in guest memory.
 *
 * @ring is a trailing variable-length array; it is declared as a C99
 * flexible array member rather than a zero-length array (a GNU extension).
 * The size of the struct and the offset of @ring are unchanged.  The slot at
 * ring[vring.num] is read by vring_get_used_event().
 */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;
59 
/*
 * One entry of the split-ring "used" array.  virtqueue_split_fill() stores
 * the element index in @id and the byte count it was given in @len.
 */
typedef struct VRingUsedElem {
    uint32_t id;
    uint32_t len;
} VRingUsedElem;
65 
66 typedef struct VRingUsed
67 {
68     uint16_t flags;
69     uint16_t idx;
70     VRingUsedElem ring[0];
71 } VRingUsed;
72 
73 typedef struct VRingMemoryRegionCaches {
74     struct rcu_head rcu;
75     MemoryRegionCache desc;
76     MemoryRegionCache avail;
77     MemoryRegionCache used;
78 } VRingMemoryRegionCaches;
79 
80 typedef struct VRing
81 {
82     unsigned int num;
83     unsigned int num_default;
84     unsigned int align;
85     hwaddr desc;
86     hwaddr avail;
87     hwaddr used;
88     VRingMemoryRegionCaches *caches;
89 } VRing;
90 
/*
 * Packed-ring event structure: a 16-bit off_wrap word followed by a 16-bit
 * flags word.  virtio_queue_packed_set_notification() builds off_wrap from
 * an index with the wrap counter in bit 15.
 */
typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;
95 
96 struct VirtQueue
97 {
98     VRing vring;
99     VirtQueueElement *used_elems;
100 
101     /* Next head to pop */
102     uint16_t last_avail_idx;
103     bool last_avail_wrap_counter;
104 
105     /* Last avail_idx read from VQ. */
106     uint16_t shadow_avail_idx;
107     bool shadow_avail_wrap_counter;
108 
109     uint16_t used_idx;
110     bool used_wrap_counter;
111 
112     /* Last used index value we have signalled on */
113     uint16_t signalled_used;
114 
115     /* Last used index value we have signalled on */
116     bool signalled_used_valid;
117 
118     /* Notification enabled? */
119     bool notification;
120 
121     uint16_t queue_index;
122 
123     unsigned int inuse;
124 
125     uint16_t vector;
126     VirtIOHandleOutput handle_output;
127     VirtIOHandleAIOOutput handle_aio_output;
128     VirtIODevice *vdev;
129     EventNotifier guest_notifier;
130     EventNotifier host_notifier;
131     bool host_notifier_enabled;
132     QLIST_ENTRY(VirtQueue) node;
133 };
134 
135 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
136 {
137     if (!caches) {
138         return;
139     }
140 
141     address_space_cache_destroy(&caches->desc);
142     address_space_cache_destroy(&caches->avail);
143     address_space_cache_destroy(&caches->used);
144     g_free(caches);
145 }
146 
147 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
148 {
149     VRingMemoryRegionCaches *caches;
150 
151     caches = atomic_read(&vq->vring.caches);
152     atomic_rcu_set(&vq->vring.caches, NULL);
153     if (caches) {
154         call_rcu(caches, virtio_free_region_cache, rcu);
155     }
156 }
157 
158 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
159 {
160     VirtQueue *vq = &vdev->vq[n];
161     VRingMemoryRegionCaches *old = vq->vring.caches;
162     VRingMemoryRegionCaches *new = NULL;
163     hwaddr addr, size;
164     int64_t len;
165     bool packed;
166 
167 
168     addr = vq->vring.desc;
169     if (!addr) {
170         goto out_no_cache;
171     }
172     new = g_new0(VRingMemoryRegionCaches, 1);
173     size = virtio_queue_get_desc_size(vdev, n);
174     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
175                                    true : false;
176     len = address_space_cache_init(&new->desc, vdev->dma_as,
177                                    addr, size, packed);
178     if (len < size) {
179         virtio_error(vdev, "Cannot map desc");
180         goto err_desc;
181     }
182 
183     size = virtio_queue_get_used_size(vdev, n);
184     len = address_space_cache_init(&new->used, vdev->dma_as,
185                                    vq->vring.used, size, true);
186     if (len < size) {
187         virtio_error(vdev, "Cannot map used");
188         goto err_used;
189     }
190 
191     size = virtio_queue_get_avail_size(vdev, n);
192     len = address_space_cache_init(&new->avail, vdev->dma_as,
193                                    vq->vring.avail, size, false);
194     if (len < size) {
195         virtio_error(vdev, "Cannot map avail");
196         goto err_avail;
197     }
198 
199     atomic_rcu_set(&vq->vring.caches, new);
200     if (old) {
201         call_rcu(old, virtio_free_region_cache, rcu);
202     }
203     return;
204 
205 err_avail:
206     address_space_cache_destroy(&new->avail);
207 err_used:
208     address_space_cache_destroy(&new->used);
209 err_desc:
210     address_space_cache_destroy(&new->desc);
211 out_no_cache:
212     g_free(new);
213     virtio_virtqueue_reset_region_cache(vq);
214 }
215 
216 /* virt queue functions */
217 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
218 {
219     VRing *vring = &vdev->vq[n].vring;
220 
221     if (!vring->num || !vring->desc || !vring->align) {
222         /* not yet setup -> nothing to do */
223         return;
224     }
225     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
226     vring->used = vring_align(vring->avail +
227                               offsetof(VRingAvail, ring[vring->num]),
228                               vring->align);
229     virtio_init_region_cache(vdev, n);
230 }
231 
232 /* Called within rcu_read_lock().  */
233 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
234                                   MemoryRegionCache *cache, int i)
235 {
236     address_space_read_cached(cache, i * sizeof(VRingDesc),
237                               desc, sizeof(VRingDesc));
238     virtio_tswap64s(vdev, &desc->addr);
239     virtio_tswap32s(vdev, &desc->len);
240     virtio_tswap16s(vdev, &desc->flags);
241     virtio_tswap16s(vdev, &desc->next);
242 }
243 
244 static void vring_packed_event_read(VirtIODevice *vdev,
245                                     MemoryRegionCache *cache,
246                                     VRingPackedDescEvent *e)
247 {
248     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
249     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
250 
251     address_space_read_cached(cache, off_flags, &e->flags,
252                               sizeof(e->flags));
253     /* Make sure flags is seen before off_wrap */
254     smp_rmb();
255     address_space_read_cached(cache, off_off, &e->off_wrap,
256                               sizeof(e->off_wrap));
257     virtio_tswap16s(vdev, &e->off_wrap);
258     virtio_tswap16s(vdev, &e->flags);
259 }
260 
261 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
262                                         MemoryRegionCache *cache,
263                                         uint16_t off_wrap)
264 {
265     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
266 
267     virtio_tswap16s(vdev, &off_wrap);
268     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
269     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
270 }
271 
272 static void vring_packed_flags_write(VirtIODevice *vdev,
273                                      MemoryRegionCache *cache, uint16_t flags)
274 {
275     hwaddr off = offsetof(VRingPackedDescEvent, flags);
276 
277     virtio_tswap16s(vdev, &flags);
278     address_space_write_cached(cache, off, &flags, sizeof(flags));
279     address_space_cache_invalidate(cache, off, sizeof(flags));
280 }
281 
282 /* Called within rcu_read_lock().  */
283 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
284 {
285     return atomic_rcu_read(&vq->vring.caches);
286 }
287 
288 /* Called within rcu_read_lock().  */
289 static inline uint16_t vring_avail_flags(VirtQueue *vq)
290 {
291     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
292     hwaddr pa = offsetof(VRingAvail, flags);
293 
294     if (!caches) {
295         return 0;
296     }
297 
298     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
299 }
300 
301 /* Called within rcu_read_lock().  */
302 static inline uint16_t vring_avail_idx(VirtQueue *vq)
303 {
304     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
305     hwaddr pa = offsetof(VRingAvail, idx);
306 
307     if (!caches) {
308         return 0;
309     }
310 
311     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
312     return vq->shadow_avail_idx;
313 }
314 
315 /* Called within rcu_read_lock().  */
316 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
317 {
318     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
319     hwaddr pa = offsetof(VRingAvail, ring[i]);
320 
321     if (!caches) {
322         return 0;
323     }
324 
325     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
326 }
327 
328 /* Called within rcu_read_lock().  */
329 static inline uint16_t vring_get_used_event(VirtQueue *vq)
330 {
331     return vring_avail_ring(vq, vq->vring.num);
332 }
333 
334 /* Called within rcu_read_lock().  */
335 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
336                                     int i)
337 {
338     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
339     hwaddr pa = offsetof(VRingUsed, ring[i]);
340 
341     if (!caches) {
342         return;
343     }
344 
345     virtio_tswap32s(vq->vdev, &uelem->id);
346     virtio_tswap32s(vq->vdev, &uelem->len);
347     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
348     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
349 }
350 
351 /* Called within rcu_read_lock().  */
352 static uint16_t vring_used_idx(VirtQueue *vq)
353 {
354     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
355     hwaddr pa = offsetof(VRingUsed, idx);
356 
357     if (!caches) {
358         return 0;
359     }
360 
361     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
362 }
363 
364 /* Called within rcu_read_lock().  */
365 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
366 {
367     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
368     hwaddr pa = offsetof(VRingUsed, idx);
369 
370     if (caches) {
371         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
372         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
373     }
374 
375     vq->used_idx = val;
376 }
377 
378 /* Called within rcu_read_lock().  */
379 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
380 {
381     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
382     VirtIODevice *vdev = vq->vdev;
383     hwaddr pa = offsetof(VRingUsed, flags);
384     uint16_t flags;
385 
386     if (!caches) {
387         return;
388     }
389 
390     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
391     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
392     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
393 }
394 
395 /* Called within rcu_read_lock().  */
396 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
397 {
398     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
399     VirtIODevice *vdev = vq->vdev;
400     hwaddr pa = offsetof(VRingUsed, flags);
401     uint16_t flags;
402 
403     if (!caches) {
404         return;
405     }
406 
407     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
408     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
409     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
410 }
411 
412 /* Called within rcu_read_lock().  */
413 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
414 {
415     VRingMemoryRegionCaches *caches;
416     hwaddr pa;
417     if (!vq->notification) {
418         return;
419     }
420 
421     caches = vring_get_region_caches(vq);
422     if (!caches) {
423         return;
424     }
425 
426     pa = offsetof(VRingUsed, ring[vq->vring.num]);
427     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
428     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
429 }
430 
431 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
432 {
433     RCU_READ_LOCK_GUARD();
434 
435     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
436         vring_set_avail_event(vq, vring_avail_idx(vq));
437     } else if (enable) {
438         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
439     } else {
440         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
441     }
442     if (enable) {
443         /* Expose avail event/used flags before caller checks the avail idx. */
444         smp_mb();
445     }
446 }
447 
448 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
449 {
450     uint16_t off_wrap;
451     VRingPackedDescEvent e;
452     VRingMemoryRegionCaches *caches;
453 
454     RCU_READ_LOCK_GUARD();
455     caches = vring_get_region_caches(vq);
456     if (!caches) {
457         return;
458     }
459 
460     vring_packed_event_read(vq->vdev, &caches->used, &e);
461 
462     if (!enable) {
463         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
464     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
465         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
466         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
467         /* Make sure off_wrap is wrote before flags */
468         smp_wmb();
469         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
470     } else {
471         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
472     }
473 
474     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
475     if (enable) {
476         /* Expose avail event/used flags before caller checks the avail idx. */
477         smp_mb();
478     }
479 }
480 
481 bool virtio_queue_get_notification(VirtQueue *vq)
482 {
483     return vq->notification;
484 }
485 
486 void virtio_queue_set_notification(VirtQueue *vq, int enable)
487 {
488     vq->notification = enable;
489 
490     if (!vq->vring.desc) {
491         return;
492     }
493 
494     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
495         virtio_queue_packed_set_notification(vq, enable);
496     } else {
497         virtio_queue_split_set_notification(vq, enable);
498     }
499 }
500 
501 int virtio_queue_ready(VirtQueue *vq)
502 {
503     return vq->vring.avail != 0;
504 }
505 
506 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
507                                          uint16_t *flags,
508                                          MemoryRegionCache *cache,
509                                          int i)
510 {
511     address_space_read_cached(cache,
512                               i * sizeof(VRingPackedDesc) +
513                               offsetof(VRingPackedDesc, flags),
514                               flags, sizeof(*flags));
515     virtio_tswap16s(vdev, flags);
516 }
517 
518 static void vring_packed_desc_read(VirtIODevice *vdev,
519                                    VRingPackedDesc *desc,
520                                    MemoryRegionCache *cache,
521                                    int i, bool strict_order)
522 {
523     hwaddr off = i * sizeof(VRingPackedDesc);
524 
525     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
526 
527     if (strict_order) {
528         /* Make sure flags is read before the rest fields. */
529         smp_rmb();
530     }
531 
532     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
533                               &desc->addr, sizeof(desc->addr));
534     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
535                               &desc->id, sizeof(desc->id));
536     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
537                               &desc->len, sizeof(desc->len));
538     virtio_tswap64s(vdev, &desc->addr);
539     virtio_tswap16s(vdev, &desc->id);
540     virtio_tswap32s(vdev, &desc->len);
541 }
542 
543 static void vring_packed_desc_write_data(VirtIODevice *vdev,
544                                          VRingPackedDesc *desc,
545                                          MemoryRegionCache *cache,
546                                          int i)
547 {
548     hwaddr off_id = i * sizeof(VRingPackedDesc) +
549                     offsetof(VRingPackedDesc, id);
550     hwaddr off_len = i * sizeof(VRingPackedDesc) +
551                     offsetof(VRingPackedDesc, len);
552 
553     virtio_tswap32s(vdev, &desc->len);
554     virtio_tswap16s(vdev, &desc->id);
555     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
556     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
557     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
558     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
559 }
560 
561 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
562                                           VRingPackedDesc *desc,
563                                           MemoryRegionCache *cache,
564                                           int i)
565 {
566     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
567 
568     virtio_tswap16s(vdev, &desc->flags);
569     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
570     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
571 }
572 
573 static void vring_packed_desc_write(VirtIODevice *vdev,
574                                     VRingPackedDesc *desc,
575                                     MemoryRegionCache *cache,
576                                     int i, bool strict_order)
577 {
578     vring_packed_desc_write_data(vdev, desc, cache, i);
579     if (strict_order) {
580         /* Make sure data is wrote before flags. */
581         smp_wmb();
582     }
583     vring_packed_desc_write_flags(vdev, desc, cache, i);
584 }
585 
586 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
587 {
588     bool avail, used;
589 
590     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
591     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
592     return (avail != used) && (avail == wrap_counter);
593 }
594 
595 /* Fetch avail_idx from VQ memory only when we really need to know if
596  * guest has added some buffers.
597  * Called within rcu_read_lock().  */
598 static int virtio_queue_empty_rcu(VirtQueue *vq)
599 {
600     if (virtio_device_disabled(vq->vdev)) {
601         return 1;
602     }
603 
604     if (unlikely(!vq->vring.avail)) {
605         return 1;
606     }
607 
608     if (vq->shadow_avail_idx != vq->last_avail_idx) {
609         return 0;
610     }
611 
612     return vring_avail_idx(vq) == vq->last_avail_idx;
613 }
614 
615 static int virtio_queue_split_empty(VirtQueue *vq)
616 {
617     bool empty;
618 
619     if (virtio_device_disabled(vq->vdev)) {
620         return 1;
621     }
622 
623     if (unlikely(!vq->vring.avail)) {
624         return 1;
625     }
626 
627     if (vq->shadow_avail_idx != vq->last_avail_idx) {
628         return 0;
629     }
630 
631     RCU_READ_LOCK_GUARD();
632     empty = vring_avail_idx(vq) == vq->last_avail_idx;
633     return empty;
634 }
635 
636 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
637 {
638     struct VRingPackedDesc desc;
639     VRingMemoryRegionCaches *cache;
640 
641     if (unlikely(!vq->vring.desc)) {
642         return 1;
643     }
644 
645     cache = vring_get_region_caches(vq);
646     if (!cache) {
647         return 1;
648     }
649 
650     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
651                                  vq->last_avail_idx);
652 
653     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
654 }
655 
656 static int virtio_queue_packed_empty(VirtQueue *vq)
657 {
658     RCU_READ_LOCK_GUARD();
659     return virtio_queue_packed_empty_rcu(vq);
660 }
661 
662 int virtio_queue_empty(VirtQueue *vq)
663 {
664     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
665         return virtio_queue_packed_empty(vq);
666     } else {
667         return virtio_queue_split_empty(vq);
668     }
669 }
670 
671 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
672                                unsigned int len)
673 {
674     AddressSpace *dma_as = vq->vdev->dma_as;
675     unsigned int offset;
676     int i;
677 
678     offset = 0;
679     for (i = 0; i < elem->in_num; i++) {
680         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
681 
682         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
683                          elem->in_sg[i].iov_len,
684                          DMA_DIRECTION_FROM_DEVICE, size);
685 
686         offset += size;
687     }
688 
689     for (i = 0; i < elem->out_num; i++)
690         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
691                          elem->out_sg[i].iov_len,
692                          DMA_DIRECTION_TO_DEVICE,
693                          elem->out_sg[i].iov_len);
694 }
695 
696 /* virtqueue_detach_element:
697  * @vq: The #VirtQueue
698  * @elem: The #VirtQueueElement
699  * @len: number of bytes written
700  *
701  * Detach the element from the virtqueue.  This function is suitable for device
702  * reset or other situations where a #VirtQueueElement is simply freed and will
703  * not be pushed or discarded.
704  */
705 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
706                               unsigned int len)
707 {
708     vq->inuse -= elem->ndescs;
709     virtqueue_unmap_sg(vq, elem, len);
710 }
711 
712 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
713 {
714     vq->last_avail_idx -= num;
715 }
716 
717 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
718 {
719     if (vq->last_avail_idx < num) {
720         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
721         vq->last_avail_wrap_counter ^= 1;
722     } else {
723         vq->last_avail_idx -= num;
724     }
725 }
726 
727 /* virtqueue_unpop:
728  * @vq: The #VirtQueue
729  * @elem: The #VirtQueueElement
730  * @len: number of bytes written
731  *
732  * Pretend the most recent element wasn't popped from the virtqueue.  The next
733  * call to virtqueue_pop() will refetch the element.
734  */
735 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
736                      unsigned int len)
737 {
738 
739     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
740         virtqueue_packed_rewind(vq, 1);
741     } else {
742         virtqueue_split_rewind(vq, 1);
743     }
744 
745     virtqueue_detach_element(vq, elem, len);
746 }
747 
748 /* virtqueue_rewind:
749  * @vq: The #VirtQueue
750  * @num: Number of elements to push back
751  *
752  * Pretend that elements weren't popped from the virtqueue.  The next
753  * virtqueue_pop() will refetch the oldest element.
754  *
755  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
756  *
757  * Returns: true on success, false if @num is greater than the number of in use
758  * elements.
759  */
760 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
761 {
762     if (num > vq->inuse) {
763         return false;
764     }
765 
766     vq->inuse -= num;
767     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
768         virtqueue_packed_rewind(vq, num);
769     } else {
770         virtqueue_split_rewind(vq, num);
771     }
772     return true;
773 }
774 
775 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
776                     unsigned int len, unsigned int idx)
777 {
778     VRingUsedElem uelem;
779 
780     if (unlikely(!vq->vring.used)) {
781         return;
782     }
783 
784     idx = (idx + vq->used_idx) % vq->vring.num;
785 
786     uelem.id = elem->index;
787     uelem.len = len;
788     vring_used_write(vq, &uelem, idx);
789 }
790 
791 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
792                                   unsigned int len, unsigned int idx)
793 {
794     vq->used_elems[idx].index = elem->index;
795     vq->used_elems[idx].len = len;
796     vq->used_elems[idx].ndescs = elem->ndescs;
797 }
798 
799 static void virtqueue_packed_fill_desc(VirtQueue *vq,
800                                        const VirtQueueElement *elem,
801                                        unsigned int idx,
802                                        bool strict_order)
803 {
804     uint16_t head;
805     VRingMemoryRegionCaches *caches;
806     VRingPackedDesc desc = {
807         .id = elem->index,
808         .len = elem->len,
809     };
810     bool wrap_counter = vq->used_wrap_counter;
811 
812     if (unlikely(!vq->vring.desc)) {
813         return;
814     }
815 
816     head = vq->used_idx + idx;
817     if (head >= vq->vring.num) {
818         head -= vq->vring.num;
819         wrap_counter ^= 1;
820     }
821     if (wrap_counter) {
822         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
823         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
824     } else {
825         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
826         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
827     }
828 
829     caches = vring_get_region_caches(vq);
830     if (!caches) {
831         return;
832     }
833 
834     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
835 }
836 
837 /* Called within rcu_read_lock().  */
838 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
839                     unsigned int len, unsigned int idx)
840 {
841     trace_virtqueue_fill(vq, elem, len, idx);
842 
843     virtqueue_unmap_sg(vq, elem, len);
844 
845     if (virtio_device_disabled(vq->vdev)) {
846         return;
847     }
848 
849     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
850         virtqueue_packed_fill(vq, elem, len, idx);
851     } else {
852         virtqueue_split_fill(vq, elem, len, idx);
853     }
854 }
855 
856 /* Called within rcu_read_lock().  */
857 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
858 {
859     uint16_t old, new;
860 
861     if (unlikely(!vq->vring.used)) {
862         return;
863     }
864 
865     /* Make sure buffer is written before we update index. */
866     smp_wmb();
867     trace_virtqueue_flush(vq, count);
868     old = vq->used_idx;
869     new = old + count;
870     vring_used_idx_set(vq, new);
871     vq->inuse -= count;
872     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
873         vq->signalled_used_valid = false;
874 }
875 
876 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
877 {
878     unsigned int i, ndescs = 0;
879 
880     if (unlikely(!vq->vring.desc)) {
881         return;
882     }
883 
884     for (i = 1; i < count; i++) {
885         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
886         ndescs += vq->used_elems[i].ndescs;
887     }
888     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
889     ndescs += vq->used_elems[0].ndescs;
890 
891     vq->inuse -= ndescs;
892     vq->used_idx += ndescs;
893     if (vq->used_idx >= vq->vring.num) {
894         vq->used_idx -= vq->vring.num;
895         vq->used_wrap_counter ^= 1;
896     }
897 }
898 
899 void virtqueue_flush(VirtQueue *vq, unsigned int count)
900 {
901     if (virtio_device_disabled(vq->vdev)) {
902         vq->inuse -= count;
903         return;
904     }
905 
906     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
907         virtqueue_packed_flush(vq, count);
908     } else {
909         virtqueue_split_flush(vq, count);
910     }
911 }
912 
913 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
914                     unsigned int len)
915 {
916     RCU_READ_LOCK_GUARD();
917     virtqueue_fill(vq, elem, len, 0);
918     virtqueue_flush(vq, 1);
919 }
920 
921 /* Called within rcu_read_lock().  */
922 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
923 {
924     uint16_t num_heads = vring_avail_idx(vq) - idx;
925 
926     /* Check it isn't doing very strange things with descriptor numbers. */
927     if (num_heads > vq->vring.num) {
928         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
929                      idx, vq->shadow_avail_idx);
930         return -EINVAL;
931     }
932     /* On success, callers read a descriptor at vq->last_avail_idx.
933      * Make sure descriptor read does not bypass avail index read. */
934     if (num_heads) {
935         smp_rmb();
936     }
937 
938     return num_heads;
939 }
940 
941 /* Called within rcu_read_lock().  */
942 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
943                                unsigned int *head)
944 {
945     /* Grab the next descriptor number they're advertising, and increment
946      * the index we've seen. */
947     *head = vring_avail_ring(vq, idx % vq->vring.num);
948 
949     /* If their number is silly, that's a fatal mistake. */
950     if (*head >= vq->vring.num) {
951         virtio_error(vq->vdev, "Guest says index %u is available", *head);
952         return false;
953     }
954 
955     return true;
956 }
957 
/* Result codes of virtqueue_split_read_next_desc(). */
enum {
    /* the chain is broken; a device error has been reported */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
963 
964 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
965                                           MemoryRegionCache *desc_cache,
966                                           unsigned int max, unsigned int *next)
967 {
968     /* If this descriptor says it doesn't chain, we're done. */
969     if (!(desc->flags & VRING_DESC_F_NEXT)) {
970         return VIRTQUEUE_READ_DESC_DONE;
971     }
972 
973     /* Check they're not leading us off end of descriptors. */
974     *next = desc->next;
975     /* Make sure compiler knows to grab that: we don't want it changing! */
976     smp_wmb();
977 
978     if (*next >= max) {
979         virtio_error(vdev, "Desc next is %u", *next);
980         return VIRTQUEUE_READ_DESC_ERROR;
981     }
982 
983     vring_split_desc_read(vdev, desc, desc_cache, *next);
984     return VIRTQUEUE_READ_DESC_MORE;
985 }
986 
987 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
988                             unsigned int *in_bytes, unsigned int *out_bytes,
989                             unsigned max_in_bytes, unsigned max_out_bytes)
990 {
991     VirtIODevice *vdev = vq->vdev;
992     unsigned int max, idx;
993     unsigned int total_bufs, in_total, out_total;
994     VRingMemoryRegionCaches *caches;
995     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
996     int64_t len = 0;
997     int rc;
998 
999     RCU_READ_LOCK_GUARD();
1000 
1001     idx = vq->last_avail_idx;
1002     total_bufs = in_total = out_total = 0;
1003 
1004     max = vq->vring.num;
1005     caches = vring_get_region_caches(vq);
1006     if (!caches) {
1007         goto err;
1008     }
1009 
1010     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1011         MemoryRegionCache *desc_cache = &caches->desc;
1012         unsigned int num_bufs;
1013         VRingDesc desc;
1014         unsigned int i;
1015 
1016         num_bufs = total_bufs;
1017 
1018         if (!virtqueue_get_head(vq, idx++, &i)) {
1019             goto err;
1020         }
1021 
1022         vring_split_desc_read(vdev, &desc, desc_cache, i);
1023 
1024         if (desc.flags & VRING_DESC_F_INDIRECT) {
1025             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1026                 virtio_error(vdev, "Invalid size for indirect buffer table");
1027                 goto err;
1028             }
1029 
1030             /* If we've got too many, that implies a descriptor loop. */
1031             if (num_bufs >= max) {
1032                 virtio_error(vdev, "Looped descriptor");
1033                 goto err;
1034             }
1035 
1036             /* loop over the indirect descriptor table */
1037             len = address_space_cache_init(&indirect_desc_cache,
1038                                            vdev->dma_as,
1039                                            desc.addr, desc.len, false);
1040             desc_cache = &indirect_desc_cache;
1041             if (len < desc.len) {
1042                 virtio_error(vdev, "Cannot map indirect buffer");
1043                 goto err;
1044             }
1045 
1046             max = desc.len / sizeof(VRingDesc);
1047             num_bufs = i = 0;
1048             vring_split_desc_read(vdev, &desc, desc_cache, i);
1049         }
1050 
1051         do {
1052             /* If we've got too many, that implies a descriptor loop. */
1053             if (++num_bufs > max) {
1054                 virtio_error(vdev, "Looped descriptor");
1055                 goto err;
1056             }
1057 
1058             if (desc.flags & VRING_DESC_F_WRITE) {
1059                 in_total += desc.len;
1060             } else {
1061                 out_total += desc.len;
1062             }
1063             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1064                 goto done;
1065             }
1066 
1067             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1068         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1069 
1070         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1071             goto err;
1072         }
1073 
1074         if (desc_cache == &indirect_desc_cache) {
1075             address_space_cache_destroy(&indirect_desc_cache);
1076             total_bufs++;
1077         } else {
1078             total_bufs = num_bufs;
1079         }
1080     }
1081 
1082     if (rc < 0) {
1083         goto err;
1084     }
1085 
1086 done:
1087     address_space_cache_destroy(&indirect_desc_cache);
1088     if (in_bytes) {
1089         *in_bytes = in_total;
1090     }
1091     if (out_bytes) {
1092         *out_bytes = out_total;
1093     }
1094     return;
1095 
1096 err:
1097     in_total = out_total = 0;
1098     goto done;
1099 }
1100 
1101 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1102                                            VRingPackedDesc *desc,
1103                                            MemoryRegionCache
1104                                            *desc_cache,
1105                                            unsigned int max,
1106                                            unsigned int *next,
1107                                            bool indirect)
1108 {
1109     /* If this descriptor says it doesn't chain, we're done. */
1110     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1111         return VIRTQUEUE_READ_DESC_DONE;
1112     }
1113 
1114     ++*next;
1115     if (*next == max) {
1116         if (indirect) {
1117             return VIRTQUEUE_READ_DESC_DONE;
1118         } else {
1119             (*next) -= vq->vring.num;
1120         }
1121     }
1122 
1123     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1124     return VIRTQUEUE_READ_DESC_MORE;
1125 }
1126 
1127 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1128                                              unsigned int *in_bytes,
1129                                              unsigned int *out_bytes,
1130                                              unsigned max_in_bytes,
1131                                              unsigned max_out_bytes)
1132 {
1133     VirtIODevice *vdev = vq->vdev;
1134     unsigned int max, idx;
1135     unsigned int total_bufs, in_total, out_total;
1136     MemoryRegionCache *desc_cache;
1137     VRingMemoryRegionCaches *caches;
1138     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1139     int64_t len = 0;
1140     VRingPackedDesc desc;
1141     bool wrap_counter;
1142 
1143     RCU_READ_LOCK_GUARD();
1144     idx = vq->last_avail_idx;
1145     wrap_counter = vq->last_avail_wrap_counter;
1146     total_bufs = in_total = out_total = 0;
1147 
1148     max = vq->vring.num;
1149     caches = vring_get_region_caches(vq);
1150     if (!caches) {
1151         goto err;
1152     }
1153 
1154     for (;;) {
1155         unsigned int num_bufs = total_bufs;
1156         unsigned int i = idx;
1157         int rc;
1158 
1159         desc_cache = &caches->desc;
1160         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1161         if (!is_desc_avail(desc.flags, wrap_counter)) {
1162             break;
1163         }
1164 
1165         if (desc.flags & VRING_DESC_F_INDIRECT) {
1166             if (desc.len % sizeof(VRingPackedDesc)) {
1167                 virtio_error(vdev, "Invalid size for indirect buffer table");
1168                 goto err;
1169             }
1170 
1171             /* If we've got too many, that implies a descriptor loop. */
1172             if (num_bufs >= max) {
1173                 virtio_error(vdev, "Looped descriptor");
1174                 goto err;
1175             }
1176 
1177             /* loop over the indirect descriptor table */
1178             len = address_space_cache_init(&indirect_desc_cache,
1179                                            vdev->dma_as,
1180                                            desc.addr, desc.len, false);
1181             desc_cache = &indirect_desc_cache;
1182             if (len < desc.len) {
1183                 virtio_error(vdev, "Cannot map indirect buffer");
1184                 goto err;
1185             }
1186 
1187             max = desc.len / sizeof(VRingPackedDesc);
1188             num_bufs = i = 0;
1189             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1190         }
1191 
1192         do {
1193             /* If we've got too many, that implies a descriptor loop. */
1194             if (++num_bufs > max) {
1195                 virtio_error(vdev, "Looped descriptor");
1196                 goto err;
1197             }
1198 
1199             if (desc.flags & VRING_DESC_F_WRITE) {
1200                 in_total += desc.len;
1201             } else {
1202                 out_total += desc.len;
1203             }
1204             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1205                 goto done;
1206             }
1207 
1208             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1209                                                  &i, desc_cache ==
1210                                                  &indirect_desc_cache);
1211         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1212 
1213         if (desc_cache == &indirect_desc_cache) {
1214             address_space_cache_destroy(&indirect_desc_cache);
1215             total_bufs++;
1216             idx++;
1217         } else {
1218             idx += num_bufs - total_bufs;
1219             total_bufs = num_bufs;
1220         }
1221 
1222         if (idx >= vq->vring.num) {
1223             idx -= vq->vring.num;
1224             wrap_counter ^= 1;
1225         }
1226     }
1227 
1228     /* Record the index and wrap counter for a kick we want */
1229     vq->shadow_avail_idx = idx;
1230     vq->shadow_avail_wrap_counter = wrap_counter;
1231 done:
1232     address_space_cache_destroy(&indirect_desc_cache);
1233     if (in_bytes) {
1234         *in_bytes = in_total;
1235     }
1236     if (out_bytes) {
1237         *out_bytes = out_total;
1238     }
1239     return;
1240 
1241 err:
1242     in_total = out_total = 0;
1243     goto done;
1244 }
1245 
1246 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1247                                unsigned int *out_bytes,
1248                                unsigned max_in_bytes, unsigned max_out_bytes)
1249 {
1250     uint16_t desc_size;
1251     VRingMemoryRegionCaches *caches;
1252 
1253     if (unlikely(!vq->vring.desc)) {
1254         goto err;
1255     }
1256 
1257     caches = vring_get_region_caches(vq);
1258     if (!caches) {
1259         goto err;
1260     }
1261 
1262     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1263                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1264     if (caches->desc.len < vq->vring.num * desc_size) {
1265         virtio_error(vq->vdev, "Cannot map descriptor ring");
1266         goto err;
1267     }
1268 
1269     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1270         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1271                                          max_in_bytes, max_out_bytes);
1272     } else {
1273         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1274                                         max_in_bytes, max_out_bytes);
1275     }
1276 
1277     return;
1278 err:
1279     if (in_bytes) {
1280         *in_bytes = 0;
1281     }
1282     if (out_bytes) {
1283         *out_bytes = 0;
1284     }
1285 }
1286 
1287 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1288                           unsigned int out_bytes)
1289 {
1290     unsigned int in_total, out_total;
1291 
1292     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1293     return in_bytes <= in_total && out_bytes <= out_total;
1294 }
1295 
1296 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1297                                hwaddr *addr, struct iovec *iov,
1298                                unsigned int max_num_sg, bool is_write,
1299                                hwaddr pa, size_t sz)
1300 {
1301     bool ok = false;
1302     unsigned num_sg = *p_num_sg;
1303     assert(num_sg <= max_num_sg);
1304 
1305     if (!sz) {
1306         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1307         goto out;
1308     }
1309 
1310     while (sz) {
1311         hwaddr len = sz;
1312 
1313         if (num_sg == max_num_sg) {
1314             virtio_error(vdev, "virtio: too many write descriptors in "
1315                                "indirect table");
1316             goto out;
1317         }
1318 
1319         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1320                                               is_write ?
1321                                               DMA_DIRECTION_FROM_DEVICE :
1322                                               DMA_DIRECTION_TO_DEVICE);
1323         if (!iov[num_sg].iov_base) {
1324             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1325             goto out;
1326         }
1327 
1328         iov[num_sg].iov_len = len;
1329         addr[num_sg] = pa;
1330 
1331         sz -= len;
1332         pa += len;
1333         num_sg++;
1334     }
1335     ok = true;
1336 
1337 out:
1338     *p_num_sg = num_sg;
1339     return ok;
1340 }
1341 
1342 /* Only used by error code paths before we have a VirtQueueElement (therefore
1343  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1344  * yet.
1345  */
1346 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1347                                     struct iovec *iov)
1348 {
1349     unsigned int i;
1350 
1351     for (i = 0; i < out_num + in_num; i++) {
1352         int is_write = i >= out_num;
1353 
1354         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1355         iov++;
1356     }
1357 }
1358 
1359 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1360                                 hwaddr *addr, unsigned int num_sg,
1361                                 int is_write)
1362 {
1363     unsigned int i;
1364     hwaddr len;
1365 
1366     for (i = 0; i < num_sg; i++) {
1367         len = sg[i].iov_len;
1368         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1369                                         addr[i], &len, is_write ?
1370                                         DMA_DIRECTION_FROM_DEVICE :
1371                                         DMA_DIRECTION_TO_DEVICE);
1372         if (!sg[i].iov_base) {
1373             error_report("virtio: error trying to map MMIO memory");
1374             exit(1);
1375         }
1376         if (len != sg[i].iov_len) {
1377             error_report("virtio: unexpected memory split");
1378             exit(1);
1379         }
1380     }
1381 }
1382 
1383 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1384 {
1385     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, 1);
1386     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 0);
1387 }
1388 
1389 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1390 {
1391     VirtQueueElement *elem;
1392     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1393     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1394     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1395     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1396     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1397     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1398 
1399     assert(sz >= sizeof(VirtQueueElement));
1400     elem = g_malloc(out_sg_end);
1401     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1402     elem->out_num = out_num;
1403     elem->in_num = in_num;
1404     elem->in_addr = (void *)elem + in_addr_ofs;
1405     elem->out_addr = (void *)elem + out_addr_ofs;
1406     elem->in_sg = (void *)elem + in_sg_ofs;
1407     elem->out_sg = (void *)elem + out_sg_ofs;
1408     return elem;
1409 }
1410 
/*
 * Take the next available chain off a split ring and return it as a newly
 * allocated element of @sz bytes with all of its buffers mapped.
 *
 * Returns NULL when the queue is empty or when the chain is rejected; in
 * the latter case virtio_error() has been called and any buffers mapped so
 * far have been unmapped again.
 *
 * Note that vq->last_avail_idx is advanced as soon as the head has been
 * fetched, i.e. before the chain itself is validated.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    /*
     * Scratch arrays filled while walking the chain: out entries occupy
     * slots [0, out_num), in entries follow at [out_num, out_num + in_num).
     */
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        /* The table must be non-empty and hold whole descriptors only. */
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* From here on the walk is bounded by the indirect table. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* in buffers are stored behind the out buffers collected so far */
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* an out buffer after an in buffer would break that layout */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Reached on success and on every failure path alike. */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    /* elem is still NULL here, so the caller sees the failure. */
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
1547 
1548 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1549 {
1550     unsigned int i, max;
1551     VRingMemoryRegionCaches *caches;
1552     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1553     MemoryRegionCache *desc_cache;
1554     int64_t len;
1555     VirtIODevice *vdev = vq->vdev;
1556     VirtQueueElement *elem = NULL;
1557     unsigned out_num, in_num, elem_entries;
1558     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1559     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1560     VRingPackedDesc desc;
1561     uint16_t id;
1562     int rc;
1563 
1564     RCU_READ_LOCK_GUARD();
1565     if (virtio_queue_packed_empty_rcu(vq)) {
1566         goto done;
1567     }
1568 
1569     /* When we start there are none of either input nor output. */
1570     out_num = in_num = elem_entries = 0;
1571 
1572     max = vq->vring.num;
1573 
1574     if (vq->inuse >= vq->vring.num) {
1575         virtio_error(vdev, "Virtqueue size exceeded");
1576         goto done;
1577     }
1578 
1579     i = vq->last_avail_idx;
1580 
1581     caches = vring_get_region_caches(vq);
1582     if (!caches) {
1583         virtio_error(vdev, "Region caches not initialized");
1584         goto done;
1585     }
1586 
1587     if (caches->desc.len < max * sizeof(VRingDesc)) {
1588         virtio_error(vdev, "Cannot map descriptor ring");
1589         goto done;
1590     }
1591 
1592     desc_cache = &caches->desc;
1593     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1594     id = desc.id;
1595     if (desc.flags & VRING_DESC_F_INDIRECT) {
1596         if (desc.len % sizeof(VRingPackedDesc)) {
1597             virtio_error(vdev, "Invalid size for indirect buffer table");
1598             goto done;
1599         }
1600 
1601         /* loop over the indirect descriptor table */
1602         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1603                                        desc.addr, desc.len, false);
1604         desc_cache = &indirect_desc_cache;
1605         if (len < desc.len) {
1606             virtio_error(vdev, "Cannot map indirect buffer");
1607             goto done;
1608         }
1609 
1610         max = desc.len / sizeof(VRingPackedDesc);
1611         i = 0;
1612         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1613     }
1614 
1615     /* Collect all the descriptors */
1616     do {
1617         bool map_ok;
1618 
1619         if (desc.flags & VRING_DESC_F_WRITE) {
1620             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1621                                         iov + out_num,
1622                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1623                                         desc.addr, desc.len);
1624         } else {
1625             if (in_num) {
1626                 virtio_error(vdev, "Incorrect order for descriptors");
1627                 goto err_undo_map;
1628             }
1629             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1630                                         VIRTQUEUE_MAX_SIZE, false,
1631                                         desc.addr, desc.len);
1632         }
1633         if (!map_ok) {
1634             goto err_undo_map;
1635         }
1636 
1637         /* If we've got too many, that implies a descriptor loop. */
1638         if (++elem_entries > max) {
1639             virtio_error(vdev, "Looped descriptor");
1640             goto err_undo_map;
1641         }
1642 
1643         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1644                                              desc_cache ==
1645                                              &indirect_desc_cache);
1646     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1647 
1648     /* Now copy what we have collected and mapped */
1649     elem = virtqueue_alloc_element(sz, out_num, in_num);
1650     for (i = 0; i < out_num; i++) {
1651         elem->out_addr[i] = addr[i];
1652         elem->out_sg[i] = iov[i];
1653     }
1654     for (i = 0; i < in_num; i++) {
1655         elem->in_addr[i] = addr[out_num + i];
1656         elem->in_sg[i] = iov[out_num + i];
1657     }
1658 
1659     elem->index = id;
1660     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1661     vq->last_avail_idx += elem->ndescs;
1662     vq->inuse += elem->ndescs;
1663 
1664     if (vq->last_avail_idx >= vq->vring.num) {
1665         vq->last_avail_idx -= vq->vring.num;
1666         vq->last_avail_wrap_counter ^= 1;
1667     }
1668 
1669     vq->shadow_avail_idx = vq->last_avail_idx;
1670     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1671 
1672     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1673 done:
1674     address_space_cache_destroy(&indirect_desc_cache);
1675 
1676     return elem;
1677 
1678 err_undo_map:
1679     virtqueue_undo_map_desc(out_num, in_num, iov);
1680     goto done;
1681 }
1682 
1683 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1684 {
1685     if (virtio_device_disabled(vq->vdev)) {
1686         return NULL;
1687     }
1688 
1689     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1690         return virtqueue_packed_pop(vq, sz);
1691     } else {
1692         return virtqueue_split_pop(vq, sz);
1693     }
1694 }
1695 
1696 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1697 {
1698     VRingMemoryRegionCaches *caches;
1699     MemoryRegionCache *desc_cache;
1700     unsigned int dropped = 0;
1701     VirtQueueElement elem = {};
1702     VirtIODevice *vdev = vq->vdev;
1703     VRingPackedDesc desc;
1704 
1705     caches = vring_get_region_caches(vq);
1706     if (!caches) {
1707         return 0;
1708     }
1709 
1710     desc_cache = &caches->desc;
1711 
1712     virtio_queue_set_notification(vq, 0);
1713 
1714     while (vq->inuse < vq->vring.num) {
1715         unsigned int idx = vq->last_avail_idx;
1716         /*
1717          * works similar to virtqueue_pop but does not map buffers
1718          * and does not allocate any memory.
1719          */
1720         vring_packed_desc_read(vdev, &desc, desc_cache,
1721                                vq->last_avail_idx , true);
1722         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1723             break;
1724         }
1725         elem.index = desc.id;
1726         elem.ndescs = 1;
1727         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1728                                                vq->vring.num, &idx, false)) {
1729             ++elem.ndescs;
1730         }
1731         /*
1732          * immediately push the element, nothing to unmap
1733          * as both in_num and out_num are set to 0.
1734          */
1735         virtqueue_push(vq, &elem, 0);
1736         dropped++;
1737         vq->last_avail_idx += elem.ndescs;
1738         if (vq->last_avail_idx >= vq->vring.num) {
1739             vq->last_avail_idx -= vq->vring.num;
1740             vq->last_avail_wrap_counter ^= 1;
1741         }
1742     }
1743 
1744     return dropped;
1745 }
1746 
1747 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1748 {
1749     unsigned int dropped = 0;
1750     VirtQueueElement elem = {};
1751     VirtIODevice *vdev = vq->vdev;
1752     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1753 
1754     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1755         /* works similar to virtqueue_pop but does not map buffers
1756         * and does not allocate any memory */
1757         smp_rmb();
1758         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1759             break;
1760         }
1761         vq->inuse++;
1762         vq->last_avail_idx++;
1763         if (fEventIdx) {
1764             vring_set_avail_event(vq, vq->last_avail_idx);
1765         }
1766         /* immediately push the element, nothing to unmap
1767          * as both in_num and out_num are set to 0 */
1768         virtqueue_push(vq, &elem, 0);
1769         dropped++;
1770     }
1771 
1772     return dropped;
1773 }
1774 
1775 /* virtqueue_drop_all:
1776  * @vq: The #VirtQueue
1777  * Drops all queued buffers and indicates them to the guest
1778  * as if they are done. Useful when buffers can not be
1779  * processed but must be returned to the guest.
1780  */
1781 unsigned int virtqueue_drop_all(VirtQueue *vq)
1782 {
1783     struct VirtIODevice *vdev = vq->vdev;
1784 
1785     if (virtio_device_disabled(vq->vdev)) {
1786         return 0;
1787     }
1788 
1789     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1790         return virtqueue_packed_drop_all(vq);
1791     } else {
1792         return virtqueue_split_drop_all(vq);
1793     }
1794 }
1795 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * This structure is transferred as one raw buffer of
 * sizeof(VirtQueueElementOld) bytes, so its layout must not be changed.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;     /* copy of VirtQueueElement.index */
    unsigned int out_num;   /* number of valid out_addr/out_sg entries */
    unsigned int in_num;    /* number of valid in_addr/in_sg entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    /* Only iov_len of the iovecs is meaningful; iov_base is saved as 0. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
1812 
1813 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1814 {
1815     VirtQueueElement *elem;
1816     VirtQueueElementOld data;
1817     int i;
1818 
1819     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1820 
1821     /* TODO: teach all callers that this can fail, and return failure instead
1822      * of asserting here.
1823      * This is just one thing (there are probably more) that must be
1824      * fixed before we can allow NDEBUG compilation.
1825      */
1826     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1827     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1828 
1829     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1830     elem->index = data.index;
1831 
1832     for (i = 0; i < elem->in_num; i++) {
1833         elem->in_addr[i] = data.in_addr[i];
1834     }
1835 
1836     for (i = 0; i < elem->out_num; i++) {
1837         elem->out_addr[i] = data.out_addr[i];
1838     }
1839 
1840     for (i = 0; i < elem->in_num; i++) {
1841         /* Base is overwritten by virtqueue_map.  */
1842         elem->in_sg[i].iov_base = 0;
1843         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1844     }
1845 
1846     for (i = 0; i < elem->out_num; i++) {
1847         /* Base is overwritten by virtqueue_map.  */
1848         elem->out_sg[i].iov_base = 0;
1849         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1850     }
1851 
1852     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1853         qemu_get_be32s(f, &elem->ndescs);
1854     }
1855 
1856     virtqueue_map(vdev, elem);
1857     return elem;
1858 }
1859 
1860 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1861                                 VirtQueueElement *elem)
1862 {
1863     VirtQueueElementOld data;
1864     int i;
1865 
1866     memset(&data, 0, sizeof(data));
1867     data.index = elem->index;
1868     data.in_num = elem->in_num;
1869     data.out_num = elem->out_num;
1870 
1871     for (i = 0; i < elem->in_num; i++) {
1872         data.in_addr[i] = elem->in_addr[i];
1873     }
1874 
1875     for (i = 0; i < elem->out_num; i++) {
1876         data.out_addr[i] = elem->out_addr[i];
1877     }
1878 
1879     for (i = 0; i < elem->in_num; i++) {
1880         /* Base is overwritten by virtqueue_map when loading.  Do not
1881          * save it, as it would leak the QEMU address space layout.  */
1882         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1883     }
1884 
1885     for (i = 0; i < elem->out_num; i++) {
1886         /* Do not save iov_base as above.  */
1887         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1888     }
1889 
1890     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1891         qemu_put_be32s(f, &elem->ndescs);
1892     }
1893 
1894     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1895 }
1896 
1897 /* virtio device */
1898 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1899 {
1900     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1901     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1902 
1903     if (virtio_device_disabled(vdev)) {
1904         return;
1905     }
1906 
1907     if (k->notify) {
1908         k->notify(qbus->parent, vector);
1909     }
1910 }
1911 
1912 void virtio_update_irq(VirtIODevice *vdev)
1913 {
1914     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1915 }
1916 
1917 static int virtio_validate_features(VirtIODevice *vdev)
1918 {
1919     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1920 
1921     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1922         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1923         return -EFAULT;
1924     }
1925 
1926     if (k->validate_features) {
1927         return k->validate_features(vdev);
1928     } else {
1929         return 0;
1930     }
1931 }
1932 
1933 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1934 {
1935     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1936     trace_virtio_set_status(vdev, val);
1937 
1938     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1939         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1940             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1941             int ret = virtio_validate_features(vdev);
1942 
1943             if (ret) {
1944                 return ret;
1945             }
1946         }
1947     }
1948 
1949     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1950         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1951         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1952     }
1953 
1954     if (k->set_status) {
1955         k->set_status(vdev, val);
1956     }
1957     vdev->status = val;
1958 
1959     return 0;
1960 }
1961 
1962 static enum virtio_device_endian virtio_default_endian(void)
1963 {
1964     if (target_words_bigendian()) {
1965         return VIRTIO_DEVICE_ENDIAN_BIG;
1966     } else {
1967         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1968     }
1969 }
1970 
1971 static enum virtio_device_endian virtio_current_cpu_endian(void)
1972 {
1973     CPUClass *cc = CPU_GET_CLASS(current_cpu);
1974 
1975     if (cc->virtio_is_big_endian(current_cpu)) {
1976         return VIRTIO_DEVICE_ENDIAN_BIG;
1977     } else {
1978         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1979     }
1980 }
1981 
1982 void virtio_reset(void *opaque)
1983 {
1984     VirtIODevice *vdev = opaque;
1985     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1986     int i;
1987 
1988     virtio_set_status(vdev, 0);
1989     if (current_cpu) {
1990         /* Guest initiated reset */
1991         vdev->device_endian = virtio_current_cpu_endian();
1992     } else {
1993         /* System reset */
1994         vdev->device_endian = virtio_default_endian();
1995     }
1996 
1997     if (k->reset) {
1998         k->reset(vdev);
1999     }
2000 
2001     vdev->start_on_kick = false;
2002     vdev->started = false;
2003     vdev->broken = false;
2004     vdev->guest_features = 0;
2005     vdev->queue_sel = 0;
2006     vdev->status = 0;
2007     vdev->disabled = false;
2008     atomic_set(&vdev->isr, 0);
2009     vdev->config_vector = VIRTIO_NO_VECTOR;
2010     virtio_notify_vector(vdev, vdev->config_vector);
2011 
2012     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2013         vdev->vq[i].vring.desc = 0;
2014         vdev->vq[i].vring.avail = 0;
2015         vdev->vq[i].vring.used = 0;
2016         vdev->vq[i].last_avail_idx = 0;
2017         vdev->vq[i].shadow_avail_idx = 0;
2018         vdev->vq[i].used_idx = 0;
2019         vdev->vq[i].last_avail_wrap_counter = true;
2020         vdev->vq[i].shadow_avail_wrap_counter = true;
2021         vdev->vq[i].used_wrap_counter = true;
2022         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2023         vdev->vq[i].signalled_used = 0;
2024         vdev->vq[i].signalled_used_valid = false;
2025         vdev->vq[i].notification = true;
2026         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2027         vdev->vq[i].inuse = 0;
2028         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2029     }
2030 }
2031 
2032 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2033 {
2034     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2035     uint8_t val;
2036 
2037     if (addr + sizeof(val) > vdev->config_len) {
2038         return (uint32_t)-1;
2039     }
2040 
2041     k->get_config(vdev, vdev->config);
2042 
2043     val = ldub_p(vdev->config + addr);
2044     return val;
2045 }
2046 
2047 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2048 {
2049     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2050     uint16_t val;
2051 
2052     if (addr + sizeof(val) > vdev->config_len) {
2053         return (uint32_t)-1;
2054     }
2055 
2056     k->get_config(vdev, vdev->config);
2057 
2058     val = lduw_p(vdev->config + addr);
2059     return val;
2060 }
2061 
2062 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2063 {
2064     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2065     uint32_t val;
2066 
2067     if (addr + sizeof(val) > vdev->config_len) {
2068         return (uint32_t)-1;
2069     }
2070 
2071     k->get_config(vdev, vdev->config);
2072 
2073     val = ldl_p(vdev->config + addr);
2074     return val;
2075 }
2076 
2077 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2078 {
2079     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080     uint8_t val = data;
2081 
2082     if (addr + sizeof(val) > vdev->config_len) {
2083         return;
2084     }
2085 
2086     stb_p(vdev->config + addr, val);
2087 
2088     if (k->set_config) {
2089         k->set_config(vdev, vdev->config);
2090     }
2091 }
2092 
2093 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2094 {
2095     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2096     uint16_t val = data;
2097 
2098     if (addr + sizeof(val) > vdev->config_len) {
2099         return;
2100     }
2101 
2102     stw_p(vdev->config + addr, val);
2103 
2104     if (k->set_config) {
2105         k->set_config(vdev, vdev->config);
2106     }
2107 }
2108 
2109 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2110 {
2111     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2112     uint32_t val = data;
2113 
2114     if (addr + sizeof(val) > vdev->config_len) {
2115         return;
2116     }
2117 
2118     stl_p(vdev->config + addr, val);
2119 
2120     if (k->set_config) {
2121         k->set_config(vdev, vdev->config);
2122     }
2123 }
2124 
2125 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2126 {
2127     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2128     uint8_t val;
2129 
2130     if (addr + sizeof(val) > vdev->config_len) {
2131         return (uint32_t)-1;
2132     }
2133 
2134     k->get_config(vdev, vdev->config);
2135 
2136     val = ldub_p(vdev->config + addr);
2137     return val;
2138 }
2139 
2140 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2141 {
2142     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2143     uint16_t val;
2144 
2145     if (addr + sizeof(val) > vdev->config_len) {
2146         return (uint32_t)-1;
2147     }
2148 
2149     k->get_config(vdev, vdev->config);
2150 
2151     val = lduw_le_p(vdev->config + addr);
2152     return val;
2153 }
2154 
2155 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2156 {
2157     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2158     uint32_t val;
2159 
2160     if (addr + sizeof(val) > vdev->config_len) {
2161         return (uint32_t)-1;
2162     }
2163 
2164     k->get_config(vdev, vdev->config);
2165 
2166     val = ldl_le_p(vdev->config + addr);
2167     return val;
2168 }
2169 
2170 void virtio_config_modern_writeb(VirtIODevice *vdev,
2171                                  uint32_t addr, uint32_t data)
2172 {
2173     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2174     uint8_t val = data;
2175 
2176     if (addr + sizeof(val) > vdev->config_len) {
2177         return;
2178     }
2179 
2180     stb_p(vdev->config + addr, val);
2181 
2182     if (k->set_config) {
2183         k->set_config(vdev, vdev->config);
2184     }
2185 }
2186 
2187 void virtio_config_modern_writew(VirtIODevice *vdev,
2188                                  uint32_t addr, uint32_t data)
2189 {
2190     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2191     uint16_t val = data;
2192 
2193     if (addr + sizeof(val) > vdev->config_len) {
2194         return;
2195     }
2196 
2197     stw_le_p(vdev->config + addr, val);
2198 
2199     if (k->set_config) {
2200         k->set_config(vdev, vdev->config);
2201     }
2202 }
2203 
2204 void virtio_config_modern_writel(VirtIODevice *vdev,
2205                                  uint32_t addr, uint32_t data)
2206 {
2207     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2208     uint32_t val = data;
2209 
2210     if (addr + sizeof(val) > vdev->config_len) {
2211         return;
2212     }
2213 
2214     stl_le_p(vdev->config + addr, val);
2215 
2216     if (k->set_config) {
2217         k->set_config(vdev, vdev->config);
2218     }
2219 }
2220 
2221 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2222 {
2223     if (!vdev->vq[n].vring.num) {
2224         return;
2225     }
2226     vdev->vq[n].vring.desc = addr;
2227     virtio_queue_update_rings(vdev, n);
2228 }
2229 
2230 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2231 {
2232     return vdev->vq[n].vring.desc;
2233 }
2234 
2235 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2236                             hwaddr avail, hwaddr used)
2237 {
2238     if (!vdev->vq[n].vring.num) {
2239         return;
2240     }
2241     vdev->vq[n].vring.desc = desc;
2242     vdev->vq[n].vring.avail = avail;
2243     vdev->vq[n].vring.used = used;
2244     virtio_init_region_cache(vdev, n);
2245 }
2246 
2247 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2248 {
2249     /* Don't allow guest to flip queue between existent and
2250      * nonexistent states, or to set it to an invalid size.
2251      */
2252     if (!!num != !!vdev->vq[n].vring.num ||
2253         num > VIRTQUEUE_MAX_SIZE ||
2254         num < 0) {
2255         return;
2256     }
2257     vdev->vq[n].vring.num = num;
2258 }
2259 
2260 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2261 {
2262     return QLIST_FIRST(&vdev->vector_queues[vector]);
2263 }
2264 
2265 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2266 {
2267     return QLIST_NEXT(vq, node);
2268 }
2269 
2270 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2271 {
2272     return vdev->vq[n].vring.num;
2273 }
2274 
2275 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2276 {
2277     return vdev->vq[n].vring.num_default;
2278 }
2279 
2280 int virtio_get_num_queues(VirtIODevice *vdev)
2281 {
2282     int i;
2283 
2284     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2285         if (!virtio_queue_get_num(vdev, i)) {
2286             break;
2287         }
2288     }
2289 
2290     return i;
2291 }
2292 
2293 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2294 {
2295     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2296     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2297 
2298     /* virtio-1 compliant devices cannot change the alignment */
2299     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2300         error_report("tried to modify queue alignment for virtio-1 device");
2301         return;
2302     }
2303     /* Check that the transport told us it was going to do this
2304      * (so a buggy transport will immediately assert rather than
2305      * silently failing to migrate this state)
2306      */
2307     assert(k->has_variable_vring_alignment);
2308 
2309     if (align) {
2310         vdev->vq[n].vring.align = align;
2311         virtio_queue_update_rings(vdev, n);
2312     }
2313 }
2314 
2315 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2316 {
2317     bool ret = false;
2318 
2319     if (vq->vring.desc && vq->handle_aio_output) {
2320         VirtIODevice *vdev = vq->vdev;
2321 
2322         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2323         ret = vq->handle_aio_output(vdev, vq);
2324 
2325         if (unlikely(vdev->start_on_kick)) {
2326             virtio_set_started(vdev, true);
2327         }
2328     }
2329 
2330     return ret;
2331 }
2332 
2333 static void virtio_queue_notify_vq(VirtQueue *vq)
2334 {
2335     if (vq->vring.desc && vq->handle_output) {
2336         VirtIODevice *vdev = vq->vdev;
2337 
2338         if (unlikely(vdev->broken)) {
2339             return;
2340         }
2341 
2342         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2343         vq->handle_output(vdev, vq);
2344 
2345         if (unlikely(vdev->start_on_kick)) {
2346             virtio_set_started(vdev, true);
2347         }
2348     }
2349 }
2350 
2351 void virtio_queue_notify(VirtIODevice *vdev, int n)
2352 {
2353     VirtQueue *vq = &vdev->vq[n];
2354 
2355     if (unlikely(!vq->vring.desc || vdev->broken)) {
2356         return;
2357     }
2358 
2359     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2360     if (vq->host_notifier_enabled) {
2361         event_notifier_set(&vq->host_notifier);
2362     } else if (vq->handle_output) {
2363         vq->handle_output(vdev, vq);
2364 
2365         if (unlikely(vdev->start_on_kick)) {
2366             virtio_set_started(vdev, true);
2367         }
2368     }
2369 }
2370 
2371 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2372 {
2373     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2374         VIRTIO_NO_VECTOR;
2375 }
2376 
2377 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2378 {
2379     VirtQueue *vq = &vdev->vq[n];
2380 
2381     if (n < VIRTIO_QUEUE_MAX) {
2382         if (vdev->vector_queues &&
2383             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2384             QLIST_REMOVE(vq, node);
2385         }
2386         vdev->vq[n].vector = vector;
2387         if (vdev->vector_queues &&
2388             vector != VIRTIO_NO_VECTOR) {
2389             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2390         }
2391     }
2392 }
2393 
2394 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2395                             VirtIOHandleOutput handle_output)
2396 {
2397     int i;
2398 
2399     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2400         if (vdev->vq[i].vring.num == 0)
2401             break;
2402     }
2403 
2404     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2405         abort();
2406 
2407     vdev->vq[i].vring.num = queue_size;
2408     vdev->vq[i].vring.num_default = queue_size;
2409     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2410     vdev->vq[i].handle_output = handle_output;
2411     vdev->vq[i].handle_aio_output = NULL;
2412     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2413                                        queue_size);
2414 
2415     return &vdev->vq[i];
2416 }
2417 
2418 void virtio_delete_queue(VirtQueue *vq)
2419 {
2420     vq->vring.num = 0;
2421     vq->vring.num_default = 0;
2422     vq->handle_output = NULL;
2423     vq->handle_aio_output = NULL;
2424     g_free(vq->used_elems);
2425     vq->used_elems = NULL;
2426     virtio_virtqueue_reset_region_cache(vq);
2427 }
2428 
2429 void virtio_del_queue(VirtIODevice *vdev, int n)
2430 {
2431     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2432         abort();
2433     }
2434 
2435     virtio_delete_queue(&vdev->vq[n]);
2436 }
2437 
2438 static void virtio_set_isr(VirtIODevice *vdev, int value)
2439 {
2440     uint8_t old = atomic_read(&vdev->isr);
2441 
2442     /* Do not write ISR if it does not change, so that its cacheline remains
2443      * shared in the common case where the guest does not read it.
2444      */
2445     if ((old & value) != value) {
2446         atomic_or(&vdev->isr, value);
2447     }
2448 }
2449 
2450 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2451 {
2452     uint16_t old, new;
2453     bool v;
2454     /* We need to expose used array entries before checking used event. */
2455     smp_mb();
2456     /* Always notify when queue is empty (when feature acknowledge) */
2457     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2458         !vq->inuse && virtio_queue_empty(vq)) {
2459         return true;
2460     }
2461 
2462     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2463         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2464     }
2465 
2466     v = vq->signalled_used_valid;
2467     vq->signalled_used_valid = true;
2468     old = vq->signalled_used;
2469     new = vq->signalled_used = vq->used_idx;
2470     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2471 }
2472 
2473 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2474                                     uint16_t off_wrap, uint16_t new,
2475                                     uint16_t old)
2476 {
2477     int off = off_wrap & ~(1 << 15);
2478 
2479     if (wrap != off_wrap >> 15) {
2480         off -= vq->vring.num;
2481     }
2482 
2483     return vring_need_event(off, new, old);
2484 }
2485 
2486 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2487 {
2488     VRingPackedDescEvent e;
2489     uint16_t old, new;
2490     bool v;
2491     VRingMemoryRegionCaches *caches;
2492 
2493     caches = vring_get_region_caches(vq);
2494     if (!caches) {
2495         return false;
2496     }
2497 
2498     vring_packed_event_read(vdev, &caches->avail, &e);
2499 
2500     old = vq->signalled_used;
2501     new = vq->signalled_used = vq->used_idx;
2502     v = vq->signalled_used_valid;
2503     vq->signalled_used_valid = true;
2504 
2505     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2506         return false;
2507     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2508         return true;
2509     }
2510 
2511     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2512                                          e.off_wrap, new, old);
2513 }
2514 
2515 /* Called within rcu_read_lock().  */
2516 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2517 {
2518     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2519         return virtio_packed_should_notify(vdev, vq);
2520     } else {
2521         return virtio_split_should_notify(vdev, vq);
2522     }
2523 }
2524 
2525 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2526 {
2527     WITH_RCU_READ_LOCK_GUARD() {
2528         if (!virtio_should_notify(vdev, vq)) {
2529             return;
2530         }
2531     }
2532 
2533     trace_virtio_notify_irqfd(vdev, vq);
2534 
2535     /*
2536      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2537      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2538      * incorrectly polling this bit during crashdump and hibernation
2539      * in MSI mode, causing a hang if this bit is never updated.
2540      * Recent releases of Windows do not really shut down, but rather
2541      * log out and hibernate to make the next startup faster.  Hence,
2542      * this manifested as a more serious hang during shutdown with
2543      *
2544      * Next driver release from 2016 fixed this problem, so working around it
2545      * is not a must, but it's easy to do so let's do it here.
2546      *
2547      * Note: it's safe to update ISR from any thread as it was switched
2548      * to an atomic operation.
2549      */
2550     virtio_set_isr(vq->vdev, 0x1);
2551     event_notifier_set(&vq->guest_notifier);
2552 }
2553 
2554 static void virtio_irq(VirtQueue *vq)
2555 {
2556     virtio_set_isr(vq->vdev, 0x1);
2557     virtio_notify_vector(vq->vdev, vq->vector);
2558 }
2559 
2560 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2561 {
2562     WITH_RCU_READ_LOCK_GUARD() {
2563         if (!virtio_should_notify(vdev, vq)) {
2564             return;
2565         }
2566     }
2567 
2568     trace_virtio_notify(vdev, vq);
2569     virtio_irq(vq);
2570 }
2571 
2572 void virtio_notify_config(VirtIODevice *vdev)
2573 {
2574     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2575         return;
2576 
2577     virtio_set_isr(vdev, 0x3);
2578     vdev->generation++;
2579     virtio_notify_vector(vdev, vdev->config_vector);
2580 }
2581 
2582 static bool virtio_device_endian_needed(void *opaque)
2583 {
2584     VirtIODevice *vdev = opaque;
2585 
2586     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2587     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2588         return vdev->device_endian != virtio_default_endian();
2589     }
2590     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2591     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2592 }
2593 
2594 static bool virtio_64bit_features_needed(void *opaque)
2595 {
2596     VirtIODevice *vdev = opaque;
2597 
2598     return (vdev->host_features >> 32) != 0;
2599 }
2600 
2601 static bool virtio_virtqueue_needed(void *opaque)
2602 {
2603     VirtIODevice *vdev = opaque;
2604 
2605     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2606 }
2607 
2608 static bool virtio_packed_virtqueue_needed(void *opaque)
2609 {
2610     VirtIODevice *vdev = opaque;
2611 
2612     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2613 }
2614 
2615 static bool virtio_ringsize_needed(void *opaque)
2616 {
2617     VirtIODevice *vdev = opaque;
2618     int i;
2619 
2620     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2621         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2622             return true;
2623         }
2624     }
2625     return false;
2626 }
2627 
2628 static bool virtio_extra_state_needed(void *opaque)
2629 {
2630     VirtIODevice *vdev = opaque;
2631     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2632     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2633 
2634     return k->has_extra_state &&
2635         k->has_extra_state(qbus->parent);
2636 }
2637 
2638 static bool virtio_broken_needed(void *opaque)
2639 {
2640     VirtIODevice *vdev = opaque;
2641 
2642     return vdev->broken;
2643 }
2644 
2645 static bool virtio_started_needed(void *opaque)
2646 {
2647     VirtIODevice *vdev = opaque;
2648 
2649     return vdev->started;
2650 }
2651 
2652 static bool virtio_disabled_needed(void *opaque)
2653 {
2654     VirtIODevice *vdev = opaque;
2655 
2656     return vdev->disabled;
2657 }
2658 
2659 static const VMStateDescription vmstate_virtqueue = {
2660     .name = "virtqueue_state",
2661     .version_id = 1,
2662     .minimum_version_id = 1,
2663     .fields = (VMStateField[]) {
2664         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2665         VMSTATE_UINT64(vring.used, struct VirtQueue),
2666         VMSTATE_END_OF_LIST()
2667     }
2668 };
2669 
2670 static const VMStateDescription vmstate_packed_virtqueue = {
2671     .name = "packed_virtqueue_state",
2672     .version_id = 1,
2673     .minimum_version_id = 1,
2674     .fields = (VMStateField[]) {
2675         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2676         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2677         VMSTATE_UINT16(used_idx, struct VirtQueue),
2678         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2679         VMSTATE_UINT32(inuse, struct VirtQueue),
2680         VMSTATE_END_OF_LIST()
2681     }
2682 };
2683 
2684 static const VMStateDescription vmstate_virtio_virtqueues = {
2685     .name = "virtio/virtqueues",
2686     .version_id = 1,
2687     .minimum_version_id = 1,
2688     .needed = &virtio_virtqueue_needed,
2689     .fields = (VMStateField[]) {
2690         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2691                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2692         VMSTATE_END_OF_LIST()
2693     }
2694 };
2695 
2696 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2697     .name = "virtio/packed_virtqueues",
2698     .version_id = 1,
2699     .minimum_version_id = 1,
2700     .needed = &virtio_packed_virtqueue_needed,
2701     .fields = (VMStateField[]) {
2702         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2703                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2704         VMSTATE_END_OF_LIST()
2705     }
2706 };
2707 
2708 static const VMStateDescription vmstate_ringsize = {
2709     .name = "ringsize_state",
2710     .version_id = 1,
2711     .minimum_version_id = 1,
2712     .fields = (VMStateField[]) {
2713         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2714         VMSTATE_END_OF_LIST()
2715     }
2716 };
2717 
2718 static const VMStateDescription vmstate_virtio_ringsize = {
2719     .name = "virtio/ringsize",
2720     .version_id = 1,
2721     .minimum_version_id = 1,
2722     .needed = &virtio_ringsize_needed,
2723     .fields = (VMStateField[]) {
2724         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2725                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2726         VMSTATE_END_OF_LIST()
2727     }
2728 };
2729 
2730 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2731                            const VMStateField *field)
2732 {
2733     VirtIODevice *vdev = pv;
2734     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2735     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2736 
2737     if (!k->load_extra_state) {
2738         return -1;
2739     } else {
2740         return k->load_extra_state(qbus->parent, f);
2741     }
2742 }
2743 
2744 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2745                            const VMStateField *field, QJSON *vmdesc)
2746 {
2747     VirtIODevice *vdev = pv;
2748     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2749     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2750 
2751     k->save_extra_state(qbus->parent, f);
2752     return 0;
2753 }
2754 
2755 static const VMStateInfo vmstate_info_extra_state = {
2756     .name = "virtqueue_extra_state",
2757     .get = get_extra_state,
2758     .put = put_extra_state,
2759 };
2760 
2761 static const VMStateDescription vmstate_virtio_extra_state = {
2762     .name = "virtio/extra_state",
2763     .version_id = 1,
2764     .minimum_version_id = 1,
2765     .needed = &virtio_extra_state_needed,
2766     .fields = (VMStateField[]) {
2767         {
2768             .name         = "extra_state",
2769             .version_id   = 0,
2770             .field_exists = NULL,
2771             .size         = 0,
2772             .info         = &vmstate_info_extra_state,
2773             .flags        = VMS_SINGLE,
2774             .offset       = 0,
2775         },
2776         VMSTATE_END_OF_LIST()
2777     }
2778 };
2779 
2780 static const VMStateDescription vmstate_virtio_device_endian = {
2781     .name = "virtio/device_endian",
2782     .version_id = 1,
2783     .minimum_version_id = 1,
2784     .needed = &virtio_device_endian_needed,
2785     .fields = (VMStateField[]) {
2786         VMSTATE_UINT8(device_endian, VirtIODevice),
2787         VMSTATE_END_OF_LIST()
2788     }
2789 };
2790 
2791 static const VMStateDescription vmstate_virtio_64bit_features = {
2792     .name = "virtio/64bit_features",
2793     .version_id = 1,
2794     .minimum_version_id = 1,
2795     .needed = &virtio_64bit_features_needed,
2796     .fields = (VMStateField[]) {
2797         VMSTATE_UINT64(guest_features, VirtIODevice),
2798         VMSTATE_END_OF_LIST()
2799     }
2800 };
2801 
2802 static const VMStateDescription vmstate_virtio_broken = {
2803     .name = "virtio/broken",
2804     .version_id = 1,
2805     .minimum_version_id = 1,
2806     .needed = &virtio_broken_needed,
2807     .fields = (VMStateField[]) {
2808         VMSTATE_BOOL(broken, VirtIODevice),
2809         VMSTATE_END_OF_LIST()
2810     }
2811 };
2812 
2813 static const VMStateDescription vmstate_virtio_started = {
2814     .name = "virtio/started",
2815     .version_id = 1,
2816     .minimum_version_id = 1,
2817     .needed = &virtio_started_needed,
2818     .fields = (VMStateField[]) {
2819         VMSTATE_BOOL(started, VirtIODevice),
2820         VMSTATE_END_OF_LIST()
2821     }
2822 };
2823 
2824 static const VMStateDescription vmstate_virtio_disabled = {
2825     .name = "virtio/disabled",
2826     .version_id = 1,
2827     .minimum_version_id = 1,
2828     .needed = &virtio_disabled_needed,
2829     .fields = (VMStateField[]) {
2830         VMSTATE_BOOL(disabled, VirtIODevice),
2831         VMSTATE_END_OF_LIST()
2832     }
2833 };
2834 
2835 static const VMStateDescription vmstate_virtio = {
2836     .name = "virtio",
2837     .version_id = 1,
2838     .minimum_version_id = 1,
2839     .minimum_version_id_old = 1,
2840     .fields = (VMStateField[]) {
2841         VMSTATE_END_OF_LIST()
2842     },
2843     .subsections = (const VMStateDescription*[]) {
2844         &vmstate_virtio_device_endian,
2845         &vmstate_virtio_64bit_features,
2846         &vmstate_virtio_virtqueues,
2847         &vmstate_virtio_ringsize,
2848         &vmstate_virtio_broken,
2849         &vmstate_virtio_extra_state,
2850         &vmstate_virtio_started,
2851         &vmstate_virtio_packed_virtqueues,
2852         &vmstate_virtio_disabled,
2853         NULL
2854     }
2855 };
2856 
2857 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2858 {
2859     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2860     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2861     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2862     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2863     int i;
2864 
2865     if (k->save_config) {
2866         k->save_config(qbus->parent, f);
2867     }
2868 
2869     qemu_put_8s(f, &vdev->status);
2870     qemu_put_8s(f, &vdev->isr);
2871     qemu_put_be16s(f, &vdev->queue_sel);
2872     qemu_put_be32s(f, &guest_features_lo);
2873     qemu_put_be32(f, vdev->config_len);
2874     qemu_put_buffer(f, vdev->config, vdev->config_len);
2875 
2876     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2877         if (vdev->vq[i].vring.num == 0)
2878             break;
2879     }
2880 
2881     qemu_put_be32(f, i);
2882 
2883     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2884         if (vdev->vq[i].vring.num == 0)
2885             break;
2886 
2887         qemu_put_be32(f, vdev->vq[i].vring.num);
2888         if (k->has_variable_vring_alignment) {
2889             qemu_put_be32(f, vdev->vq[i].vring.align);
2890         }
2891         /*
2892          * Save desc now, the rest of the ring addresses are saved in
2893          * subsections for VIRTIO-1 devices.
2894          */
2895         qemu_put_be64(f, vdev->vq[i].vring.desc);
2896         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2897         if (k->save_queue) {
2898             k->save_queue(qbus->parent, i, f);
2899         }
2900     }
2901 
2902     if (vdc->save != NULL) {
2903         vdc->save(vdev, f);
2904     }
2905 
2906     if (vdc->vmsd) {
2907         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2908         if (ret) {
2909             return ret;
2910         }
2911     }
2912 
2913     /* Subsections */
2914     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2915 }
2916 
2917 /* A wrapper for use as a VMState .put function */
2918 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2919                               const VMStateField *field, QJSON *vmdesc)
2920 {
2921     return virtio_save(VIRTIO_DEVICE(opaque), f);
2922 }
2923 
2924 /* A wrapper for use as a VMState .get function */
2925 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2926                              const VMStateField *field)
2927 {
2928     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2929     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2930 
2931     return virtio_load(vdev, f, dc->vmsd->version_id);
2932 }
2933 
2934 const VMStateInfo  virtio_vmstate_info = {
2935     .name = "virtio",
2936     .get = virtio_device_get,
2937     .put = virtio_device_put,
2938 };
2939 
2940 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2941 {
2942     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2943     bool bad = (val & ~(vdev->host_features)) != 0;
2944 
2945     val &= vdev->host_features;
2946     if (k->set_features) {
2947         k->set_features(vdev, val);
2948     }
2949     vdev->guest_features = val;
2950     return bad ? -1 : 0;
2951 }
2952 
2953 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2954 {
2955     int ret;
2956     /*
2957      * The driver must not attempt to set features after feature negotiation
2958      * has finished.
2959      */
2960     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2961         return -EINVAL;
2962     }
2963     ret = virtio_set_features_nocheck(vdev, val);
2964     if (!ret) {
2965         if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2966             /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2967             int i;
2968             for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2969                 if (vdev->vq[i].vring.num != 0) {
2970                     virtio_init_region_cache(vdev, i);
2971                 }
2972             }
2973         }
2974 
2975         if (!virtio_device_started(vdev, vdev->status) &&
2976             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2977             vdev->start_on_kick = true;
2978         }
2979     }
2980     return ret;
2981 }
2982 
2983 size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2984                                       uint64_t host_features)
2985 {
2986     size_t config_size = 0;
2987     int i;
2988 
2989     for (i = 0; feature_sizes[i].flags != 0; i++) {
2990         if (host_features & feature_sizes[i].flags) {
2991             config_size = MAX(feature_sizes[i].end, config_size);
2992         }
2993     }
2994 
2995     return config_size;
2996 }
2997 
2998 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2999 {
3000     int i, ret;
3001     int32_t config_len;
3002     uint32_t num;
3003     uint32_t features;
3004     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3005     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3006     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3007 
3008     /*
3009      * We poison the endianness to ensure it does not get used before
3010      * subsections have been loaded.
3011      */
3012     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3013 
3014     if (k->load_config) {
3015         ret = k->load_config(qbus->parent, f);
3016         if (ret)
3017             return ret;
3018     }
3019 
3020     qemu_get_8s(f, &vdev->status);
3021     qemu_get_8s(f, &vdev->isr);
3022     qemu_get_be16s(f, &vdev->queue_sel);
3023     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3024         return -1;
3025     }
3026     qemu_get_be32s(f, &features);
3027 
3028     /*
3029      * Temporarily set guest_features low bits - needed by
3030      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3031      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3032      *
3033      * Note: devices should always test host features in future - don't create
3034      * new dependencies like this.
3035      */
3036     vdev->guest_features = features;
3037 
3038     config_len = qemu_get_be32(f);
3039 
3040     /*
3041      * There are cases where the incoming config can be bigger or smaller
3042      * than what we have; so load what we have space for, and skip
3043      * any excess that's in the stream.
3044      */
3045     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3046 
3047     while (config_len > vdev->config_len) {
3048         qemu_get_byte(f);
3049         config_len--;
3050     }
3051 
3052     num = qemu_get_be32(f);
3053 
3054     if (num > VIRTIO_QUEUE_MAX) {
3055         error_report("Invalid number of virtqueues: 0x%x", num);
3056         return -1;
3057     }
3058 
3059     for (i = 0; i < num; i++) {
3060         vdev->vq[i].vring.num = qemu_get_be32(f);
3061         if (k->has_variable_vring_alignment) {
3062             vdev->vq[i].vring.align = qemu_get_be32(f);
3063         }
3064         vdev->vq[i].vring.desc = qemu_get_be64(f);
3065         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3066         vdev->vq[i].signalled_used_valid = false;
3067         vdev->vq[i].notification = true;
3068 
3069         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3070             error_report("VQ %d address 0x0 "
3071                          "inconsistent with Host index 0x%x",
3072                          i, vdev->vq[i].last_avail_idx);
3073             return -1;
3074         }
3075         if (k->load_queue) {
3076             ret = k->load_queue(qbus->parent, i, f);
3077             if (ret)
3078                 return ret;
3079         }
3080     }
3081 
3082     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3083 
3084     if (vdc->load != NULL) {
3085         ret = vdc->load(vdev, f, version_id);
3086         if (ret) {
3087             return ret;
3088         }
3089     }
3090 
3091     if (vdc->vmsd) {
3092         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3093         if (ret) {
3094             return ret;
3095         }
3096     }
3097 
3098     /* Subsections */
3099     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3100     if (ret) {
3101         return ret;
3102     }
3103 
3104     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3105         vdev->device_endian = virtio_default_endian();
3106     }
3107 
3108     if (virtio_64bit_features_needed(vdev)) {
3109         /*
3110          * Subsection load filled vdev->guest_features.  Run them
3111          * through virtio_set_features to sanity-check them against
3112          * host_features.
3113          */
3114         uint64_t features64 = vdev->guest_features;
3115         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3116             error_report("Features 0x%" PRIx64 " unsupported. "
3117                          "Allowed features: 0x%" PRIx64,
3118                          features64, vdev->host_features);
3119             return -1;
3120         }
3121     } else {
3122         if (virtio_set_features_nocheck(vdev, features) < 0) {
3123             error_report("Features 0x%x unsupported. "
3124                          "Allowed features: 0x%" PRIx64,
3125                          features, vdev->host_features);
3126             return -1;
3127         }
3128     }
3129 
3130     if (!virtio_device_started(vdev, vdev->status) &&
3131         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3132         vdev->start_on_kick = true;
3133     }
3134 
3135     RCU_READ_LOCK_GUARD();
3136     for (i = 0; i < num; i++) {
3137         if (vdev->vq[i].vring.desc) {
3138             uint16_t nheads;
3139 
3140             /*
3141              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3142              * only the region cache needs to be set up.  Legacy devices need
3143              * to calculate used and avail ring addresses based on the desc
3144              * address.
3145              */
3146             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3147                 virtio_init_region_cache(vdev, i);
3148             } else {
3149                 virtio_queue_update_rings(vdev, i);
3150             }
3151 
3152             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3153                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3154                 vdev->vq[i].shadow_avail_wrap_counter =
3155                                         vdev->vq[i].last_avail_wrap_counter;
3156                 continue;
3157             }
3158 
3159             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3160             /* Check it isn't doing strange things with descriptor numbers. */
3161             if (nheads > vdev->vq[i].vring.num) {
3162                 error_report("VQ %d size 0x%x Guest index 0x%x "
3163                              "inconsistent with Host index 0x%x: delta 0x%x",
3164                              i, vdev->vq[i].vring.num,
3165                              vring_avail_idx(&vdev->vq[i]),
3166                              vdev->vq[i].last_avail_idx, nheads);
3167                 return -1;
3168             }
3169             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3170             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3171 
3172             /*
3173              * Some devices migrate VirtQueueElements that have been popped
3174              * from the avail ring but not yet returned to the used ring.
3175              * Since max ring size < UINT16_MAX it's safe to use modulo
3176              * UINT16_MAX + 1 subtraction.
3177              */
3178             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3179                                 vdev->vq[i].used_idx);
3180             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3181                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3182                              "used_idx 0x%x",
3183                              i, vdev->vq[i].vring.num,
3184                              vdev->vq[i].last_avail_idx,
3185                              vdev->vq[i].used_idx);
3186                 return -1;
3187             }
3188         }
3189     }
3190 
3191     if (vdc->post_load) {
3192         ret = vdc->post_load(vdev);
3193         if (ret) {
3194             return ret;
3195         }
3196     }
3197 
3198     return 0;
3199 }
3200 
3201 void virtio_cleanup(VirtIODevice *vdev)
3202 {
3203     qemu_del_vm_change_state_handler(vdev->vmstate);
3204 }
3205 
3206 static void virtio_vmstate_change(void *opaque, int running, RunState state)
3207 {
3208     VirtIODevice *vdev = opaque;
3209     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3210     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3211     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3212     vdev->vm_running = running;
3213 
3214     if (backend_run) {
3215         virtio_set_status(vdev, vdev->status);
3216     }
3217 
3218     if (k->vmstate_change) {
3219         k->vmstate_change(qbus->parent, backend_run);
3220     }
3221 
3222     if (!backend_run) {
3223         virtio_set_status(vdev, vdev->status);
3224     }
3225 }
3226 
3227 void virtio_instance_init_common(Object *proxy_obj, void *data,
3228                                  size_t vdev_size, const char *vdev_name)
3229 {
3230     DeviceState *vdev = data;
3231 
3232     object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size,
3233                             vdev_name, &error_abort, NULL);
3234     qdev_alias_all_properties(vdev, proxy_obj);
3235 }
3236 
3237 void virtio_init(VirtIODevice *vdev, const char *name,
3238                  uint16_t device_id, size_t config_size)
3239 {
3240     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3241     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3242     int i;
3243     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3244 
3245     if (nvectors) {
3246         vdev->vector_queues =
3247             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3248     }
3249 
3250     vdev->start_on_kick = false;
3251     vdev->started = false;
3252     vdev->device_id = device_id;
3253     vdev->status = 0;
3254     atomic_set(&vdev->isr, 0);
3255     vdev->queue_sel = 0;
3256     vdev->config_vector = VIRTIO_NO_VECTOR;
3257     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3258     vdev->vm_running = runstate_is_running();
3259     vdev->broken = false;
3260     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3261         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3262         vdev->vq[i].vdev = vdev;
3263         vdev->vq[i].queue_index = i;
3264         vdev->vq[i].host_notifier_enabled = false;
3265     }
3266 
3267     vdev->name = name;
3268     vdev->config_len = config_size;
3269     if (vdev->config_len) {
3270         vdev->config = g_malloc0(config_size);
3271     } else {
3272         vdev->config = NULL;
3273     }
3274     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3275             virtio_vmstate_change, vdev);
3276     vdev->device_endian = virtio_default_endian();
3277     vdev->use_guest_notifier_mask = true;
3278 }
3279 
3280 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3281 {
3282     return vdev->vq[n].vring.desc;
3283 }
3284 
3285 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3286 {
3287     return virtio_queue_get_desc_addr(vdev, n) != 0;
3288 }
3289 
3290 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3291 {
3292     return vdev->vq[n].vring.avail;
3293 }
3294 
3295 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3296 {
3297     return vdev->vq[n].vring.used;
3298 }
3299 
3300 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3301 {
3302     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3303 }
3304 
3305 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3306 {
3307     int s;
3308 
3309     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3310         return sizeof(struct VRingPackedDescEvent);
3311     }
3312 
3313     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3314     return offsetof(VRingAvail, ring) +
3315         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3316 }
3317 
3318 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3319 {
3320     int s;
3321 
3322     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3323         return sizeof(struct VRingPackedDescEvent);
3324     }
3325 
3326     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3327     return offsetof(VRingUsed, ring) +
3328         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3329 }
3330 
3331 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3332                                                            int n)
3333 {
3334     unsigned int avail, used;
3335 
3336     avail = vdev->vq[n].last_avail_idx;
3337     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3338 
3339     used = vdev->vq[n].used_idx;
3340     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3341 
3342     return avail | used << 16;
3343 }
3344 
3345 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3346                                                       int n)
3347 {
3348     return vdev->vq[n].last_avail_idx;
3349 }
3350 
3351 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3352 {
3353     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3354         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3355     } else {
3356         return virtio_queue_split_get_last_avail_idx(vdev, n);
3357     }
3358 }
3359 
3360 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3361                                                    int n, unsigned int idx)
3362 {
3363     struct VirtQueue *vq = &vdev->vq[n];
3364 
3365     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3366     vq->last_avail_wrap_counter =
3367         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3368     idx >>= 16;
3369     vq->used_idx = idx & 0x7ffff;
3370     vq->used_wrap_counter = !!(idx & 0x8000);
3371 }
3372 
3373 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3374                                                   int n, unsigned int idx)
3375 {
3376         vdev->vq[n].last_avail_idx = idx;
3377         vdev->vq[n].shadow_avail_idx = idx;
3378 }
3379 
3380 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3381                                      unsigned int idx)
3382 {
3383     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3384         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3385     } else {
3386         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3387     }
3388 }
3389 
3390 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3391                                                        int n)
3392 {
3393     /* We don't have a reference like avail idx in shared memory */
3394     return;
3395 }
3396 
3397 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3398                                                       int n)
3399 {
3400     RCU_READ_LOCK_GUARD();
3401     if (vdev->vq[n].vring.desc) {
3402         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3403         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3404     }
3405 }
3406 
3407 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3408 {
3409     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3410         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3411     } else {
3412         virtio_queue_split_restore_last_avail_idx(vdev, n);
3413     }
3414 }
3415 
3416 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3417 {
3418     /* used idx was updated through set_last_avail_idx() */
3419     return;
3420 }
3421 
3422 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3423 {
3424     RCU_READ_LOCK_GUARD();
3425     if (vdev->vq[n].vring.desc) {
3426         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3427     }
3428 }
3429 
3430 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3431 {
3432     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3433         return virtio_queue_packed_update_used_idx(vdev, n);
3434     } else {
3435         return virtio_split_packed_update_used_idx(vdev, n);
3436     }
3437 }
3438 
3439 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3440 {
3441     vdev->vq[n].signalled_used_valid = false;
3442 }
3443 
3444 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3445 {
3446     return vdev->vq + n;
3447 }
3448 
3449 uint16_t virtio_get_queue_index(VirtQueue *vq)
3450 {
3451     return vq->queue_index;
3452 }
3453 
3454 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3455 {
3456     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3457     if (event_notifier_test_and_clear(n)) {
3458         virtio_irq(vq);
3459     }
3460 }
3461 
3462 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3463                                                 bool with_irqfd)
3464 {
3465     if (assign && !with_irqfd) {
3466         event_notifier_set_handler(&vq->guest_notifier,
3467                                    virtio_queue_guest_notifier_read);
3468     } else {
3469         event_notifier_set_handler(&vq->guest_notifier, NULL);
3470     }
3471     if (!assign) {
3472         /* Test and clear notifier before closing it,
3473          * in case poll callback didn't have time to run. */
3474         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3475     }
3476 }
3477 
3478 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3479 {
3480     return &vq->guest_notifier;
3481 }
3482 
3483 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3484 {
3485     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3486     if (event_notifier_test_and_clear(n)) {
3487         virtio_queue_notify_aio_vq(vq);
3488     }
3489 }
3490 
3491 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3492 {
3493     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3494 
3495     virtio_queue_set_notification(vq, 0);
3496 }
3497 
3498 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3499 {
3500     EventNotifier *n = opaque;
3501     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3502 
3503     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3504         return false;
3505     }
3506 
3507     return virtio_queue_notify_aio_vq(vq);
3508 }
3509 
3510 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3511 {
3512     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3513 
3514     /* Caller polls once more after this to catch requests that race with us */
3515     virtio_queue_set_notification(vq, 1);
3516 }
3517 
3518 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3519                                                 VirtIOHandleAIOOutput handle_output)
3520 {
3521     if (handle_output) {
3522         vq->handle_aio_output = handle_output;
3523         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3524                                virtio_queue_host_notifier_aio_read,
3525                                virtio_queue_host_notifier_aio_poll);
3526         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3527                                     virtio_queue_host_notifier_aio_poll_begin,
3528                                     virtio_queue_host_notifier_aio_poll_end);
3529     } else {
3530         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3531         /* Test and clear notifier before after disabling event,
3532          * in case poll callback didn't have time to run. */
3533         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3534         vq->handle_aio_output = NULL;
3535     }
3536 }
3537 
3538 void virtio_queue_host_notifier_read(EventNotifier *n)
3539 {
3540     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3541     if (event_notifier_test_and_clear(n)) {
3542         virtio_queue_notify_vq(vq);
3543     }
3544 }
3545 
3546 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3547 {
3548     return &vq->host_notifier;
3549 }
3550 
3551 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3552 {
3553     vq->host_notifier_enabled = enabled;
3554 }
3555 
3556 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3557                                       MemoryRegion *mr, bool assign)
3558 {
3559     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3560     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3561 
3562     if (k->set_host_notifier_mr) {
3563         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3564     }
3565 
3566     return -1;
3567 }
3568 
3569 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3570 {
3571     g_free(vdev->bus_name);
3572     vdev->bus_name = g_strdup(bus_name);
3573 }
3574 
3575 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3576 {
3577     va_list ap;
3578 
3579     va_start(ap, fmt);
3580     error_vreport(fmt, ap);
3581     va_end(ap);
3582 
3583     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3584         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3585         virtio_notify_config(vdev);
3586     }
3587 
3588     vdev->broken = true;
3589 }
3590 
3591 static void virtio_memory_listener_commit(MemoryListener *listener)
3592 {
3593     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3594     int i;
3595 
3596     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3597         if (vdev->vq[i].vring.num == 0) {
3598             break;
3599         }
3600         virtio_init_region_cache(vdev, i);
3601     }
3602 }
3603 
3604 static void virtio_device_realize(DeviceState *dev, Error **errp)
3605 {
3606     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3607     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3608     Error *err = NULL;
3609 
3610     /* Devices should either use vmsd or the load/save methods */
3611     assert(!vdc->vmsd || !vdc->load);
3612 
3613     if (vdc->realize != NULL) {
3614         vdc->realize(dev, &err);
3615         if (err != NULL) {
3616             error_propagate(errp, err);
3617             return;
3618         }
3619     }
3620 
3621     virtio_bus_device_plugged(vdev, &err);
3622     if (err != NULL) {
3623         error_propagate(errp, err);
3624         vdc->unrealize(dev, NULL);
3625         return;
3626     }
3627 
3628     vdev->listener.commit = virtio_memory_listener_commit;
3629     memory_listener_register(&vdev->listener, vdev->dma_as);
3630 }
3631 
3632 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
3633 {
3634     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3635     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3636     Error *err = NULL;
3637 
3638     virtio_bus_device_unplugged(vdev);
3639 
3640     if (vdc->unrealize != NULL) {
3641         vdc->unrealize(dev, &err);
3642         if (err != NULL) {
3643             error_propagate(errp, err);
3644             return;
3645         }
3646     }
3647 
3648     g_free(vdev->bus_name);
3649     vdev->bus_name = NULL;
3650 }
3651 
3652 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3653 {
3654     int i;
3655     if (!vdev->vq) {
3656         return;
3657     }
3658 
3659     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3660         if (vdev->vq[i].vring.num == 0) {
3661             break;
3662         }
3663         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3664     }
3665     g_free(vdev->vq);
3666 }
3667 
3668 static void virtio_device_instance_finalize(Object *obj)
3669 {
3670     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3671 
3672     memory_listener_unregister(&vdev->listener);
3673     virtio_device_free_virtqueues(vdev);
3674 
3675     g_free(vdev->config);
3676     g_free(vdev->vector_queues);
3677 }
3678 
3679 static Property virtio_properties[] = {
3680     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3681     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3682     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3683     DEFINE_PROP_END_OF_LIST(),
3684 };
3685 
3686 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3687 {
3688     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3689     int i, n, r, err;
3690 
3691     memory_region_transaction_begin();
3692     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3693         VirtQueue *vq = &vdev->vq[n];
3694         if (!virtio_queue_get_num(vdev, n)) {
3695             continue;
3696         }
3697         r = virtio_bus_set_host_notifier(qbus, n, true);
3698         if (r < 0) {
3699             err = r;
3700             goto assign_error;
3701         }
3702         event_notifier_set_handler(&vq->host_notifier,
3703                                    virtio_queue_host_notifier_read);
3704     }
3705 
3706     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3707         /* Kick right away to begin processing requests already in vring */
3708         VirtQueue *vq = &vdev->vq[n];
3709         if (!vq->vring.num) {
3710             continue;
3711         }
3712         event_notifier_set(&vq->host_notifier);
3713     }
3714     memory_region_transaction_commit();
3715     return 0;
3716 
3717 assign_error:
3718     i = n; /* save n for a second iteration after transaction is committed. */
3719     while (--n >= 0) {
3720         VirtQueue *vq = &vdev->vq[n];
3721         if (!virtio_queue_get_num(vdev, n)) {
3722             continue;
3723         }
3724 
3725         event_notifier_set_handler(&vq->host_notifier, NULL);
3726         r = virtio_bus_set_host_notifier(qbus, n, false);
3727         assert(r >= 0);
3728     }
3729     memory_region_transaction_commit();
3730 
3731     while (--i >= 0) {
3732         if (!virtio_queue_get_num(vdev, i)) {
3733             continue;
3734         }
3735         virtio_bus_cleanup_host_notifier(qbus, i);
3736     }
3737     return err;
3738 }
3739 
3740 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3741 {
3742     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3743     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3744 
3745     return virtio_bus_start_ioeventfd(vbus);
3746 }
3747 
3748 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3749 {
3750     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3751     int n, r;
3752 
3753     memory_region_transaction_begin();
3754     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3755         VirtQueue *vq = &vdev->vq[n];
3756 
3757         if (!virtio_queue_get_num(vdev, n)) {
3758             continue;
3759         }
3760         event_notifier_set_handler(&vq->host_notifier, NULL);
3761         r = virtio_bus_set_host_notifier(qbus, n, false);
3762         assert(r >= 0);
3763     }
3764     memory_region_transaction_commit();
3765 
3766     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3767         if (!virtio_queue_get_num(vdev, n)) {
3768             continue;
3769         }
3770         virtio_bus_cleanup_host_notifier(qbus, n);
3771     }
3772 }
3773 
3774 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3775 {
3776     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3777     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3778 
3779     return virtio_bus_grab_ioeventfd(vbus);
3780 }
3781 
3782 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3783 {
3784     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3785     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3786 
3787     virtio_bus_release_ioeventfd(vbus);
3788 }
3789 
3790 static void virtio_device_class_init(ObjectClass *klass, void *data)
3791 {
3792     /* Set the default value here. */
3793     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3794     DeviceClass *dc = DEVICE_CLASS(klass);
3795 
3796     dc->realize = virtio_device_realize;
3797     dc->unrealize = virtio_device_unrealize;
3798     dc->bus_type = TYPE_VIRTIO_BUS;
3799     device_class_set_props(dc, virtio_properties);
3800     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3801     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3802 
3803     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3804 }
3805 
3806 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3807 {
3808     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3809     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3810 
3811     return virtio_bus_ioeventfd_enabled(vbus);
3812 }
3813 
3814 static const TypeInfo virtio_device_info = {
3815     .name = TYPE_VIRTIO_DEVICE,
3816     .parent = TYPE_DEVICE,
3817     .instance_size = sizeof(VirtIODevice),
3818     .class_init = virtio_device_class_init,
3819     .instance_finalize = virtio_device_instance_finalize,
3820     .abstract = true,
3821     .class_size = sizeof(VirtioDeviceClass),
3822 };
3823 
3824 static void virtio_register_types(void)
3825 {
3826     type_register_static(&virtio_device_info);
3827 }
3828 
3829 type_init(virtio_register_types)
3830