1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qapi/qapi-commands-virtio.h"
17 #include "trace.h"
18 #include "qemu/defer-call.h"
19 #include "qemu/error-report.h"
20 #include "qemu/log.h"
21 #include "qemu/main-loop.h"
22 #include "qemu/module.h"
23 #include "exec/tswap.h"
24 #include "qom/object_interfaces.h"
25 #include "hw/core/cpu.h"
26 #include "hw/virtio/virtio.h"
27 #include "hw/virtio/vhost.h"
28 #include "migration/qemu-file-types.h"
29 #include "qemu/atomic.h"
30 #include "hw/virtio/virtio-bus.h"
31 #include "hw/qdev-properties.h"
32 #include "hw/virtio/virtio-access.h"
33 #include "system/dma.h"
34 #include "system/runstate.h"
35 #include "virtio-qmp.h"
36 
37 #include "standard-headers/linux/virtio_ids.h"
38 #include "standard-headers/linux/vhost_types.h"
39 #include "standard-headers/linux/virtio_blk.h"
40 #include "standard-headers/linux/virtio_console.h"
41 #include "standard-headers/linux/virtio_gpu.h"
42 #include "standard-headers/linux/virtio_net.h"
43 #include "standard-headers/linux/virtio_scsi.h"
44 #include "standard-headers/linux/virtio_i2c.h"
45 #include "standard-headers/linux/virtio_balloon.h"
46 #include "standard-headers/linux/virtio_iommu.h"
47 #include "standard-headers/linux/virtio_mem.h"
48 #include "standard-headers/linux/virtio_vsock.h"
49 
50 /*
51  * Maximum size of virtio device config space
52  */
53 #define VHOST_USER_MAX_CONFIG_SIZE 256
54 
55 /*
56  * The alignment to use between consumer and producer parts of vring.
57  * x86 pagesize again. This is the default, used by transports like PCI
58  * which don't provide a means for the guest to tell the host the alignment.
59  */
60 #define VIRTIO_PCI_VRING_ALIGN         4096
61 
62 typedef struct VRingDesc
63 {
64     uint64_t addr;
65     uint32_t len;
66     uint16_t flags;
67     uint16_t next;
68 } VRingDesc;
69 
70 typedef struct VRingPackedDesc {
71     uint64_t addr;
72     uint32_t len;
73     uint16_t id;
74     uint16_t flags;
75 } VRingPackedDesc;
76 
77 typedef struct VRingAvail
78 {
79     uint16_t flags;
80     uint16_t idx;
81     uint16_t ring[];
82 } VRingAvail;
83 
84 typedef struct VRingUsedElem
85 {
86     uint32_t id;
87     uint32_t len;
88 } VRingUsedElem;
89 
90 typedef struct VRingUsed
91 {
92     uint16_t flags;
93     uint16_t idx;
94     VRingUsedElem ring[];
95 } VRingUsed;
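
/*
 * These structs mirror the guest-visible split virtqueue layout: a
 * descriptor table (VRingDesc), the driver-written avail ring and the
 * device-written used ring.  When VIRTIO_RING_F_EVENT_IDX is negotiated,
 * a 16-bit used_event field follows avail.ring[num] and a 16-bit
 * avail_event field follows used.ring[num]; the code below reaches them
 * by indexing one slot past the ring, see vring_get_used_event() and
 * vring_set_avail_event().
 */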
96 
97 typedef struct VRingMemoryRegionCaches {
98     struct rcu_head rcu;
99     MemoryRegionCache desc;
100     MemoryRegionCache avail;
101     MemoryRegionCache used;
102 } VRingMemoryRegionCaches;
103 
104 typedef struct VRing
105 {
106     unsigned int num;
107     unsigned int num_default;
108     unsigned int align;
109     hwaddr desc;
110     hwaddr avail;
111     hwaddr used;
112     VRingMemoryRegionCaches *caches;
113 } VRing;
114 
115 typedef struct VRingPackedDescEvent {
116     uint16_t off_wrap;
117     uint16_t flags;
118 } VRingPackedDescEvent;
119 
120 struct VirtQueue
121 {
122     VRing vring;
123     VirtQueueElement *used_elems;
124 
125     /* Next head to pop */
126     uint16_t last_avail_idx;
127     bool last_avail_wrap_counter;
128 
129     /* Last avail_idx read from VQ. */
130     uint16_t shadow_avail_idx;
131     bool shadow_avail_wrap_counter;
132 
133     uint16_t used_idx;
134     bool used_wrap_counter;
135 
136     /* Last used index value we have signalled on */
137     uint16_t signalled_used;
138 
139     /* Whether signalled_used is valid */
140     bool signalled_used_valid;
141 
142     /* Notification enabled? */
143     bool notification;
144 
145     uint16_t queue_index;
146 
147     unsigned int inuse;
148 
149     uint16_t vector;
150     VirtIOHandleOutput handle_output;
151     VirtIODevice *vdev;
152     EventNotifier guest_notifier;
153     EventNotifier host_notifier;
154     bool host_notifier_enabled;
155     QLIST_ENTRY(VirtQueue) node;
156 };
157 
158 const char *virtio_device_names[] = {
159     [VIRTIO_ID_NET] = "virtio-net",
160     [VIRTIO_ID_BLOCK] = "virtio-blk",
161     [VIRTIO_ID_CONSOLE] = "virtio-serial",
162     [VIRTIO_ID_RNG] = "virtio-rng",
163     [VIRTIO_ID_BALLOON] = "virtio-balloon",
164     [VIRTIO_ID_IOMEM] = "virtio-iomem",
165     [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
166     [VIRTIO_ID_SCSI] = "virtio-scsi",
167     [VIRTIO_ID_9P] = "virtio-9p",
168     [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
169     [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
170     [VIRTIO_ID_CAIF] = "virtio-caif",
171     [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
172     [VIRTIO_ID_GPU] = "virtio-gpu",
173     [VIRTIO_ID_CLOCK] = "virtio-clk",
174     [VIRTIO_ID_INPUT] = "virtio-input",
175     [VIRTIO_ID_VSOCK] = "vhost-vsock",
176     [VIRTIO_ID_CRYPTO] = "virtio-crypto",
177     [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
178     [VIRTIO_ID_PSTORE] = "virtio-pstore",
179     [VIRTIO_ID_IOMMU] = "virtio-iommu",
180     [VIRTIO_ID_MEM] = "virtio-mem",
181     [VIRTIO_ID_SOUND] = "virtio-sound",
182     [VIRTIO_ID_FS] = "virtio-user-fs",
183     [VIRTIO_ID_PMEM] = "virtio-pmem",
184     [VIRTIO_ID_RPMB] = "virtio-rpmb",
185     [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
186     [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
187     [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
188     [VIRTIO_ID_SCMI] = "virtio-scmi",
189     [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
190     [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
191     [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
192     [VIRTIO_ID_CAN] = "virtio-can",
193     [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
194     [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
195     [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
196     [VIRTIO_ID_BT] = "virtio-bluetooth",
197     [VIRTIO_ID_GPIO] = "virtio-gpio"
198 };
199 
200 static const char *virtio_id_to_name(uint16_t device_id)
201 {
202     assert(device_id < G_N_ELEMENTS(virtio_device_names));
203     const char *name = virtio_device_names[device_id];
204     assert(name != NULL);
205     return name;
206 }
207 
208 static void virtio_check_indirect_feature(VirtIODevice *vdev)
209 {
210     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
211         qemu_log_mask(LOG_GUEST_ERROR,
212                       "Device %s: indirect_desc was not negotiated!\n",
213                       vdev->name);
214     }
215 }
216 
217 /* Called within call_rcu().  */
218 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
219 {
220     assert(caches != NULL);
221     address_space_cache_destroy(&caches->desc);
222     address_space_cache_destroy(&caches->avail);
223     address_space_cache_destroy(&caches->used);
224     g_free(caches);
225 }
226 
227 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
228 {
229     VRingMemoryRegionCaches *caches;
230 
231     caches = qatomic_read(&vq->vring.caches);
232     qatomic_rcu_set(&vq->vring.caches, NULL);
233     if (caches) {
234         call_rcu(caches, virtio_free_region_cache, rcu);
235     }
236 }
237 
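/*
 * (Re)create the MemoryRegionCache for this queue's desc, avail and used
 * rings so the data path can reuse cached translations instead of doing a
 * full address_space lookup on every access.  The new set is published
 * with qatomic_rcu_set() and the old one is freed via call_rcu(), so
 * readers running under rcu_read_lock() always see a complete,
 * consistent set of caches.
 */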
238 void virtio_init_region_cache(VirtIODevice *vdev, int n)
239 {
240     VirtQueue *vq = &vdev->vq[n];
241     VRingMemoryRegionCaches *old = vq->vring.caches;
242     VRingMemoryRegionCaches *new = NULL;
243     hwaddr addr, size;
244     int64_t len;
245     bool packed;
246 
247 
248     addr = vq->vring.desc;
249     if (!addr) {
250         goto out_no_cache;
251     }
252     new = g_new0(VRingMemoryRegionCaches, 1);
253     size = virtio_queue_get_desc_size(vdev, n);
254     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
255                                    true : false;
256     len = address_space_cache_init(&new->desc, vdev->dma_as,
257                                    addr, size, packed);
258     if (len < size) {
259         virtio_error(vdev, "Cannot map desc");
260         goto err_desc;
261     }
262 
263     size = virtio_queue_get_used_size(vdev, n);
264     len = address_space_cache_init(&new->used, vdev->dma_as,
265                                    vq->vring.used, size, true);
266     if (len < size) {
267         virtio_error(vdev, "Cannot map used");
268         goto err_used;
269     }
270 
271     size = virtio_queue_get_avail_size(vdev, n);
272     len = address_space_cache_init(&new->avail, vdev->dma_as,
273                                    vq->vring.avail, size, false);
274     if (len < size) {
275         virtio_error(vdev, "Cannot map avail");
276         goto err_avail;
277     }
278 
279     qatomic_rcu_set(&vq->vring.caches, new);
280     if (old) {
281         call_rcu(old, virtio_free_region_cache, rcu);
282     }
283     return;
284 
285 err_avail:
286     address_space_cache_destroy(&new->avail);
287 err_used:
288     address_space_cache_destroy(&new->used);
289 err_desc:
290     address_space_cache_destroy(&new->desc);
291 out_no_cache:
292     g_free(new);
293     virtio_virtqueue_reset_region_cache(vq);
294 }
295 
296 /* virt queue functions */
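/*
 * Split-ring layout as computed below: the avail ring directly follows
 * the descriptor table, and the used ring starts at the next 'align'
 * boundary after the avail ring.  For example, with num = 256 and the
 * default PCI alignment of 4096:
 *
 *   desc:  256 * sizeof(VRingDesc)      = 4096 bytes
 *   avail: 4 + 256 * sizeof(uint16_t)   =  516 bytes
 *   used:  starts at vring_align(4096 + 516, 4096) = 8192 bytes in
 */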
297 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
298 {
299     VRing *vring = &vdev->vq[n].vring;
300 
301     if (!vring->num || !vring->desc || !vring->align) {
302         /* not yet set up -> nothing to do */
303         return;
304     }
305     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
306     vring->used = vring_align(vring->avail +
307                               offsetof(VRingAvail, ring[vring->num]),
308                               vring->align);
309     virtio_init_region_cache(vdev, n);
310 }
311 
312 /* Called within rcu_read_lock().  */
313 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
314                                   MemoryRegionCache *cache, int i)
315 {
316     address_space_read_cached(cache, i * sizeof(VRingDesc),
317                               desc, sizeof(VRingDesc));
318     virtio_tswap64s(vdev, &desc->addr);
319     virtio_tswap32s(vdev, &desc->len);
320     virtio_tswap16s(vdev, &desc->flags);
321     virtio_tswap16s(vdev, &desc->next);
322 }
323 
324 static void vring_packed_event_read(VirtIODevice *vdev,
325                                     MemoryRegionCache *cache,
326                                     VRingPackedDescEvent *e)
327 {
328     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
329     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
330 
331     e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
332     /* Make sure flags is seen before off_wrap */
333     smp_rmb();
334     e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
335 }
336 
337 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
338                                         MemoryRegionCache *cache,
339                                         uint16_t off_wrap)
340 {
341     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
342 
343     virtio_stw_phys_cached(vdev, cache, off, off_wrap);
344     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
345 }
346 
347 static void vring_packed_flags_write(VirtIODevice *vdev,
348                                      MemoryRegionCache *cache, uint16_t flags)
349 {
350     hwaddr off = offsetof(VRingPackedDescEvent, flags);
351 
352     virtio_stw_phys_cached(vdev, cache, off, flags);
353     address_space_cache_invalidate(cache, off, sizeof(flags));
354 }
355 
356 /* Called within rcu_read_lock().  */
357 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
358 {
359     return qatomic_rcu_read(&vq->vring.caches);
360 }
361 
362 /* Called within rcu_read_lock().  */
363 static inline uint16_t vring_avail_flags(VirtQueue *vq)
364 {
365     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
366     hwaddr pa = offsetof(VRingAvail, flags);
367 
368     if (!caches) {
369         return 0;
370     }
371 
372     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
373 }
374 
375 /* Called within rcu_read_lock().  */
376 static inline uint16_t vring_avail_idx(VirtQueue *vq)
377 {
378     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
379     hwaddr pa = offsetof(VRingAvail, idx);
380 
381     if (!caches) {
382         return 0;
383     }
384 
385     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
386     return vq->shadow_avail_idx;
387 }
388 
389 /* Called within rcu_read_lock().  */
390 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
391 {
392     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393     hwaddr pa = offsetof(VRingAvail, ring[i]);
394 
395     if (!caches) {
396         return 0;
397     }
398 
399     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
400 }
401 
402 /* Called within rcu_read_lock().  */
403 static inline uint16_t vring_get_used_event(VirtQueue *vq)
404 {
405     return vring_avail_ring(vq, vq->vring.num);
406 }
407 
408 /* Called within rcu_read_lock().  */
409 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
410                                     int i)
411 {
412     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
413     hwaddr pa = offsetof(VRingUsed, ring[i]);
414 
415     if (!caches) {
416         return;
417     }
418 
419     virtio_tswap32s(vq->vdev, &uelem->id);
420     virtio_tswap32s(vq->vdev, &uelem->len);
421     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
422     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
423 }
424 
425 /* Called within rcu_read_lock(). */
426 static inline uint16_t vring_used_flags(VirtQueue *vq)
427 {
428     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
429     hwaddr pa = offsetof(VRingUsed, flags);
430 
431     if (!caches) {
432         return 0;
433     }
434 
435     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
436 }
437 
438 /* Called within rcu_read_lock().  */
439 static uint16_t vring_used_idx(VirtQueue *vq)
440 {
441     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
442     hwaddr pa = offsetof(VRingUsed, idx);
443 
444     if (!caches) {
445         return 0;
446     }
447 
448     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
449 }
450 
451 /* Called within rcu_read_lock().  */
452 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
453 {
454     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
455     hwaddr pa = offsetof(VRingUsed, idx);
456 
457     if (caches) {
458         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
459         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
460     }
461 
462     vq->used_idx = val;
463 }
464 
465 /* Called within rcu_read_lock().  */
466 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
467 {
468     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
469     VirtIODevice *vdev = vq->vdev;
470     hwaddr pa = offsetof(VRingUsed, flags);
471     uint16_t flags;
472 
473     if (!caches) {
474         return;
475     }
476 
477     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
478     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
479     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
480 }
481 
482 /* Called within rcu_read_lock().  */
483 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
484 {
485     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
486     VirtIODevice *vdev = vq->vdev;
487     hwaddr pa = offsetof(VRingUsed, flags);
488     uint16_t flags;
489 
490     if (!caches) {
491         return;
492     }
493 
494     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
495     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
496     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
497 }
498 
499 /* Called within rcu_read_lock().  */
500 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
501 {
502     VRingMemoryRegionCaches *caches;
503     hwaddr pa;
504     if (!vq->notification) {
505         return;
506     }
507 
508     caches = vring_get_region_caches(vq);
509     if (!caches) {
510         return;
511     }
512 
513     pa = offsetof(VRingUsed, ring[vq->vring.num]);
514     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
515     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
516 }
517 
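/*
 * Split-ring notification control: with VIRTIO_RING_F_EVENT_IDX the
 * device publishes the avail index it wants to be kicked at (the
 * avail_event field past the used ring); without it, kicks are throttled
 * by setting or clearing VRING_USED_F_NO_NOTIFY in the used ring's flags.
 */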
518 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
519 {
520     RCU_READ_LOCK_GUARD();
521 
522     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
523         vring_set_avail_event(vq, vring_avail_idx(vq));
524     } else if (enable) {
525         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
526     } else {
527         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
528     }
529     if (enable) {
530         /* Expose avail event/used flags before caller checks the avail idx. */
531         smp_mb();
532     }
533 }
534 
535 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
536 {
537     uint16_t off_wrap;
538     VRingPackedDescEvent e;
539     VRingMemoryRegionCaches *caches;
540 
541     RCU_READ_LOCK_GUARD();
542     caches = vring_get_region_caches(vq);
543     if (!caches) {
544         return;
545     }
546 
547     vring_packed_event_read(vq->vdev, &caches->used, &e);
548 
549     if (!enable) {
550         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
551     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
552         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
553         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
554         /* Make sure off_wrap is written before flags */
555         smp_wmb();
556         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
557     } else {
558         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
559     }
560 
561     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
562     if (enable) {
563         /* Expose avail event/used flags before caller checks the avail idx. */
564         smp_mb();
565     }
566 }
567 
568 bool virtio_queue_get_notification(VirtQueue *vq)
569 {
570     return vq->notification;
571 }
572 
573 void virtio_queue_set_notification(VirtQueue *vq, int enable)
574 {
575     vq->notification = enable;
576 
577     if (!vq->vring.desc) {
578         return;
579     }
580 
581     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
582         virtio_queue_packed_set_notification(vq, enable);
583     } else {
584         virtio_queue_split_set_notification(vq, enable);
585     }
586 }
587 
588 int virtio_queue_ready(VirtQueue *vq)
589 {
590     return vq->vring.avail != 0;
591 }
592 
593 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
594                                          uint16_t *flags,
595                                          MemoryRegionCache *cache,
596                                          int i)
597 {
598     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
599 
600     *flags = virtio_lduw_phys_cached(vdev, cache, off);
601 }
602 
603 static void vring_packed_desc_read(VirtIODevice *vdev,
604                                    VRingPackedDesc *desc,
605                                    MemoryRegionCache *cache,
606                                    int i, bool strict_order)
607 {
608     hwaddr off = i * sizeof(VRingPackedDesc);
609 
610     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
611 
612     if (strict_order) {
613         /* Make sure flags is read before the other fields. */
614         smp_rmb();
615     }
616 
617     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
618                               &desc->addr, sizeof(desc->addr));
619     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
620                               &desc->id, sizeof(desc->id));
621     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
622                               &desc->len, sizeof(desc->len));
623     virtio_tswap64s(vdev, &desc->addr);
624     virtio_tswap16s(vdev, &desc->id);
625     virtio_tswap32s(vdev, &desc->len);
626 }
627 
628 static void vring_packed_desc_write_data(VirtIODevice *vdev,
629                                          VRingPackedDesc *desc,
630                                          MemoryRegionCache *cache,
631                                          int i)
632 {
633     hwaddr off_id = i * sizeof(VRingPackedDesc) +
634                     offsetof(VRingPackedDesc, id);
635     hwaddr off_len = i * sizeof(VRingPackedDesc) +
636                     offsetof(VRingPackedDesc, len);
637 
638     virtio_tswap32s(vdev, &desc->len);
639     virtio_tswap16s(vdev, &desc->id);
640     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
641     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
642     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
643     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
644 }
645 
646 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
647                                           VRingPackedDesc *desc,
648                                           MemoryRegionCache *cache,
649                                           int i)
650 {
651     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
652 
653     virtio_stw_phys_cached(vdev, cache, off, desc->flags);
654     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
655 }
656 
657 static void vring_packed_desc_write(VirtIODevice *vdev,
658                                     VRingPackedDesc *desc,
659                                     MemoryRegionCache *cache,
660                                     int i, bool strict_order)
661 {
662     vring_packed_desc_write_data(vdev, desc, cache, i);
663     if (strict_order) {
664         /* Make sure data is written before flags. */
665         smp_wmb();
666     }
667     vring_packed_desc_write_flags(vdev, desc, cache, i);
668 }
669 
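/*
 * A packed descriptor is available when its AVAIL and USED flag bits
 * differ and AVAIL matches the current wrap counter:
 *
 *   avail == used                          -> not available (unused or
 *                                             already completed)
 *   avail != used && avail == wrap_counter -> available in this pass
 *   avail != used && avail != wrap_counter -> stale entry from an earlier
 *                                             pass around the ring
 */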
670 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
671 {
672     bool avail, used;
673 
674     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
675     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
676     return (avail != used) && (avail == wrap_counter);
677 }
678 
679 /* Fetch avail_idx from VQ memory only when we really need to know if
680  * the guest has added some buffers.
681  * Called within rcu_read_lock().  */
682 static int virtio_queue_empty_rcu(VirtQueue *vq)
683 {
684     if (virtio_device_disabled(vq->vdev)) {
685         return 1;
686     }
687 
688     if (unlikely(!vq->vring.avail)) {
689         return 1;
690     }
691 
692     if (vq->shadow_avail_idx != vq->last_avail_idx) {
693         return 0;
694     }
695 
696     return vring_avail_idx(vq) == vq->last_avail_idx;
697 }
698 
699 static int virtio_queue_split_empty(VirtQueue *vq)
700 {
701     bool empty;
702 
703     if (virtio_device_disabled(vq->vdev)) {
704         return 1;
705     }
706 
707     if (unlikely(!vq->vring.avail)) {
708         return 1;
709     }
710 
711     if (vq->shadow_avail_idx != vq->last_avail_idx) {
712         return 0;
713     }
714 
715     RCU_READ_LOCK_GUARD();
716     empty = vring_avail_idx(vq) == vq->last_avail_idx;
717     return empty;
718 }
719 
720 /* Called within rcu_read_lock().  */
721 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
722 {
723     struct VRingPackedDesc desc;
724     VRingMemoryRegionCaches *cache;
725 
726     if (unlikely(!vq->vring.desc)) {
727         return 1;
728     }
729 
730     cache = vring_get_region_caches(vq);
731     if (!cache) {
732         return 1;
733     }
734 
735     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
736                                  vq->last_avail_idx);
737 
738     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
739 }
740 
741 static int virtio_queue_packed_empty(VirtQueue *vq)
742 {
743     RCU_READ_LOCK_GUARD();
744     return virtio_queue_packed_empty_rcu(vq);
745 }
746 
747 int virtio_queue_empty(VirtQueue *vq)
748 {
749     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
750         return virtio_queue_packed_empty(vq);
751     } else {
752         return virtio_queue_split_empty(vq);
753     }
754 }
755 
756 static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx)
757 {
758     if (unlikely(!vq->vring.avail)) {
759         return false;
760     }
761 
762     return (uint16_t)shadow_idx != vring_avail_idx(vq);
763 }
764 
765 static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx)
766 {
767     VRingPackedDesc desc;
768     VRingMemoryRegionCaches *caches;
769 
770     if (unlikely(!vq->vring.desc)) {
771         return false;
772     }
773 
774     caches = vring_get_region_caches(vq);
775     if (!caches) {
776         return false;
777     }
778 
779     vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
780                            shadow_idx, true);
781 
782     return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter);
783 }
784 
785 static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx)
786 {
787     if (virtio_device_disabled(vq->vdev)) {
788         return false;
789     }
790 
791     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
792         return virtio_queue_packed_poll(vq, shadow_idx);
793     } else {
794         return virtio_queue_split_poll(vq, shadow_idx);
795     }
796 }
797 
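/*
 * Re-enable guest->host notifications and then poll once more using the
 * shadow avail index previously returned to the caller (the 'opaque'
 * value, e.g. from virtqueue_get_avail_bytes()).  This closes the race
 * where the guest queues buffers after our last check but before
 * notifications were re-enabled: a true return value tells the caller to
 * process the ring again instead of waiting for a notification.
 */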
798 bool virtio_queue_enable_notification_and_check(VirtQueue *vq,
799                                                 int opaque)
800 {
801     virtio_queue_set_notification(vq, 1);
802 
803     if (opaque >= 0) {
804         return virtio_queue_poll(vq, (unsigned)opaque);
805     } else {
806         return false;
807     }
808 }
809 
810 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
811                                unsigned int len)
812 {
813     AddressSpace *dma_as = vq->vdev->dma_as;
814     unsigned int offset;
815     int i;
816 
817     offset = 0;
818     for (i = 0; i < elem->in_num; i++) {
819         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
820 
821         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
822                          elem->in_sg[i].iov_len,
823                          DMA_DIRECTION_FROM_DEVICE, size);
824 
825         offset += size;
826     }
827 
828     for (i = 0; i < elem->out_num; i++)
829         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
830                          elem->out_sg[i].iov_len,
831                          DMA_DIRECTION_TO_DEVICE,
832                          elem->out_sg[i].iov_len);
833 }
834 
835 /* virtqueue_detach_element:
836  * @vq: The #VirtQueue
837  * @elem: The #VirtQueueElement
838  * @len: number of bytes written
839  *
840  * Detach the element from the virtqueue.  This function is suitable for device
841  * reset or other situations where a #VirtQueueElement is simply freed and will
842  * not be pushed or discarded.
843  */
844 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
845                               unsigned int len)
846 {
847     vq->inuse -= elem->ndescs;
848     virtqueue_unmap_sg(vq, elem, len);
849 }
850 
851 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
852 {
853     vq->last_avail_idx -= num;
854 }
855 
856 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
857 {
858     if (vq->last_avail_idx < num) {
859         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
860         vq->last_avail_wrap_counter ^= 1;
861     } else {
862         vq->last_avail_idx -= num;
863     }
864 }
865 
866 /* virtqueue_unpop:
867  * @vq: The #VirtQueue
868  * @elem: The #VirtQueueElement
869  * @len: number of bytes written
870  *
871  * Pretend the most recent element wasn't popped from the virtqueue.  The next
872  * call to virtqueue_pop() will refetch the element.
873  */
874 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
875                      unsigned int len)
876 {
877 
878     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
879         virtqueue_packed_rewind(vq, 1);
880     } else {
881         virtqueue_split_rewind(vq, 1);
882     }
883 
884     virtqueue_detach_element(vq, elem, len);
885 }
886 
887 /* virtqueue_rewind:
888  * @vq: The #VirtQueue
889  * @num: Number of elements to push back
890  *
891  * Pretend that elements weren't popped from the virtqueue.  The next
892  * virtqueue_pop() will refetch the oldest element.
893  *
894  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
895  *
896  * Returns: true on success, false if @num is greater than the number of in use
897  * elements.
898  */
899 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
900 {
901     if (num > vq->inuse) {
902         return false;
903     }
904 
905     vq->inuse -= num;
906     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
907         virtqueue_packed_rewind(vq, num);
908     } else {
909         virtqueue_split_rewind(vq, num);
910     }
911     return true;
912 }
913 
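/*
 * Completion is split in two steps: the fill functions below record a
 * finished element in the used ring (or in used_elems for the packed and
 * in-order cases), while the used index only becomes visible to the guest
 * when the corresponding flush function runs.  This lets a device
 * complete a batch of requests with a single index update and, typically,
 * a single guest notification.
 */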
914 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len, unsigned int idx)
916 {
917     VRingUsedElem uelem;
918 
919     if (unlikely(!vq->vring.used)) {
920         return;
921     }
922 
923     idx = (idx + vq->used_idx) % vq->vring.num;
924 
925     uelem.id = elem->index;
926     uelem.len = len;
927     vring_used_write(vq, &uelem, idx);
928 }
929 
930 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
931                                   unsigned int len, unsigned int idx)
932 {
933     vq->used_elems[idx].index = elem->index;
934     vq->used_elems[idx].len = len;
935     vq->used_elems[idx].ndescs = elem->ndescs;
936 }
937 
938 static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem,
939                                    unsigned int len)
940 {
941     unsigned int i, steps, max_steps;
942 
943     i = vq->used_idx % vq->vring.num;
944     steps = 0;
945     /*
946      * We shouldn't need to increase 'i' by more than the distance
947      * between used_idx and last_avail_idx.
948      */
949     max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num;
950 
951     /* Search for element in vq->used_elems */
952     while (steps <= max_steps) {
953         /* Found element, set length and mark as filled */
954         if (vq->used_elems[i].index == elem->index) {
955             vq->used_elems[i].len = len;
956             vq->used_elems[i].in_order_filled = true;
957             break;
958         }
959 
960         steps += vq->used_elems[i].ndescs;
961         i += vq->used_elems[i].ndescs;
962 
963         if (i >= vq->vring.num) {
964             i -= vq->vring.num;
965         }
966     }
967 
968     /*
969      * We should be able to find a matching VirtQueueElement in
970      * used_elems. If we don't, this is an error.
971      */
972     if (steps >= max_steps) {
973         qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n",
974                       __func__, vq->vdev->name, elem->index);
975     }
976 }
977 
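/*
 * Write one used descriptor at used_idx + idx, wrapping around the ring
 * and flipping the wrap counter if necessary.  The AVAIL and USED flag
 * bits are both set (or both cleared) to match the wrap counter, which is
 * what marks the descriptor as used to the driver.  With strict_order the
 * id/len are made visible before the flags, so the guest never observes a
 * used descriptor with stale data.
 */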
978 static void virtqueue_packed_fill_desc(VirtQueue *vq,
979                                        const VirtQueueElement *elem,
980                                        unsigned int idx,
981                                        bool strict_order)
982 {
983     uint16_t head;
984     VRingMemoryRegionCaches *caches;
985     VRingPackedDesc desc = {
986         .id = elem->index,
987         .len = elem->len,
988     };
989     bool wrap_counter = vq->used_wrap_counter;
990 
991     if (unlikely(!vq->vring.desc)) {
992         return;
993     }
994 
995     head = vq->used_idx + idx;
996     if (head >= vq->vring.num) {
997         head -= vq->vring.num;
998         wrap_counter ^= 1;
999     }
1000     if (wrap_counter) {
1001         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
1002         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
1003     } else {
1004         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
1005         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
1006     }
1007 
1008     caches = vring_get_region_caches(vq);
1009     if (!caches) {
1010         return;
1011     }
1012 
1013     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
1014 }
1015 
1016 /* Called within rcu_read_lock().  */
1017 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
1018                     unsigned int len, unsigned int idx)
1019 {
1020     trace_virtqueue_fill(vq, elem, len, idx);
1021 
1022     virtqueue_unmap_sg(vq, elem, len);
1023 
1024     if (virtio_device_disabled(vq->vdev)) {
1025         return;
1026     }
1027 
1028     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
1029         virtqueue_ordered_fill(vq, elem, len);
1030     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1031         virtqueue_packed_fill(vq, elem, len, idx);
1032     } else {
1033         virtqueue_split_fill(vq, elem, len, idx);
1034     }
1035 }
1036 
1037 /* Called within rcu_read_lock().  */
1038 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
1039 {
1040     uint16_t old, new;
1041 
1042     if (unlikely(!vq->vring.used)) {
1043         return;
1044     }
1045 
1046     /* Make sure buffer is written before we update index. */
1047     smp_wmb();
1048     trace_virtqueue_flush(vq, count);
1049     old = vq->used_idx;
1050     new = old + count;
1051     vring_used_idx_set(vq, new);
1052     vq->inuse -= count;
1053     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
1054         vq->signalled_used_valid = false;
1055 }
1056 
1057 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
1058 {
1059     unsigned int i, ndescs = 0;
1060 
1061     if (unlikely(!vq->vring.desc)) {
1062         return;
1063     }
1064 
1065     /*
1066      * For indirect elements, 'ndescs' is 1.
1067      * For all other elements, 'ndescs' is the number of descriptors
1068      * chained by NEXT (as set in virtqueue_packed_pop).
1069      * So when an 'elem' is filled into the descriptor ring, its 'idx'
1070      * is the value of 'vq->used_idx' plus the 'ndescs' of the elements
1071      * written before it.
1072      */
1073     ndescs += vq->used_elems[0].ndescs;
1074     for (i = 1; i < count; i++) {
1075         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
1076         ndescs += vq->used_elems[i].ndescs;
1077     }
1078     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
1079 
1080     vq->inuse -= ndescs;
1081     vq->used_idx += ndescs;
1082     if (vq->used_idx >= vq->vring.num) {
1083         vq->used_idx -= vq->vring.num;
1084         vq->used_wrap_counter ^= 1;
1085         vq->signalled_used_valid = false;
1086     }
1087 }
1088 
1089 static void virtqueue_ordered_flush(VirtQueue *vq)
1090 {
1091     unsigned int i = vq->used_idx % vq->vring.num;
1092     unsigned int ndescs = 0;
1093     uint16_t old = vq->used_idx;
1094     uint16_t new;
1095     bool packed;
1096     VRingUsedElem uelem;
1097 
1098     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
1099 
1100     if (packed) {
1101         if (unlikely(!vq->vring.desc)) {
1102             return;
1103         }
1104     } else if (unlikely(!vq->vring.used)) {
1105         return;
1106     }
1107 
1108     /* First expected in-order element isn't ready, nothing to do */
1109     if (!vq->used_elems[i].in_order_filled) {
1110         return;
1111     }
1112 
1113     /* Search for filled elements in-order */
1114     while (vq->used_elems[i].in_order_filled) {
1115         /*
1116          * First entry for packed VQs is written last so the guest
1117          * doesn't see invalid descriptors.
1118          */
1119         if (packed && i != vq->used_idx) {
1120             virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
1121         } else if (!packed) {
1122             uelem.id = vq->used_elems[i].index;
1123             uelem.len = vq->used_elems[i].len;
1124             vring_used_write(vq, &uelem, i);
1125         }
1126 
1127         vq->used_elems[i].in_order_filled = false;
1128         ndescs += vq->used_elems[i].ndescs;
1129         i += vq->used_elems[i].ndescs;
1130         if (i >= vq->vring.num) {
1131             i -= vq->vring.num;
1132         }
1133     }
1134 
1135     if (packed) {
1136         virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true);
1137         vq->used_idx += ndescs;
1138         if (vq->used_idx >= vq->vring.num) {
1139             vq->used_idx -= vq->vring.num;
1140             vq->used_wrap_counter ^= 1;
1141             vq->signalled_used_valid = false;
1142         }
1143     } else {
1144         /* Make sure buffer is written before we update index. */
1145         smp_wmb();
1146         new = old + ndescs;
1147         vring_used_idx_set(vq, new);
1148         if (unlikely((int16_t)(new - vq->signalled_used) <
1149                      (uint16_t)(new - old))) {
1150             vq->signalled_used_valid = false;
1151         }
1152     }
1153     vq->inuse -= ndescs;
1154 }
1155 
1156 void virtqueue_flush(VirtQueue *vq, unsigned int count)
1157 {
1158     if (virtio_device_disabled(vq->vdev)) {
1159         vq->inuse -= count;
1160         return;
1161     }
1162 
1163     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
1164         virtqueue_ordered_flush(vq);
1165     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1166         virtqueue_packed_flush(vq, count);
1167     } else {
1168         virtqueue_split_flush(vq, count);
1169     }
1170 }
1171 
1172 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
1173                     unsigned int len)
1174 {
1175     RCU_READ_LOCK_GUARD();
1176     virtqueue_fill(vq, elem, len, 0);
1177     virtqueue_flush(vq, 1);
1178 }
1179 
1180 /* Called within rcu_read_lock().  */
1181 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
1182 {
1183     uint16_t avail_idx, num_heads;
1184 
1185     /* Use shadow index whenever possible. */
1186     avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
1187                                               : vring_avail_idx(vq);
1188     num_heads = avail_idx - idx;
1189 
1190     /* Check it isn't doing very strange things with descriptor numbers. */
1191     if (num_heads > vq->vring.num) {
1192         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
1193                      idx, vq->shadow_avail_idx);
1194         return -EINVAL;
1195     }
1196     /*
1197      * On success, callers read a descriptor at vq->last_avail_idx.
1198      * Make sure descriptor read does not bypass avail index read.
1199      *
1200      * This is necessary even if we are using a shadow index, since
1201      * the shadow index could have been initialized by calling
1202      * vring_avail_idx() outside of this function, i.e., by a guest
1203      * memory read not accompanied by a barrier.
1204      */
1205     if (num_heads) {
1206         smp_rmb();
1207     }
1208 
1209     return num_heads;
1210 }
1211 
1212 /* Called within rcu_read_lock().  */
1213 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
1214                                unsigned int *head)
1215 {
1216     /* Grab the next descriptor number they're advertising, and increment
1217      * the index we've seen. */
1218     *head = vring_avail_ring(vq, idx % vq->vring.num);
1219 
1220     /* If their number is silly, that's a fatal mistake. */
1221     if (*head >= vq->vring.num) {
1222         virtio_error(vq->vdev, "Guest says index %u is available", *head);
1223         return false;
1224     }
1225 
1226     return true;
1227 }
1228 
1229 enum {
1230     VIRTQUEUE_READ_DESC_ERROR = -1,
1231     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
1232     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
1233 };
1234 
1235 /* Reads the 'desc->next' descriptor into '*desc'. */
1236 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
1237                                           MemoryRegionCache *desc_cache,
1238                                           unsigned int max)
1239 {
1240     /* If this descriptor says it doesn't chain, we're done. */
1241     if (!(desc->flags & VRING_DESC_F_NEXT)) {
1242         return VIRTQUEUE_READ_DESC_DONE;
1243     }
1244 
1245     /* Check they're not leading us off end of descriptors. */
1246     if (desc->next >= max) {
1247         virtio_error(vdev, "Desc next is %u", desc->next);
1248         return VIRTQUEUE_READ_DESC_ERROR;
1249     }
1250 
1251     vring_split_desc_read(vdev, desc, desc_cache, desc->next);
1252     return VIRTQUEUE_READ_DESC_MORE;
1253 }
1254 
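/*
 * Walk all currently available descriptor chains, following both NEXT
 * chains and VRING_DESC_F_INDIRECT tables, and add up how many
 * device-writable (in) and device-readable (out) bytes the guest has
 * queued.  The walk stops early once both max_in_bytes and max_out_bytes
 * have been satisfied.
 */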
1255 /* Called within rcu_read_lock().  */
1256 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
1257                             unsigned int *in_bytes, unsigned int *out_bytes,
1258                             unsigned max_in_bytes, unsigned max_out_bytes,
1259                             VRingMemoryRegionCaches *caches)
1260 {
1261     VirtIODevice *vdev = vq->vdev;
1262     unsigned int idx;
1263     unsigned int total_bufs, in_total, out_total;
1264     MemoryRegionCache indirect_desc_cache;
1265     int64_t len = 0;
1266     int rc;
1267 
1268     address_space_cache_init_empty(&indirect_desc_cache);
1269 
1270     idx = vq->last_avail_idx;
1271     total_bufs = in_total = out_total = 0;
1272 
1273     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1274         MemoryRegionCache *desc_cache = &caches->desc;
1275         unsigned int num_bufs;
1276         VRingDesc desc;
1277         unsigned int i;
1278         unsigned int max = vq->vring.num;
1279 
1280         num_bufs = total_bufs;
1281 
1282         if (!virtqueue_get_head(vq, idx++, &i)) {
1283             goto err;
1284         }
1285 
1286         vring_split_desc_read(vdev, &desc, desc_cache, i);
1287 
1288         if (desc.flags & VRING_DESC_F_INDIRECT) {
1289             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1290                 virtio_error(vdev, "Invalid size for indirect buffer table");
1291                 goto err;
1292             }
1293 
1294             /* If we've got too many, that implies a descriptor loop. */
1295             if (num_bufs >= max) {
1296                 virtio_error(vdev, "Looped descriptor");
1297                 goto err;
1298             }
1299 
1300             /* loop over the indirect descriptor table */
1301             len = address_space_cache_init(&indirect_desc_cache,
1302                                            vdev->dma_as,
1303                                            desc.addr, desc.len, false);
1304             desc_cache = &indirect_desc_cache;
1305             if (len < desc.len) {
1306                 virtio_error(vdev, "Cannot map indirect buffer");
1307                 goto err;
1308             }
1309 
1310             max = desc.len / sizeof(VRingDesc);
1311             num_bufs = i = 0;
1312             vring_split_desc_read(vdev, &desc, desc_cache, i);
1313         }
1314 
1315         do {
1316             /* If we've got too many, that implies a descriptor loop. */
1317             if (++num_bufs > max) {
1318                 virtio_error(vdev, "Looped descriptor");
1319                 goto err;
1320             }
1321 
1322             if (desc.flags & VRING_DESC_F_WRITE) {
1323                 in_total += desc.len;
1324             } else {
1325                 out_total += desc.len;
1326             }
1327             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1328                 goto done;
1329             }
1330 
1331             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1332         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1333 
1334         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1335             goto err;
1336         }
1337 
1338         if (desc_cache == &indirect_desc_cache) {
1339             address_space_cache_destroy(&indirect_desc_cache);
1340             total_bufs++;
1341         } else {
1342             total_bufs = num_bufs;
1343         }
1344     }
1345 
1346     if (rc < 0) {
1347         goto err;
1348     }
1349 
1350 done:
1351     address_space_cache_destroy(&indirect_desc_cache);
1352     if (in_bytes) {
1353         *in_bytes = in_total;
1354     }
1355     if (out_bytes) {
1356         *out_bytes = out_total;
1357     }
1358     return;
1359 
1360 err:
1361     in_total = out_total = 0;
1362     goto done;
1363 }
1364 
1365 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1366                                            VRingPackedDesc *desc,
1367                                            MemoryRegionCache
1368                                            *desc_cache,
1369                                            unsigned int max,
1370                                            unsigned int *next,
1371                                            bool indirect)
1372 {
1373     /* If this descriptor says it doesn't chain, we're done. */
1374     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1375         return VIRTQUEUE_READ_DESC_DONE;
1376     }
1377 
1378     ++*next;
1379     if (*next == max) {
1380         if (indirect) {
1381             return VIRTQUEUE_READ_DESC_DONE;
1382         } else {
1383             (*next) -= vq->vring.num;
1384         }
1385     }
1386 
1387     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1388     return VIRTQUEUE_READ_DESC_MORE;
1389 }
1390 
1391 /* Called within rcu_read_lock().  */
1392 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1393                                              unsigned int *in_bytes,
1394                                              unsigned int *out_bytes,
1395                                              unsigned max_in_bytes,
1396                                              unsigned max_out_bytes,
1397                                              VRingMemoryRegionCaches *caches)
1398 {
1399     VirtIODevice *vdev = vq->vdev;
1400     unsigned int idx;
1401     unsigned int total_bufs, in_total, out_total;
1402     MemoryRegionCache indirect_desc_cache;
1403     MemoryRegionCache *desc_cache;
1404     int64_t len = 0;
1405     VRingPackedDesc desc;
1406     bool wrap_counter;
1407 
1408     address_space_cache_init_empty(&indirect_desc_cache);
1409 
1410     idx = vq->last_avail_idx;
1411     wrap_counter = vq->last_avail_wrap_counter;
1412     total_bufs = in_total = out_total = 0;
1413 
1414     for (;;) {
1415         unsigned int num_bufs = total_bufs;
1416         unsigned int i = idx;
1417         int rc;
1418         unsigned int max = vq->vring.num;
1419 
1420         desc_cache = &caches->desc;
1421 
1422         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1423         if (!is_desc_avail(desc.flags, wrap_counter)) {
1424             break;
1425         }
1426 
1427         if (desc.flags & VRING_DESC_F_INDIRECT) {
1428             if (desc.len % sizeof(VRingPackedDesc)) {
1429                 virtio_error(vdev, "Invalid size for indirect buffer table");
1430                 goto err;
1431             }
1432 
1433             /* If we've got too many, that implies a descriptor loop. */
1434             if (num_bufs >= max) {
1435                 virtio_error(vdev, "Looped descriptor");
1436                 goto err;
1437             }
1438 
1439             /* loop over the indirect descriptor table */
1440             len = address_space_cache_init(&indirect_desc_cache,
1441                                            vdev->dma_as,
1442                                            desc.addr, desc.len, false);
1443             desc_cache = &indirect_desc_cache;
1444             if (len < desc.len) {
1445                 virtio_error(vdev, "Cannot map indirect buffer");
1446                 goto err;
1447             }
1448 
1449             max = desc.len / sizeof(VRingPackedDesc);
1450             num_bufs = i = 0;
1451             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1452         }
1453 
1454         do {
1455             /* If we've got too many, that implies a descriptor loop. */
1456             if (++num_bufs > max) {
1457                 virtio_error(vdev, "Looped descriptor");
1458                 goto err;
1459             }
1460 
1461             if (desc.flags & VRING_DESC_F_WRITE) {
1462                 in_total += desc.len;
1463             } else {
1464                 out_total += desc.len;
1465             }
1466             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1467                 goto done;
1468             }
1469 
1470             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1471                                                  &i, desc_cache ==
1472                                                  &indirect_desc_cache);
1473         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1474 
1475         if (desc_cache == &indirect_desc_cache) {
1476             address_space_cache_destroy(&indirect_desc_cache);
1477             total_bufs++;
1478             idx++;
1479         } else {
1480             idx += num_bufs - total_bufs;
1481             total_bufs = num_bufs;
1482         }
1483 
1484         if (idx >= vq->vring.num) {
1485             idx -= vq->vring.num;
1486             wrap_counter ^= 1;
1487         }
1488     }
1489 
1490     /* Record the index and wrap counter for a kick we want */
1491     vq->shadow_avail_idx = idx;
1492     vq->shadow_avail_wrap_counter = wrap_counter;
1493 done:
1494     address_space_cache_destroy(&indirect_desc_cache);
1495     if (in_bytes) {
1496         *in_bytes = in_total;
1497     }
1498     if (out_bytes) {
1499         *out_bytes = out_total;
1500     }
1501     return;
1502 
1503 err:
1504     in_total = out_total = 0;
1505     goto done;
1506 }
1507 
1508 int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1509                               unsigned int *out_bytes, unsigned max_in_bytes,
1510                               unsigned max_out_bytes)
1511 {
1512     uint16_t desc_size;
1513     VRingMemoryRegionCaches *caches;
1514 
1515     RCU_READ_LOCK_GUARD();
1516 
1517     if (unlikely(!vq->vring.desc)) {
1518         goto err;
1519     }
1520 
1521     caches = vring_get_region_caches(vq);
1522     if (!caches) {
1523         goto err;
1524     }
1525 
1526     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1527                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1528     if (caches->desc.len < vq->vring.num * desc_size) {
1529         virtio_error(vq->vdev, "Cannot map descriptor ring");
1530         goto err;
1531     }
1532 
1533     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1534         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1535                                          max_in_bytes, max_out_bytes,
1536                                          caches);
1537     } else {
1538         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1539                                         max_in_bytes, max_out_bytes,
1540                                         caches);
1541     }
1542 
1543     return (int)vq->shadow_avail_idx;
1544 err:
1545     if (in_bytes) {
1546         *in_bytes = 0;
1547     }
1548     if (out_bytes) {
1549         *out_bytes = 0;
1550     }
1551 
1552     return -1;
1553 }
1554 
1555 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1556                           unsigned int out_bytes)
1557 {
1558     unsigned int in_total, out_total;
1559 
1560     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1561     return in_bytes <= in_total && out_bytes <= out_total;
1562 }
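
/*
 * Illustrative sketch (not part of the original source): a device model
 * that needs, say, 512 bytes of device-readable request data and 4 bytes
 * of device-writable status space can check capacity before popping:
 *
 *     if (virtqueue_avail_bytes(vq, 4, 512)) {
 *         VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *         ...
 *     }
 *
 * Here in_bytes (4) counts device-writable space and out_bytes (512)
 * device-readable space, matching the accounting done above.
 */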
1563 
1564 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1565                                hwaddr *addr, struct iovec *iov,
1566                                unsigned int max_num_sg, bool is_write,
1567                                hwaddr pa, size_t sz)
1568 {
1569     bool ok = false;
1570     unsigned num_sg = *p_num_sg;
1571     assert(num_sg <= max_num_sg);
1572 
1573     if (!sz) {
1574         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1575         goto out;
1576     }
1577 
1578     while (sz) {
1579         hwaddr len = sz;
1580 
1581         if (num_sg == max_num_sg) {
1582             virtio_error(vdev, "virtio: too many write descriptors in "
1583                                "indirect table");
1584             goto out;
1585         }
1586 
1587         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1588                                               is_write ?
1589                                               DMA_DIRECTION_FROM_DEVICE :
1590                                               DMA_DIRECTION_TO_DEVICE,
1591                                               MEMTXATTRS_UNSPECIFIED);
1592         if (!iov[num_sg].iov_base) {
1593             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1594             goto out;
1595         }
1596 
1597         iov[num_sg].iov_len = len;
1598         addr[num_sg] = pa;
1599 
1600         sz -= len;
1601         pa += len;
1602         num_sg++;
1603     }
1604     ok = true;
1605 
1606 out:
1607     *p_num_sg = num_sg;
1608     return ok;
1609 }
1610 
1611 /* Only used by error code paths before we have a VirtQueueElement (therefore
1612  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1613  * yet.
1614  */
1615 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1616                                     struct iovec *iov)
1617 {
1618     unsigned int i;
1619 
1620     for (i = 0; i < out_num + in_num; i++) {
1621         int is_write = i >= out_num;
1622 
1623         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1624         iov++;
1625     }
1626 }
1627 
1628 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1629                                 hwaddr *addr, unsigned int num_sg,
1630                                 bool is_write)
1631 {
1632     unsigned int i;
1633     hwaddr len;
1634 
1635     for (i = 0; i < num_sg; i++) {
1636         len = sg[i].iov_len;
1637         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1638                                         addr[i], &len, is_write ?
1639                                         DMA_DIRECTION_FROM_DEVICE :
1640                                         DMA_DIRECTION_TO_DEVICE,
1641                                         MEMTXATTRS_UNSPECIFIED);
1642         if (!sg[i].iov_base) {
1643             error_report("virtio: error trying to map MMIO memory");
1644             exit(1);
1645         }
1646         if (len != sg[i].iov_len) {
1647             error_report("virtio: unexpected memory split");
1648             exit(1);
1649         }
1650     }
1651 }
1652 
1653 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1654 {
1655     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1656     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1657                                                                         false);
1658 }
1659 
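/*
 * Note: the element and its four variable-sized arrays are carved out of a
 * single allocation, laid out roughly as
 *
 *     [VirtQueueElement (sz)] [in_addr[in_num]] [out_addr[out_num]]
 *     [in_sg[in_num]] [out_sg[out_num]]
 *
 * with each array aligned to its element type, so callers can release the
 * whole element with one g_free().
 */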
1660 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1661 {
1662     VirtQueueElement *elem;
1663     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1664     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1665     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1666     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1667     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1668     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1669 
1670     assert(sz >= sizeof(VirtQueueElement));
1671     elem = g_malloc(out_sg_end);
1672     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1673     elem->out_num = out_num;
1674     elem->in_num = in_num;
1675     elem->in_addr = (void *)elem + in_addr_ofs;
1676     elem->out_addr = (void *)elem + out_addr_ofs;
1677     elem->in_sg = (void *)elem + in_sg_ofs;
1678     elem->out_sg = (void *)elem + out_sg_ofs;
1679     return elem;
1680 }
1681 
1682 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1683 {
1684     unsigned int i, head, max, idx;
1685     VRingMemoryRegionCaches *caches;
1686     MemoryRegionCache indirect_desc_cache;
1687     MemoryRegionCache *desc_cache;
1688     int64_t len;
1689     VirtIODevice *vdev = vq->vdev;
1690     VirtQueueElement *elem = NULL;
1691     unsigned out_num, in_num, elem_entries;
1692     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1693     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1694     VRingDesc desc;
1695     int rc;
1696 
1697     address_space_cache_init_empty(&indirect_desc_cache);
1698 
1699     RCU_READ_LOCK_GUARD();
1700     if (virtio_queue_empty_rcu(vq)) {
1701         goto done;
1702     }
1703     /* Needed after virtio_queue_empty(), see comment in
1704      * virtqueue_num_heads(). */
1705     smp_rmb();
1706 
1707     /* When we start there are neither input nor output descriptors. */
1708     out_num = in_num = elem_entries = 0;
1709 
1710     max = vq->vring.num;
1711 
1712     if (vq->inuse >= vq->vring.num) {
1713         virtio_error(vdev, "Virtqueue size exceeded");
1714         goto done;
1715     }
1716 
1717     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1718         goto done;
1719     }
1720 
1721     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1722         vring_set_avail_event(vq, vq->last_avail_idx);
1723     }
1724 
1725     i = head;
1726 
1727     caches = vring_get_region_caches(vq);
1728     if (!caches) {
1729         virtio_error(vdev, "Region caches not initialized");
1730         goto done;
1731     }
1732 
1733     if (caches->desc.len < max * sizeof(VRingDesc)) {
1734         virtio_error(vdev, "Cannot map descriptor ring");
1735         goto done;
1736     }
1737 
1738     desc_cache = &caches->desc;
1739     vring_split_desc_read(vdev, &desc, desc_cache, i);
1740     if (desc.flags & VRING_DESC_F_INDIRECT) {
1741         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1742             virtio_error(vdev, "Invalid size for indirect buffer table");
1743             goto done;
1744         }
1745         virtio_check_indirect_feature(vdev);
1746 
1747         /* loop over the indirect descriptor table */
1748         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1749                                        desc.addr, desc.len, false);
1750         desc_cache = &indirect_desc_cache;
1751         if (len < desc.len) {
1752             virtio_error(vdev, "Cannot map indirect buffer");
1753             goto done;
1754         }
1755 
1756         max = desc.len / sizeof(VRingDesc);
1757         i = 0;
1758         vring_split_desc_read(vdev, &desc, desc_cache, i);
1759     }
1760 
1761     /* Collect all the descriptors */
1762     do {
1763         bool map_ok;
1764 
1765         if (desc.flags & VRING_DESC_F_WRITE) {
1766             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1767                                         iov + out_num,
1768                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1769                                         desc.addr, desc.len);
1770         } else {
1771             if (in_num) {
1772                 virtio_error(vdev, "Incorrect order for descriptors");
1773                 goto err_undo_map;
1774             }
1775             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1776                                         VIRTQUEUE_MAX_SIZE, false,
1777                                         desc.addr, desc.len);
1778         }
1779         if (!map_ok) {
1780             goto err_undo_map;
1781         }
1782 
1783         /* If we've got too many, that implies a descriptor loop. */
1784         if (++elem_entries > max) {
1785             virtio_error(vdev, "Looped descriptor");
1786             goto err_undo_map;
1787         }
1788 
1789         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
1790     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1791 
1792     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1793         goto err_undo_map;
1794     }
1795 
1796     /* Now copy what we have collected and mapped */
1797     elem = virtqueue_alloc_element(sz, out_num, in_num);
1798     elem->index = head;
1799     elem->ndescs = 1;
1800     for (i = 0; i < out_num; i++) {
1801         elem->out_addr[i] = addr[i];
1802         elem->out_sg[i] = iov[i];
1803     }
1804     for (i = 0; i < in_num; i++) {
1805         elem->in_addr[i] = addr[out_num + i];
1806         elem->in_sg[i] = iov[out_num + i];
1807     }
1808 
1809     if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
1810         idx = (vq->last_avail_idx - 1) % vq->vring.num;
1811         vq->used_elems[idx].index = elem->index;
1812         vq->used_elems[idx].len = elem->len;
1813         vq->used_elems[idx].ndescs = elem->ndescs;
1814     }
1815 
1816     vq->inuse++;
1817 
1818     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1819 done:
1820     address_space_cache_destroy(&indirect_desc_cache);
1821 
1822     return elem;
1823 
1824 err_undo_map:
1825     virtqueue_undo_map_desc(out_num, in_num, iov);
1826     goto done;
1827 }
1828 
1829 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1830 {
1831     unsigned int i, max;
1832     VRingMemoryRegionCaches *caches;
1833     MemoryRegionCache indirect_desc_cache;
1834     MemoryRegionCache *desc_cache;
1835     int64_t len;
1836     VirtIODevice *vdev = vq->vdev;
1837     VirtQueueElement *elem = NULL;
1838     unsigned out_num, in_num, elem_entries;
1839     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1840     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1841     VRingPackedDesc desc;
1842     uint16_t id;
1843     int rc;
1844 
1845     address_space_cache_init_empty(&indirect_desc_cache);
1846 
1847     RCU_READ_LOCK_GUARD();
1848     if (virtio_queue_packed_empty_rcu(vq)) {
1849         goto done;
1850     }
1851 
1852     /* When we start there are neither input nor output descriptors. */
1853     out_num = in_num = elem_entries = 0;
1854 
1855     max = vq->vring.num;
1856 
1857     if (vq->inuse >= vq->vring.num) {
1858         virtio_error(vdev, "Virtqueue size exceeded");
1859         goto done;
1860     }
1861 
1862     i = vq->last_avail_idx;
1863 
1864     caches = vring_get_region_caches(vq);
1865     if (!caches) {
1866         virtio_error(vdev, "Region caches not initialized");
1867         goto done;
1868     }
1869 
1870     if (caches->desc.len < max * sizeof(VRingDesc)) {
1871         virtio_error(vdev, "Cannot map descriptor ring");
1872         goto done;
1873     }
1874 
1875     desc_cache = &caches->desc;
1876     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1877     id = desc.id;
1878     if (desc.flags & VRING_DESC_F_INDIRECT) {
1879         if (desc.len % sizeof(VRingPackedDesc)) {
1880             virtio_error(vdev, "Invalid size for indirect buffer table");
1881             goto done;
1882         }
1883         virtio_check_indirect_feature(vdev);
1884 
1885         /* loop over the indirect descriptor table */
1886         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1887                                        desc.addr, desc.len, false);
1888         desc_cache = &indirect_desc_cache;
1889         if (len < desc.len) {
1890             virtio_error(vdev, "Cannot map indirect buffer");
1891             goto done;
1892         }
1893 
1894         max = desc.len / sizeof(VRingPackedDesc);
1895         i = 0;
1896         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1897     }
1898 
1899     /* Collect all the descriptors */
1900     do {
1901         bool map_ok;
1902 
1903         if (desc.flags & VRING_DESC_F_WRITE) {
1904             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1905                                         iov + out_num,
1906                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1907                                         desc.addr, desc.len);
1908         } else {
1909             if (in_num) {
1910                 virtio_error(vdev, "Incorrect order for descriptors");
1911                 goto err_undo_map;
1912             }
1913             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1914                                         VIRTQUEUE_MAX_SIZE, false,
1915                                         desc.addr, desc.len);
1916         }
1917         if (!map_ok) {
1918             goto err_undo_map;
1919         }
1920 
1921         /* If we've got too many, that implies a descriptor loop. */
1922         if (++elem_entries > max) {
1923             virtio_error(vdev, "Looped descriptor");
1924             goto err_undo_map;
1925         }
1926 
1927         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1928                                              desc_cache ==
1929                                              &indirect_desc_cache);
1930     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1931 
1932     if (desc_cache != &indirect_desc_cache) {
1933         /* Buffer ID is included in the last descriptor in the list. */
1934         id = desc.id;
1935     }
1936 
1937     /* Now copy what we have collected and mapped */
1938     elem = virtqueue_alloc_element(sz, out_num, in_num);
1939     for (i = 0; i < out_num; i++) {
1940         elem->out_addr[i] = addr[i];
1941         elem->out_sg[i] = iov[i];
1942     }
1943     for (i = 0; i < in_num; i++) {
1944         elem->in_addr[i] = addr[out_num + i];
1945         elem->in_sg[i] = iov[out_num + i];
1946     }
1947 
1948     elem->index = id;
1949     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1950 
1951     if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
1952         vq->used_elems[vq->last_avail_idx].index = elem->index;
1953         vq->used_elems[vq->last_avail_idx].len = elem->len;
1954         vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs;
1955     }
1956 
1957     vq->last_avail_idx += elem->ndescs;
1958     vq->inuse += elem->ndescs;
1959 
1960     if (vq->last_avail_idx >= vq->vring.num) {
1961         vq->last_avail_idx -= vq->vring.num;
1962         vq->last_avail_wrap_counter ^= 1;
1963     }
1964 
1965     vq->shadow_avail_idx = vq->last_avail_idx;
1966     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1967 
1968     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1969 done:
1970     address_space_cache_destroy(&indirect_desc_cache);
1971 
1972     return elem;
1973 
1974 err_undo_map:
1975     virtqueue_undo_map_desc(out_num, in_num, iov);
1976     goto done;
1977 }
1978 
1979 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1980 {
1981     if (virtio_device_disabled(vq->vdev)) {
1982         return NULL;
1983     }
1984 
1985     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1986         return virtqueue_packed_pop(vq, sz);
1987     } else {
1988         return virtqueue_split_pop(vq, sz);
1989     }
1990 }
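
/*
 * Typical usage sketch (illustrative only; the handler name and reply_len
 * are hypothetical):
 *
 *     static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             size_t reply_len = 0;
 *             // parse the request from elem->out_sg, write the response
 *             // into elem->in_sg and account for it in reply_len ...
 *             virtqueue_push(vq, elem, reply_len);
 *             g_free(elem);
 *         }
 *         virtio_notify(vdev, vq);
 *     }
 */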
1991 
1992 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1993 {
1994     VRingMemoryRegionCaches *caches;
1995     MemoryRegionCache *desc_cache;
1996     unsigned int dropped = 0;
1997     VirtQueueElement elem = {};
1998     VirtIODevice *vdev = vq->vdev;
1999     VRingPackedDesc desc;
2000 
2001     RCU_READ_LOCK_GUARD();
2002 
2003     caches = vring_get_region_caches(vq);
2004     if (!caches) {
2005         return 0;
2006     }
2007 
2008     desc_cache = &caches->desc;
2009 
2010     virtio_queue_set_notification(vq, 0);
2011 
2012     while (vq->inuse < vq->vring.num) {
2013         unsigned int idx = vq->last_avail_idx;
2014         /*
2015          * works similarly to virtqueue_pop but does not map buffers
2016          * and does not allocate any memory.
2017          */
2018         vring_packed_desc_read(vdev, &desc, desc_cache,
2019                                vq->last_avail_idx, true);
2020         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
2021             break;
2022         }
2023         elem.index = desc.id;
2024         elem.ndescs = 1;
2025         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
2026                                                vq->vring.num, &idx, false)) {
2027             ++elem.ndescs;
2028         }
2029         /*
2030          * immediately push the element, nothing to unmap
2031          * as both in_num and out_num are set to 0.
2032          */
2033         virtqueue_push(vq, &elem, 0);
2034         dropped++;
2035         vq->last_avail_idx += elem.ndescs;
2036         if (vq->last_avail_idx >= vq->vring.num) {
2037             vq->last_avail_idx -= vq->vring.num;
2038             vq->last_avail_wrap_counter ^= 1;
2039         }
2040     }
2041 
2042     return dropped;
2043 }
2044 
2045 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
2046 {
2047     unsigned int dropped = 0;
2048     VirtQueueElement elem = {};
2049     VirtIODevice *vdev = vq->vdev;
2050     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2051 
2052     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
2053         /* works similarly to virtqueue_pop but does not map buffers
2054          * and does not allocate any memory */
2055         smp_rmb();
2056         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
2057             break;
2058         }
2059         vq->inuse++;
2060         vq->last_avail_idx++;
2061         if (fEventIdx) {
2062             vring_set_avail_event(vq, vq->last_avail_idx);
2063         }
2064         /* immediately push the element, nothing to unmap
2065          * as both in_num and out_num are set to 0 */
2066         virtqueue_push(vq, &elem, 0);
2067         dropped++;
2068     }
2069 
2070     return dropped;
2071 }
2072 
2073 /* virtqueue_drop_all:
2074  * @vq: The #VirtQueue
2075  * Drops all queued buffers and indicates them to the guest
2076  * as if they were completed. Useful when buffers cannot be
2077  * processed but must be returned to the guest.
2078  */
2079 unsigned int virtqueue_drop_all(VirtQueue *vq)
2080 {
2081     struct VirtIODevice *vdev = vq->vdev;
2082 
2083     if (virtio_device_disabled(vq->vdev)) {
2084         return 0;
2085     }
2086 
2087     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2088         return virtqueue_packed_drop_all(vq);
2089     } else {
2090         return virtqueue_split_drop_all(vq);
2091     }
2092 }
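
/*
 * Illustrative sketch (the condition is hypothetical): a device whose
 * backend has gone away can return every pending buffer unprocessed and
 * tell the guest about it:
 *
 *     if (backend_gone) {
 *         if (virtqueue_drop_all(vq)) {
 *             virtio_notify(vdev, vq);
 *         }
 *     }
 */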
2093 
2094 /* Reading and writing a structure directly to QEMUFile is *awful*, but
2095  * it is what QEMU has always done by mistake.  We can change it sooner
2096  * or later by bumping the version number of the affected vm states.
2097  * In the meantime, since the in-memory layout of VirtQueueElement
2098  * has changed, we need to marshal to and from the layout that was
2099  * used before the change.
2100  */
2101 typedef struct VirtQueueElementOld {
2102     unsigned int index;
2103     unsigned int out_num;
2104     unsigned int in_num;
2105     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
2106     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
2107     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
2108     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
2109 } VirtQueueElementOld;
2110 
2111 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
2112 {
2113     VirtQueueElement *elem;
2114     VirtQueueElementOld data;
2115     int i;
2116 
2117     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2118 
2119     /* TODO: teach all callers that this can fail, and return failure instead
2120      * of asserting here.
2121      * This is just one thing (there are probably more) that must be
2122      * fixed before we can allow NDEBUG compilation.
2123      */
2124     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
2125     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
2126 
2127     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
2128     elem->index = data.index;
2129 
2130     for (i = 0; i < elem->in_num; i++) {
2131         elem->in_addr[i] = data.in_addr[i];
2132     }
2133 
2134     for (i = 0; i < elem->out_num; i++) {
2135         elem->out_addr[i] = data.out_addr[i];
2136     }
2137 
2138     for (i = 0; i < elem->in_num; i++) {
2139         /* Base is overwritten by virtqueue_map.  */
2140         elem->in_sg[i].iov_base = 0;
2141         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
2142     }
2143 
2144     for (i = 0; i < elem->out_num; i++) {
2145         /* Base is overwritten by virtqueue_map.  */
2146         elem->out_sg[i].iov_base = 0;
2147         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
2148     }
2149 
2150     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2151         qemu_get_be32s(f, &elem->ndescs);
2152     }
2153 
2154     virtqueue_map(vdev, elem);
2155     return elem;
2156 }
2157 
2158 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
2159                                 VirtQueueElement *elem)
2160 {
2161     VirtQueueElementOld data;
2162     int i;
2163 
2164     memset(&data, 0, sizeof(data));
2165     data.index = elem->index;
2166     data.in_num = elem->in_num;
2167     data.out_num = elem->out_num;
2168 
2169     for (i = 0; i < elem->in_num; i++) {
2170         data.in_addr[i] = elem->in_addr[i];
2171     }
2172 
2173     for (i = 0; i < elem->out_num; i++) {
2174         data.out_addr[i] = elem->out_addr[i];
2175     }
2176 
2177     for (i = 0; i < elem->in_num; i++) {
2178         /* Base is overwritten by virtqueue_map when loading.  Do not
2179          * save it, as it would leak the QEMU address space layout.  */
2180         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
2181     }
2182 
2183     for (i = 0; i < elem->out_num; i++) {
2184         /* Do not save iov_base as above.  */
2185         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
2186     }
2187 
2188     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2189         qemu_put_be32s(f, &elem->ndescs);
2190     }
2191 
2192     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
2193 }
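
/*
 * Illustrative sketch (struct, field and callback names are hypothetical):
 * a device that keeps an in-flight element across migration pairs the two
 * helpers above in its save/load callbacks:
 *
 *     static void my_save(VirtIODevice *vdev, QEMUFile *f)
 *     {
 *         qemu_put_virtqueue_element(vdev, f, s->pending);
 *     }
 *
 *     static int my_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 *     {
 *         s->pending = qemu_get_virtqueue_element(vdev, f,
 *                                                 sizeof(VirtQueueElement));
 *         return 0;
 *     }
 */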
2194 
2195 /* virtio device */
2196 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
2197 {
2198     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2199     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2200 
2201     if (virtio_device_disabled(vdev)) {
2202         return;
2203     }
2204 
2205     if (k->notify) {
2206         k->notify(qbus->parent, vector);
2207     }
2208 }
2209 
2210 void virtio_update_irq(VirtIODevice *vdev)
2211 {
2212     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2213 }
2214 
2215 static int virtio_validate_features(VirtIODevice *vdev)
2216 {
2217     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2218 
2219     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
2220         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
2221         return -EFAULT;
2222     }
2223 
2224     if (k->validate_features) {
2225         return k->validate_features(vdev);
2226     } else {
2227         return 0;
2228     }
2229 }
2230 
2231 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
2232 {
2233     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2234     trace_virtio_set_status(vdev, val);
2235     int ret = 0;
2236 
2237     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2238         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
2239             val & VIRTIO_CONFIG_S_FEATURES_OK) {
2240             ret = virtio_validate_features(vdev);
2241             if (ret) {
2242                 return ret;
2243             }
2244         }
2245     }
2246 
2247     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
2248         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
2249         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
2250     }
2251 
2252     if (k->set_status) {
2253         ret = k->set_status(vdev, val);
2254         if (ret) {
2255             qemu_log("setting %s status to %d failed, old status: %d\n",
2256                      vdev->name, val, vdev->status);
2257         }
2258     }
2259     vdev->status = val;
2260 
2261     return ret;
2262 }
2263 
2264 static enum virtio_device_endian virtio_default_endian(void)
2265 {
2266     if (target_big_endian()) {
2267         return VIRTIO_DEVICE_ENDIAN_BIG;
2268     } else {
2269         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2270     }
2271 }
2272 
2273 static enum virtio_device_endian virtio_current_cpu_endian(void)
2274 {
2275     if (cpu_virtio_is_big_endian(current_cpu)) {
2276         return VIRTIO_DEVICE_ENDIAN_BIG;
2277     } else {
2278         return VIRTIO_DEVICE_ENDIAN_LITTLE;
2279     }
2280 }
2281 
2282 static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
2283 {
2284     vdev->vq[i].vring.desc = 0;
2285     vdev->vq[i].vring.avail = 0;
2286     vdev->vq[i].vring.used = 0;
2287     vdev->vq[i].last_avail_idx = 0;
2288     vdev->vq[i].shadow_avail_idx = 0;
2289     vdev->vq[i].used_idx = 0;
2290     vdev->vq[i].last_avail_wrap_counter = true;
2291     vdev->vq[i].shadow_avail_wrap_counter = true;
2292     vdev->vq[i].used_wrap_counter = true;
2293     virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2294     vdev->vq[i].signalled_used = 0;
2295     vdev->vq[i].signalled_used_valid = false;
2296     vdev->vq[i].notification = true;
2297     vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2298     vdev->vq[i].inuse = 0;
2299     virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2300 }
2301 
2302 void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
2303 {
2304     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2305 
2306     if (k->queue_reset) {
2307         k->queue_reset(vdev, queue_index);
2308     }
2309 
2310     __virtio_queue_reset(vdev, queue_index);
2311 }
2312 
2313 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
2314 {
2315     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2316 
2317     /*
2318      * TODO: Seabios is currently out of spec and triggering this error.
2319      * So this needs to be fixed in Seabios, then this can
2320      * be re-enabled for new machine types only, and also after
2321      * being converted to LOG_GUEST_ERROR.
2322      *
2323     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2324         error_report("queue_enable is only supported in devices of virtio "
2325                      "1.0 or later.");
2326     }
2327     */
2328 
2329     if (k->queue_enable) {
2330         k->queue_enable(vdev, queue_index);
2331     }
2332 }
2333 
2334 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2335 {
2336     if (!vdev->vq[n].vring.num) {
2337         return;
2338     }
2339     vdev->vq[n].vring.desc = addr;
2340     virtio_queue_update_rings(vdev, n);
2341 }
2342 
2343 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2344 {
2345     return vdev->vq[n].vring.desc;
2346 }
2347 
2348 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2349                             hwaddr avail, hwaddr used)
2350 {
2351     if (!vdev->vq[n].vring.num) {
2352         return;
2353     }
2354     vdev->vq[n].vring.desc = desc;
2355     vdev->vq[n].vring.avail = avail;
2356     vdev->vq[n].vring.used = used;
2357     virtio_init_region_cache(vdev, n);
2358 }
2359 
2360 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2361 {
2362     /* Don't allow guest to flip queue between existent and
2363      * nonexistent states, or to set it to an invalid size.
2364      */
2365     if (!!num != !!vdev->vq[n].vring.num ||
2366         num > VIRTQUEUE_MAX_SIZE ||
2367         num < 0) {
2368         return;
2369     }
2370     vdev->vq[n].vring.num = num;
2371 }
2372 
2373 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2374 {
2375     return QLIST_FIRST(&vdev->vector_queues[vector]);
2376 }
2377 
2378 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2379 {
2380     return QLIST_NEXT(vq, node);
2381 }
2382 
2383 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2384 {
2385     return vdev->vq[n].vring.num;
2386 }
2387 
2388 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2389 {
2390     return vdev->vq[n].vring.num_default;
2391 }
2392 
2393 int virtio_get_num_queues(VirtIODevice *vdev)
2394 {
2395     int i;
2396 
2397     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2398         if (!virtio_queue_get_num(vdev, i)) {
2399             break;
2400         }
2401     }
2402 
2403     return i;
2404 }
2405 
2406 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2407 {
2408     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2409     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2410 
2411     /* virtio-1 compliant devices cannot change the alignment */
2412     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2413         error_report("tried to modify queue alignment for virtio-1 device");
2414         return;
2415     }
2416     /* Check that the transport told us it was going to do this
2417      * (so a buggy transport will immediately assert rather than
2418      * silently failing to migrate this state)
2419      */
2420     assert(k->has_variable_vring_alignment);
2421 
2422     if (align) {
2423         vdev->vq[n].vring.align = align;
2424         virtio_queue_update_rings(vdev, n);
2425     }
2426 }
2427 
2428 void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t shadow_avail_idx)
2429 {
2430     if (!vq->vring.desc) {
2431         return;
2432     }
2433 
2434     /*
2435      * The 16-bit value for packed VQs includes a 1-bit wrap counter
2436      * and a 15-bit shadow_avail_idx.
2437      */
2438     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
2439         vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1;
2440         vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF;
2441     } else {
2442         vq->shadow_avail_idx = shadow_avail_idx;
2443     }
2444 }
2445 
2446 static void virtio_queue_notify_vq(VirtQueue *vq)
2447 {
2448     if (vq->vring.desc && vq->handle_output) {
2449         VirtIODevice *vdev = vq->vdev;
2450 
2451         if (unlikely(vdev->broken)) {
2452             return;
2453         }
2454 
2455         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2456         vq->handle_output(vdev, vq);
2457 
2458         if (unlikely(vdev->start_on_kick)) {
2459             virtio_set_started(vdev, true);
2460         }
2461     }
2462 }
2463 
2464 void virtio_queue_notify(VirtIODevice *vdev, int n)
2465 {
2466     VirtQueue *vq = &vdev->vq[n];
2467 
2468     if (unlikely(!vq->vring.desc || vdev->broken)) {
2469         return;
2470     }
2471 
2472     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2473     if (vq->host_notifier_enabled) {
2474         event_notifier_set(&vq->host_notifier);
2475     } else if (vq->handle_output) {
2476         vq->handle_output(vdev, vq);
2477 
2478         if (unlikely(vdev->start_on_kick)) {
2479             virtio_set_started(vdev, true);
2480         }
2481     }
2482 }
2483 
2484 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2485 {
2486     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2487         VIRTIO_NO_VECTOR;
2488 }
2489 
2490 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2491 {
2492     VirtQueue *vq = &vdev->vq[n];
2493 
2494     if (n < VIRTIO_QUEUE_MAX) {
2495         if (vdev->vector_queues &&
2496             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2497             QLIST_REMOVE(vq, node);
2498         }
2499         vdev->vq[n].vector = vector;
2500         if (vdev->vector_queues &&
2501             vector != VIRTIO_NO_VECTOR) {
2502             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2503         }
2504     }
2505 }
2506 
2507 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2508                             VirtIOHandleOutput handle_output)
2509 {
2510     int i;
2511 
2512     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2513         if (vdev->vq[i].vring.num == 0)
2514             break;
2515     }
2516 
2517     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2518         abort();
2519 
2520     vdev->vq[i].vring.num = queue_size;
2521     vdev->vq[i].vring.num_default = queue_size;
2522     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2523     vdev->vq[i].handle_output = handle_output;
2524     vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);
2525 
2526     return &vdev->vq[i];
2527 }
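
/*
 * Illustrative sketch (queue size and handler name are hypothetical): a
 * device's realize routine typically creates its queues with
 *
 *     s->vq = virtio_add_queue(vdev, 128, my_handle_output);
 *
 * and tears them down in unrealize with virtio_delete_queue(s->vq) (by
 * pointer) or virtio_del_queue(vdev, n) (by index).
 */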
2528 
2529 void virtio_delete_queue(VirtQueue *vq)
2530 {
2531     vq->vring.num = 0;
2532     vq->vring.num_default = 0;
2533     vq->handle_output = NULL;
2534     g_free(vq->used_elems);
2535     vq->used_elems = NULL;
2536     virtio_virtqueue_reset_region_cache(vq);
2537 }
2538 
2539 void virtio_del_queue(VirtIODevice *vdev, int n)
2540 {
2541     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2542         abort();
2543     }
2544 
2545     virtio_delete_queue(&vdev->vq[n]);
2546 }
2547 
2548 static void virtio_set_isr(VirtIODevice *vdev, int value)
2549 {
2550     uint8_t old = qatomic_read(&vdev->isr);
2551 
2552     /* Do not write ISR if it does not change, so that its cacheline remains
2553      * shared in the common case where the guest does not read it.
2554      */
2555     if ((old & value) != value) {
2556         qatomic_or(&vdev->isr, value);
2557     }
2558 }
2559 
2560 /* Called within rcu_read_lock(). */
2561 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2562 {
2563     uint16_t old, new;
2564     bool v;
2565     /* We need to expose used array entries before checking used event. */
2566     smp_mb();
2567     /* Always notify when the queue is empty, if the feature was negotiated */
2568     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2569         !vq->inuse && virtio_queue_empty(vq)) {
2570         return true;
2571     }
2572 
2573     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2574         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2575     }
2576 
2577     v = vq->signalled_used_valid;
2578     vq->signalled_used_valid = true;
2579     old = vq->signalled_used;
2580     new = vq->signalled_used = vq->used_idx;
2581     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2582 }
2583 
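/*
 * Note: for packed rings the driver publishes a 16-bit event suppression
 * word whose top bit is a wrap counter and whose low 15 bits are a
 * descriptor offset.  If the published wrap bit differs from the device's
 * current wrap state, the offset refers to the previous pass over the
 * ring, so it is shifted down by vring.num before the usual
 * vring_need_event() range check.
 */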
2584 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2585                                     uint16_t off_wrap, uint16_t new,
2586                                     uint16_t old)
2587 {
2588     int off = off_wrap & ~(1 << 15);
2589 
2590     if (wrap != off_wrap >> 15) {
2591         off -= vq->vring.num;
2592     }
2593 
2594     return vring_need_event(off, new, old);
2595 }
2596 
2597 /* Called within rcu_read_lock(). */
2598 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2599 {
2600     VRingPackedDescEvent e;
2601     uint16_t old, new;
2602     bool v;
2603     VRingMemoryRegionCaches *caches;
2604 
2605     caches = vring_get_region_caches(vq);
2606     if (!caches) {
2607         return false;
2608     }
2609 
2610     vring_packed_event_read(vdev, &caches->avail, &e);
2611 
2612     old = vq->signalled_used;
2613     new = vq->signalled_used = vq->used_idx;
2614     v = vq->signalled_used_valid;
2615     vq->signalled_used_valid = true;
2616 
2617     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2618         return false;
2619     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2620         return true;
2621     }
2622 
2623     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2624                                          e.off_wrap, new, old);
2625 }
2626 
2627 /* Called within rcu_read_lock().  */
2628 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2629 {
2630     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2631         return virtio_packed_should_notify(vdev, vq);
2632     } else {
2633         return virtio_split_should_notify(vdev, vq);
2634     }
2635 }
2636 
2637 /* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
2638 static void virtio_notify_irqfd_deferred_fn(void *opaque)
2639 {
2640     EventNotifier *notifier = opaque;
2641     VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
2642 
2643     trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
2644     event_notifier_set(notifier);
2645 }
2646 
2647 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2648 {
2649     WITH_RCU_READ_LOCK_GUARD() {
2650         if (!virtio_should_notify(vdev, vq)) {
2651             return;
2652         }
2653     }
2654 
2655     trace_virtio_notify_irqfd(vdev, vq);
2656 
2657     /*
2658      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2659      * Windows drivers included in virtio-win 1.8.0 (circa 2015)
2660      * incorrectly poll this bit during crashdump and hibernation
2661      * in MSI mode, causing a hang if this bit is never updated.
2662      * Recent releases of Windows do not really shut down, but rather
2663      * log out and hibernate to make the next startup faster.  Hence,
2664      * this manifested as a more serious hang during shutdown.
2665      *
2666      * The next driver release, from 2016, fixed this problem, so working
2667      * around it is not a must, but it's easy to do, so let's do it here.
2668      *
2669      * Note: it's safe to update ISR from any thread as it was switched
2670      * to an atomic operation.
2671      */
2672     virtio_set_isr(vq->vdev, 0x1);
2673     defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
2674 }
2675 
2676 static void virtio_irq(VirtQueue *vq)
2677 {
2678     virtio_set_isr(vq->vdev, 0x1);
2679     virtio_notify_vector(vq->vdev, vq->vector);
2680 }
2681 
2682 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2683 {
2684     WITH_RCU_READ_LOCK_GUARD() {
2685         if (!virtio_should_notify(vdev, vq)) {
2686             return;
2687         }
2688     }
2689 
2690     trace_virtio_notify(vdev, vq);
2691     virtio_irq(vq);
2692 }
2693 
2694 void virtio_notify_config(VirtIODevice *vdev)
2695 {
2696     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2697         return;
2698 
2699     virtio_set_isr(vdev, 0x3);
2700     vdev->generation++;
2701     virtio_notify_vector(vdev, vdev->config_vector);
2702 }
2703 
2704 static bool virtio_device_endian_needed(void *opaque)
2705 {
2706     VirtIODevice *vdev = opaque;
2707 
2708     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2709     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2710         return vdev->device_endian != virtio_default_endian();
2711     }
2712     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2713     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2714 }
2715 
2716 static bool virtio_64bit_features_needed(void *opaque)
2717 {
2718     VirtIODevice *vdev = opaque;
2719 
2720     return (vdev->host_features >> 32) != 0;
2721 }
2722 
2723 static bool virtio_virtqueue_needed(void *opaque)
2724 {
2725     VirtIODevice *vdev = opaque;
2726 
2727     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2728 }
2729 
2730 static bool virtio_packed_virtqueue_needed(void *opaque)
2731 {
2732     VirtIODevice *vdev = opaque;
2733 
2734     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2735 }
2736 
2737 static bool virtio_ringsize_needed(void *opaque)
2738 {
2739     VirtIODevice *vdev = opaque;
2740     int i;
2741 
2742     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2743         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2744             return true;
2745         }
2746     }
2747     return false;
2748 }
2749 
2750 static bool virtio_extra_state_needed(void *opaque)
2751 {
2752     VirtIODevice *vdev = opaque;
2753     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2754     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2755 
2756     return k->has_extra_state &&
2757         k->has_extra_state(qbus->parent);
2758 }
2759 
2760 static bool virtio_broken_needed(void *opaque)
2761 {
2762     VirtIODevice *vdev = opaque;
2763 
2764     return vdev->broken;
2765 }
2766 
2767 static bool virtio_started_needed(void *opaque)
2768 {
2769     VirtIODevice *vdev = opaque;
2770 
2771     return vdev->started;
2772 }
2773 
2774 static bool virtio_disabled_needed(void *opaque)
2775 {
2776     VirtIODevice *vdev = opaque;
2777 
2778     return vdev->disabled;
2779 }
2780 
2781 static const VMStateDescription vmstate_virtqueue = {
2782     .name = "virtqueue_state",
2783     .version_id = 1,
2784     .minimum_version_id = 1,
2785     .fields = (const VMStateField[]) {
2786         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2787         VMSTATE_UINT64(vring.used, struct VirtQueue),
2788         VMSTATE_END_OF_LIST()
2789     }
2790 };
2791 
2792 static const VMStateDescription vmstate_packed_virtqueue = {
2793     .name = "packed_virtqueue_state",
2794     .version_id = 1,
2795     .minimum_version_id = 1,
2796     .fields = (const VMStateField[]) {
2797         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2798         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2799         VMSTATE_UINT16(used_idx, struct VirtQueue),
2800         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2801         VMSTATE_UINT32(inuse, struct VirtQueue),
2802         VMSTATE_END_OF_LIST()
2803     }
2804 };
2805 
2806 static const VMStateDescription vmstate_virtio_virtqueues = {
2807     .name = "virtio/virtqueues",
2808     .version_id = 1,
2809     .minimum_version_id = 1,
2810     .needed = &virtio_virtqueue_needed,
2811     .fields = (const VMStateField[]) {
2812         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2813                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2814         VMSTATE_END_OF_LIST()
2815     }
2816 };
2817 
2818 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2819     .name = "virtio/packed_virtqueues",
2820     .version_id = 1,
2821     .minimum_version_id = 1,
2822     .needed = &virtio_packed_virtqueue_needed,
2823     .fields = (const VMStateField[]) {
2824         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2825                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2826         VMSTATE_END_OF_LIST()
2827     }
2828 };
2829 
2830 static const VMStateDescription vmstate_ringsize = {
2831     .name = "ringsize_state",
2832     .version_id = 1,
2833     .minimum_version_id = 1,
2834     .fields = (const VMStateField[]) {
2835         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2836         VMSTATE_END_OF_LIST()
2837     }
2838 };
2839 
2840 static const VMStateDescription vmstate_virtio_ringsize = {
2841     .name = "virtio/ringsize",
2842     .version_id = 1,
2843     .minimum_version_id = 1,
2844     .needed = &virtio_ringsize_needed,
2845     .fields = (const VMStateField[]) {
2846         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2847                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2848         VMSTATE_END_OF_LIST()
2849     }
2850 };
2851 
2852 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2853                            const VMStateField *field)
2854 {
2855     VirtIODevice *vdev = pv;
2856     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2857     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2858 
2859     if (!k->load_extra_state) {
2860         return -1;
2861     } else {
2862         return k->load_extra_state(qbus->parent, f);
2863     }
2864 }
2865 
2866 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2867                            const VMStateField *field, JSONWriter *vmdesc)
2868 {
2869     VirtIODevice *vdev = pv;
2870     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2871     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2872 
2873     k->save_extra_state(qbus->parent, f);
2874     return 0;
2875 }
2876 
2877 static const VMStateInfo vmstate_info_extra_state = {
2878     .name = "virtqueue_extra_state",
2879     .get = get_extra_state,
2880     .put = put_extra_state,
2881 };
2882 
2883 static const VMStateDescription vmstate_virtio_extra_state = {
2884     .name = "virtio/extra_state",
2885     .version_id = 1,
2886     .minimum_version_id = 1,
2887     .needed = &virtio_extra_state_needed,
2888     .fields = (const VMStateField[]) {
2889         {
2890             .name         = "extra_state",
2891             .version_id   = 0,
2892             .field_exists = NULL,
2893             .size         = 0,
2894             .info         = &vmstate_info_extra_state,
2895             .flags        = VMS_SINGLE,
2896             .offset       = 0,
2897         },
2898         VMSTATE_END_OF_LIST()
2899     }
2900 };
2901 
2902 static const VMStateDescription vmstate_virtio_device_endian = {
2903     .name = "virtio/device_endian",
2904     .version_id = 1,
2905     .minimum_version_id = 1,
2906     .needed = &virtio_device_endian_needed,
2907     .fields = (const VMStateField[]) {
2908         VMSTATE_UINT8(device_endian, VirtIODevice),
2909         VMSTATE_END_OF_LIST()
2910     }
2911 };
2912 
2913 static const VMStateDescription vmstate_virtio_64bit_features = {
2914     .name = "virtio/64bit_features",
2915     .version_id = 1,
2916     .minimum_version_id = 1,
2917     .needed = &virtio_64bit_features_needed,
2918     .fields = (const VMStateField[]) {
2919         VMSTATE_UINT64(guest_features, VirtIODevice),
2920         VMSTATE_END_OF_LIST()
2921     }
2922 };
2923 
2924 static const VMStateDescription vmstate_virtio_broken = {
2925     .name = "virtio/broken",
2926     .version_id = 1,
2927     .minimum_version_id = 1,
2928     .needed = &virtio_broken_needed,
2929     .fields = (const VMStateField[]) {
2930         VMSTATE_BOOL(broken, VirtIODevice),
2931         VMSTATE_END_OF_LIST()
2932     }
2933 };
2934 
2935 static const VMStateDescription vmstate_virtio_started = {
2936     .name = "virtio/started",
2937     .version_id = 1,
2938     .minimum_version_id = 1,
2939     .needed = &virtio_started_needed,
2940     .fields = (const VMStateField[]) {
2941         VMSTATE_BOOL(started, VirtIODevice),
2942         VMSTATE_END_OF_LIST()
2943     }
2944 };
2945 
2946 static const VMStateDescription vmstate_virtio_disabled = {
2947     .name = "virtio/disabled",
2948     .version_id = 1,
2949     .minimum_version_id = 1,
2950     .needed = &virtio_disabled_needed,
2951     .fields = (const VMStateField[]) {
2952         VMSTATE_BOOL(disabled, VirtIODevice),
2953         VMSTATE_END_OF_LIST()
2954     }
2955 };
2956 
2957 static const VMStateDescription vmstate_virtio = {
2958     .name = "virtio",
2959     .version_id = 1,
2960     .minimum_version_id = 1,
2961     .fields = (const VMStateField[]) {
2962         VMSTATE_END_OF_LIST()
2963     },
2964     .subsections = (const VMStateDescription * const []) {
2965         &vmstate_virtio_device_endian,
2966         &vmstate_virtio_64bit_features,
2967         &vmstate_virtio_virtqueues,
2968         &vmstate_virtio_ringsize,
2969         &vmstate_virtio_broken,
2970         &vmstate_virtio_extra_state,
2971         &vmstate_virtio_started,
2972         &vmstate_virtio_packed_virtqueues,
2973         &vmstate_virtio_disabled,
2974         NULL
2975     }
2976 };
2977 
2978 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2979 {
2980     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2981     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2982     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2983     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2984     int i;
2985 
2986     if (k->save_config) {
2987         k->save_config(qbus->parent, f);
2988     }
2989 
2990     qemu_put_8s(f, &vdev->status);
2991     qemu_put_8s(f, &vdev->isr);
2992     qemu_put_be16s(f, &vdev->queue_sel);
2993     qemu_put_be32s(f, &guest_features_lo);
2994     qemu_put_be32(f, vdev->config_len);
2995     qemu_put_buffer(f, vdev->config, vdev->config_len);
2996 
2997     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2998         if (vdev->vq[i].vring.num == 0)
2999             break;
3000     }
3001 
3002     qemu_put_be32(f, i);
3003 
3004     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3005         if (vdev->vq[i].vring.num == 0)
3006             break;
3007 
3008         qemu_put_be32(f, vdev->vq[i].vring.num);
3009         if (k->has_variable_vring_alignment) {
3010             qemu_put_be32(f, vdev->vq[i].vring.align);
3011         }
3012         /*
3013          * Save desc now, the rest of the ring addresses are saved in
3014          * subsections for VIRTIO-1 devices.
3015          */
3016         qemu_put_be64(f, vdev->vq[i].vring.desc);
3017         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
3018         if (k->save_queue) {
3019             k->save_queue(qbus->parent, i, f);
3020         }
3021     }
3022 
3023     if (vdc->save != NULL) {
3024         vdc->save(vdev, f);
3025     }
3026 
3027     if (vdc->vmsd) {
3028         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
3029         if (ret) {
3030             return ret;
3031         }
3032     }
3033 
3034     /* Subsections */
3035     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
3036 }
3037 
3038 /* A wrapper for use as a VMState .put function */
3039 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
3040                               const VMStateField *field, JSONWriter *vmdesc)
3041 {
3042     return virtio_save(VIRTIO_DEVICE(opaque), f);
3043 }
3044 
3045 /* A wrapper for use as a VMState .get function */
3046 static int coroutine_mixed_fn
3047 virtio_device_get(QEMUFile *f, void *opaque, size_t size,
3048                   const VMStateField *field)
3049 {
3050     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
3051     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
3052 
3053     return virtio_load(vdev, f, dc->vmsd->version_id);
3054 }
3055 
3056 const VMStateInfo  virtio_vmstate_info = {
3057     .name = "virtio",
3058     .get = virtio_device_get,
3059     .put = virtio_device_put,
3060 };
3061 
3062 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
3063 {
3064     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3065     bool bad = (val & ~(vdev->host_features)) != 0;
3066 
3067     val &= vdev->host_features;
3068     if (k->set_features) {
3069         k->set_features(vdev, val);
3070     }
3071     vdev->guest_features = val;
3072     return bad ? -1 : 0;
3073 }
3074 
3075 typedef struct VirtioSetFeaturesNocheckData {
3076     Coroutine *co;
3077     VirtIODevice *vdev;
3078     uint64_t val;
3079     int ret;
3080 } VirtioSetFeaturesNocheckData;
3081 
3082 static void virtio_set_features_nocheck_bh(void *opaque)
3083 {
3084     VirtioSetFeaturesNocheckData *data = opaque;
3085 
3086     data->ret = virtio_set_features_nocheck(data->vdev, data->val);
3087     aio_co_wake(data->co);
3088 }
3089 
3090 static int coroutine_mixed_fn
3091 virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
3092 {
3093     if (qemu_in_coroutine()) {
3094         VirtioSetFeaturesNocheckData data = {
3095             .co = qemu_coroutine_self(),
3096             .vdev = vdev,
3097             .val = val,
3098         };
3099         aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
3100                                 virtio_set_features_nocheck_bh, &data);
3101         qemu_coroutine_yield();
3102         return data.ret;
3103     } else {
3104         return virtio_set_features_nocheck(vdev, val);
3105     }
3106 }
3107 
3108 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
3109 {
3110     int ret;
3111     /*
3112      * The driver must not attempt to set features after feature negotiation
3113      * has finished.
3114      */
3115     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
3116         return -EINVAL;
3117     }
3118 
3119     if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
3120         qemu_log_mask(LOG_GUEST_ERROR,
3121                       "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
3122                       __func__, vdev->name);
3123     }
3124 
3125     ret = virtio_set_features_nocheck(vdev, val);
3126     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
3127         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
3128         int i;
3129         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3130             if (vdev->vq[i].vring.num != 0) {
3131                 virtio_init_region_cache(vdev, i);
3132             }
3133         }
3134     }
3135     if (!ret) {
3136         if (!virtio_device_started(vdev, vdev->status) &&
3137             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3138             vdev->start_on_kick = true;
3139         }
3140     }
3141     return ret;
3142 }
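/*
 * Illustrative, standalone sketch (not part of virtio.c): how the
 * feature-negotiation checks above behave on plain 64-bit masks.  The
 * host/guest feature values below are made up for the example.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

static void feature_mask_demo(void)
{
    const uint64_t host_features  = (1ull << 0) | (1ull << 32);  /* offered */
    const uint64_t guest_features = (1ull << 0) | (1ull << 30);  /* requested */

    /* Bit 30 (UNUSED(30)/VIRTIO_F_BAD_FEATURE) is flagged, then masked away. */
    bool bad = (guest_features & ~host_features) != 0;
    uint64_t accepted = guest_features & host_features;

    printf("bad=%d accepted=0x%" PRIx64 "\n", bad, accepted);  /* bad=1, 0x1 */
}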
3143 
3144 void virtio_reset(void *opaque)
3145 {
3146     VirtIODevice *vdev = opaque;
3147     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
3148     int i;
3149 
3150     virtio_set_status(vdev, 0);
3151     if (current_cpu) {
3152         /* Guest initiated reset */
3153         vdev->device_endian = virtio_current_cpu_endian();
3154     } else {
3155         /* System reset */
3156         vdev->device_endian = virtio_default_endian();
3157     }
3158 
3159     if (k->get_vhost) {
3160         struct vhost_dev *hdev = k->get_vhost(vdev);
3161         /* Only reset when vhost back-end is connected */
3162         if (hdev && hdev->vhost_ops) {
3163             vhost_reset_device(hdev);
3164         }
3165     }
3166 
3167     if (k->reset) {
3168         k->reset(vdev);
3169     }
3170 
3171     vdev->start_on_kick = false;
3172     vdev->started = false;
3173     vdev->broken = false;
3174     virtio_set_features_nocheck(vdev, 0);
3175     vdev->queue_sel = 0;
3176     vdev->status = 0;
3177     vdev->disabled = false;
3178     qatomic_set(&vdev->isr, 0);
3179     vdev->config_vector = VIRTIO_NO_VECTOR;
3180     virtio_notify_vector(vdev, vdev->config_vector);
3181 
3182     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3183         __virtio_queue_reset(vdev, i);
3184     }
3185 }
3186 
3187 static void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
3188                                                            Error **errp)
3189 {
3190     VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3191     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
3192     DeviceState *proxy = DEVICE(BUS(bus)->parent);
3193 
3194     if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
3195         k->ioeventfd_enabled(proxy)) {
3196         error_setg(errp,
3197                    "notification_data=on without ioeventfd=off is not supported");
3198     }
3199 }
3200 
3201 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
3202                               uint64_t host_features)
3203 {
3204     size_t config_size = params->min_size;
3205     const VirtIOFeature *feature_sizes = params->feature_sizes;
3206     size_t i;
3207 
3208     for (i = 0; feature_sizes[i].flags != 0; i++) {
3209         if (host_features & feature_sizes[i].flags) {
3210             config_size = MAX(feature_sizes[i].end, config_size);
3211         }
3212     }
3213 
3214     assert(config_size <= params->max_size);
3215     return config_size;
3216 }
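/*
 * Illustrative, standalone sketch (not part of virtio.c): the max-of-"end"
 * computation performed by virtio_get_config_size() above, using a local
 * struct that mirrors only the flags/end fields it reads.  The feature bits,
 * sizes and min_size below are invented for the example.
 */
#include <stddef.h>
#include <stdint.h>

struct demo_feature_size {
    uint64_t flags;   /* feature bit(s) that extend the config space */
    size_t end;       /* config size required when that feature is offered */
};

static size_t demo_config_size(uint64_t host_features)
{
    static const struct demo_feature_size sizes[] = {
        { 1ull << 5,  24 },   /* hypothetical feature A */
        { 1ull << 9,  40 },   /* hypothetical feature B */
        { 0, 0 },             /* zero flags terminate the table */
    };
    size_t config_size = 16;  /* hypothetical min_size */

    for (size_t i = 0; sizes[i].flags != 0; i++) {
        if (host_features & sizes[i].flags) {
            config_size = config_size > sizes[i].end ? config_size
                                                     : sizes[i].end;
        }
    }
    return config_size;       /* 40 when both bits 5 and 9 are offered */
}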
3217 
3218 int coroutine_mixed_fn
3219 virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3220 {
3221     int i, ret;
3222     int32_t config_len;
3223     uint32_t num;
3224     uint32_t features;
3225     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3226     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3227     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3228 
3229     /*
3230      * We poison the endianness to ensure it does not get used before
3231      * subsections have been loaded.
3232      */
3233     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3234 
3235     if (k->load_config) {
3236         ret = k->load_config(qbus->parent, f);
3237         if (ret)
3238             return ret;
3239     }
3240 
3241     qemu_get_8s(f, &vdev->status);
3242     qemu_get_8s(f, &vdev->isr);
3243     qemu_get_be16s(f, &vdev->queue_sel);
3244     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3245         return -1;
3246     }
3247     qemu_get_be32s(f, &features);
3248 
3249     /*
3250      * Temporarily set guest_features low bits - needed by
3251      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
3252      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3253      *
3254      * Note: devices should always test host features in future - don't create
3255      * new dependencies like this.
3256      */
3257     vdev->guest_features = features;
3258 
3259     config_len = qemu_get_be32(f);
3260 
3261     /*
3262      * There are cases where the incoming config can be bigger or smaller
3263      * than what we have; so load what we have space for, and skip
3264      * any excess that's in the stream.
3265      */
3266     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3267 
3268     while (config_len > vdev->config_len) {
3269         qemu_get_byte(f);
3270         config_len--;
3271     }
3272 
3273     if (vdc->pre_load_queues) {
3274         ret = vdc->pre_load_queues(vdev);
3275         if (ret) {
3276             return ret;
3277         }
3278     }
3279 
3280     num = qemu_get_be32(f);
3281 
3282     if (num > VIRTIO_QUEUE_MAX) {
3283         error_report("Invalid number of virtqueues: 0x%x", num);
3284         return -1;
3285     }
3286 
3287     for (i = 0; i < num; i++) {
3288         vdev->vq[i].vring.num = qemu_get_be32(f);
3289         if (k->has_variable_vring_alignment) {
3290             vdev->vq[i].vring.align = qemu_get_be32(f);
3291         }
3292         vdev->vq[i].vring.desc = qemu_get_be64(f);
3293         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3294         vdev->vq[i].signalled_used_valid = false;
3295         vdev->vq[i].notification = true;
3296 
3297         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3298             error_report("VQ %d address 0x0 "
3299                          "inconsistent with Host index 0x%x",
3300                          i, vdev->vq[i].last_avail_idx);
3301             return -1;
3302         }
3303         if (k->load_queue) {
3304             ret = k->load_queue(qbus->parent, i, f);
3305             if (ret)
3306                 return ret;
3307         }
3308     }
3309 
3310     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3311 
3312     if (vdc->load != NULL) {
3313         ret = vdc->load(vdev, f, version_id);
3314         if (ret) {
3315             return ret;
3316         }
3317     }
3318 
3319     if (vdc->vmsd) {
3320         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3321         if (ret) {
3322             return ret;
3323         }
3324     }
3325 
3326     /* Subsections */
3327     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3328     if (ret) {
3329         return ret;
3330     }
3331 
3332     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3333         vdev->device_endian = virtio_default_endian();
3334     }
3335 
3336     if (virtio_64bit_features_needed(vdev)) {
3337         /*
3338          * Subsection load filled vdev->guest_features.  Run them
3339          * through virtio_set_features to sanity-check them against
3340          * host_features.
3341          */
3342         uint64_t features64 = vdev->guest_features;
3343         if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
3344             error_report("Features 0x%" PRIx64 " unsupported. "
3345                          "Allowed features: 0x%" PRIx64,
3346                          features64, vdev->host_features);
3347             return -1;
3348         }
3349     } else {
3350         if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
3351             error_report("Features 0x%x unsupported. "
3352                          "Allowed features: 0x%" PRIx64,
3353                          features, vdev->host_features);
3354             return -1;
3355         }
3356     }
3357 
3358     if (!virtio_device_started(vdev, vdev->status) &&
3359         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3360         vdev->start_on_kick = true;
3361     }
3362 
3363     RCU_READ_LOCK_GUARD();
3364     for (i = 0; i < num; i++) {
3365         if (vdev->vq[i].vring.desc) {
3366             uint16_t nheads;
3367 
3368             /*
3369              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3370              * only the region cache needs to be set up.  Legacy devices need
3371              * to calculate used and avail ring addresses based on the desc
3372              * address.
3373              */
3374             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3375                 virtio_init_region_cache(vdev, i);
3376             } else {
3377                 virtio_queue_update_rings(vdev, i);
3378             }
3379 
3380             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3381                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3382                 vdev->vq[i].shadow_avail_wrap_counter =
3383                                         vdev->vq[i].last_avail_wrap_counter;
3384                 continue;
3385             }
3386 
3387             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3388             /* Check it isn't doing strange things with descriptor numbers. */
3389             if (nheads > vdev->vq[i].vring.num) {
3390                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3391                              "inconsistent with Host index 0x%x: delta 0x%x",
3392                              i, vdev->vq[i].vring.num,
3393                              vring_avail_idx(&vdev->vq[i]),
3394                              vdev->vq[i].last_avail_idx, nheads);
3395                 vdev->vq[i].used_idx = 0;
3396                 vdev->vq[i].shadow_avail_idx = 0;
3397                 vdev->vq[i].inuse = 0;
3398                 continue;
3399             }
3400             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3401             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3402 
3403             /*
3404              * Some devices migrate VirtQueueElements that have been popped
3405              * from the avail ring but not yet returned to the used ring.
3406              * Since max ring size < UINT16_MAX it's safe to use modulo
3407              * UINT16_MAX + 1 subtraction.
3408              */
3409             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3410                                 vdev->vq[i].used_idx);
3411             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3412                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3413                              "used_idx 0x%x",
3414                              i, vdev->vq[i].vring.num,
3415                              vdev->vq[i].last_avail_idx,
3416                              vdev->vq[i].used_idx);
3417                 return -1;
3418             }
3419         }
3420     }
3421 
3422     if (vdc->post_load) {
3423         ret = vdc->post_load(vdev);
3424         if (ret) {
3425             return ret;
3426         }
3427     }
3428 
3429     return 0;
3430 }
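/*
 * Illustrative, standalone sketch (not part of virtio.c): the unsigned
 * 16-bit wraparound that makes the "inuse = last_avail_idx - used_idx"
 * computation in virtio_load() safe even after the indices have wrapped.
 * The index values are made up.
 */
#include <assert.h>
#include <stdint.h>

static void inuse_wraparound_demo(void)
{
    uint16_t last_avail_idx = 3;       /* already wrapped past 0xffff */
    uint16_t used_idx = 0xfffe;        /* not yet wrapped */

    /* (3 - 0xfffe) mod 0x10000 == 5 descriptors still in flight */
    uint16_t inuse = (uint16_t)(last_avail_idx - used_idx);
    assert(inuse == 5);
}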
3431 
3432 void virtio_cleanup(VirtIODevice *vdev)
3433 {
3434     qemu_del_vm_change_state_handler(vdev->vmstate);
3435 }
3436 
3437 static int virtio_vmstate_change(void *opaque, bool running, RunState state)
3438 {
3439     VirtIODevice *vdev = opaque;
3440     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3441     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3442     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3443     vdev->vm_running = running;
3444 
3445     if (backend_run) {
3446         virtio_set_status(vdev, vdev->status);
3447     }
3448 
3449     if (k->vmstate_change) {
3450         k->vmstate_change(qbus->parent, backend_run);
3451     }
3452 
3453     if (!backend_run) {
3454         int ret = virtio_set_status(vdev, vdev->status);
3455         if (ret) {
3456             return ret;
3457         }
3458     }
3459     return 0;
3460 }
3461 
3462 void virtio_instance_init_common(Object *proxy_obj, void *data,
3463                                  size_t vdev_size, const char *vdev_name)
3464 {
3465     DeviceState *vdev = data;
3466 
3467     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3468                                        vdev_size, vdev_name, &error_abort,
3469                                        NULL);
3470     qdev_alias_all_properties(vdev, proxy_obj);
3471 }
3472 
3473 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
3474 {
3475     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3476     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3477     int i;
3478     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3479 
3480     if (nvectors) {
3481         vdev->vector_queues =
3482             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3483     }
3484 
3485     vdev->start_on_kick = false;
3486     vdev->started = false;
3487     vdev->vhost_started = false;
3488     vdev->device_id = device_id;
3489     vdev->status = 0;
3490     qatomic_set(&vdev->isr, 0);
3491     vdev->queue_sel = 0;
3492     vdev->config_vector = VIRTIO_NO_VECTOR;
3493     vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
3494     vdev->vm_running = runstate_is_running();
3495     vdev->broken = false;
3496     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3497         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3498         vdev->vq[i].vdev = vdev;
3499         vdev->vq[i].queue_index = i;
3500         vdev->vq[i].host_notifier_enabled = false;
3501     }
3502 
3503     vdev->name = virtio_id_to_name(device_id);
3504     vdev->config_len = config_size;
3505     if (vdev->config_len) {
3506         vdev->config = g_malloc0(config_size);
3507     } else {
3508         vdev->config = NULL;
3509     }
3510     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3511             NULL, virtio_vmstate_change, vdev);
3512     vdev->device_endian = virtio_default_endian();
3513     vdev->use_guest_notifier_mask = true;
3514 }
3515 
3516 /*
3517  * Only devices that have already been around prior to defining the virtio
3518  * standard support legacy mode; this includes devices not specified in the
3519  * standard. All newer devices conform to the virtio standard only.
3520  */
3521 bool virtio_legacy_allowed(VirtIODevice *vdev)
3522 {
3523     switch (vdev->device_id) {
3524     case VIRTIO_ID_NET:
3525     case VIRTIO_ID_BLOCK:
3526     case VIRTIO_ID_CONSOLE:
3527     case VIRTIO_ID_RNG:
3528     case VIRTIO_ID_BALLOON:
3529     case VIRTIO_ID_RPMSG:
3530     case VIRTIO_ID_SCSI:
3531     case VIRTIO_ID_9P:
3532     case VIRTIO_ID_RPROC_SERIAL:
3533     case VIRTIO_ID_CAIF:
3534         return true;
3535     default:
3536         return false;
3537     }
3538 }
3539 
3540 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3541 {
3542     return vdev->disable_legacy_check;
3543 }
3544 
3545 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3546 {
3547     return vdev->vq[n].vring.desc;
3548 }
3549 
3550 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3551 {
3552     return virtio_queue_get_desc_addr(vdev, n) != 0;
3553 }
3554 
3555 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3556 {
3557     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3558     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3559 
3560     if (k->queue_enabled) {
3561         return k->queue_enabled(qbus->parent, n);
3562     }
3563     return virtio_queue_enabled_legacy(vdev, n);
3564 }
3565 
3566 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3567 {
3568     return vdev->vq[n].vring.avail;
3569 }
3570 
3571 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3572 {
3573     return vdev->vq[n].vring.used;
3574 }
3575 
3576 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3577 {
3578     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3579 }
3580 
3581 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3582 {
3583     int s;
3584 
3585     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3586         return sizeof(struct VRingPackedDescEvent);
3587     }
3588 
3589     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3590     return offsetof(VRingAvail, ring) +
3591         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3592 }
3593 
3594 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3595 {
3596     int s;
3597 
3598     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3599         return sizeof(struct VRingPackedDescEvent);
3600     }
3601 
3602     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3603     return offsetof(VRingUsed, ring) +
3604         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3605 }
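/*
 * Illustrative, standalone sketch (not part of virtio.c): the split-ring
 * size arithmetic done by virtio_queue_get_avail_size()/_used_size() above,
 * written out with the layout constants from the virtio spec (2-byte flags,
 * 2-byte idx header, per-entry payload, optional 2-byte event field).  A
 * queue size of 256 with event_idx negotiated is an example input.
 */
#include <stddef.h>

static void split_ring_size_demo(void)
{
    const unsigned num = 256;
    const int event = 1;                        /* VIRTIO_RING_F_EVENT_IDX */

    size_t avail = 2 + 2 + 2 * num + (event ? 2 : 0);   /* 518 bytes  */
    size_t used  = 2 + 2 + 8 * num + (event ? 2 : 0);   /* 2054 bytes */

    (void)avail;
    (void)used;
}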
3606 
3607 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3608                                                            int n)
3609 {
3610     unsigned int avail, used;
3611 
3612     avail = vdev->vq[n].last_avail_idx;
3613     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3614 
3615     used = vdev->vq[n].used_idx;
3616     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3617 
3618     return avail | used << 16;
3619 }
3620 
3621 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3622                                                       int n)
3623 {
3624     return vdev->vq[n].last_avail_idx;
3625 }
3626 
3627 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3628 {
3629     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3630         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3631     } else {
3632         return virtio_queue_split_get_last_avail_idx(vdev, n);
3633     }
3634 }
3635 
3636 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3637                                                    int n, unsigned int idx)
3638 {
3639     struct VirtQueue *vq = &vdev->vq[n];
3640 
3641     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3642     vq->last_avail_wrap_counter =
3643         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3644     idx >>= 16;
3645     vq->used_idx = idx & 0x7fff;
3646     vq->used_wrap_counter = !!(idx & 0x8000);
3647 }
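/*
 * Illustrative, standalone sketch (not part of virtio.c): the 32-bit
 * encoding used by the packed-ring get/set helpers above -- bits 0..14
 * carry last_avail_idx, bit 15 its wrap counter, bits 16..30 used_idx and
 * bit 31 its wrap counter.  The index values are made up.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static void packed_idx_encoding_demo(void)
{
    uint16_t last_avail = 0x1234;
    bool avail_wrap = true;
    uint16_t used = 0x0042;
    bool used_wrap = false;

    uint32_t word = (last_avail & 0x7fff) | ((uint32_t)avail_wrap << 15) |
                    ((uint32_t)(used & 0x7fff) << 16) |
                    ((uint32_t)used_wrap << 31);

    assert((word & 0x7fff) == 0x1234);
    assert(!!(word & 0x8000) == avail_wrap);
    assert(((word >> 16) & 0x7fff) == 0x0042);
    assert(!!(word >> 31) == used_wrap);
}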
3648 
3649 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3650                                                   int n, unsigned int idx)
3651 {
3652         vdev->vq[n].last_avail_idx = idx;
3653         vdev->vq[n].shadow_avail_idx = idx;
3654 }
3655 
3656 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3657                                      unsigned int idx)
3658 {
3659     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3660         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3661     } else {
3662         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3663     }
3664 }
3665 
3666 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3667                                                        int n)
3668 {
3669     /* We don't have a reference like avail idx in shared memory */
3670 }
3671 
3672 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3673                                                       int n)
3674 {
3675     RCU_READ_LOCK_GUARD();
3676     if (vdev->vq[n].vring.desc) {
3677         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3678         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3679     }
3680 }
3681 
3682 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3683 {
3684     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3685         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3686     } else {
3687         virtio_queue_split_restore_last_avail_idx(vdev, n);
3688     }
3689 }
3690 
3691 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3692 {
3693     /* used idx was updated through set_last_avail_idx() */
3694 }
3695 
3696 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3697 {
3698     RCU_READ_LOCK_GUARD();
3699     if (vdev->vq[n].vring.desc) {
3700         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3701     }
3702 }
3703 
3704 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3705 {
3706     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3707         return virtio_queue_packed_update_used_idx(vdev, n);
3708     } else {
3709         return virtio_queue_split_update_used_idx(vdev, n);
3710     }
3711 }
3712 
3713 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3714 {
3715     vdev->vq[n].signalled_used_valid = false;
3716 }
3717 
3718 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3719 {
3720     return vdev->vq + n;
3721 }
3722 
3723 uint16_t virtio_get_queue_index(VirtQueue *vq)
3724 {
3725     return vq->queue_index;
3726 }
3727 
3728 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3729 {
3730     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3731     if (event_notifier_test_and_clear(n)) {
3732         virtio_irq(vq);
3733     }
3734 }
3735 static void virtio_config_guest_notifier_read(EventNotifier *n)
3736 {
3737     VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
3738 
3739     if (event_notifier_test_and_clear(n)) {
3740         virtio_notify_config(vdev);
3741     }
3742 }
3743 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3744                                                 bool with_irqfd)
3745 {
3746     if (assign && !with_irqfd) {
3747         event_notifier_set_handler(&vq->guest_notifier,
3748                                    virtio_queue_guest_notifier_read);
3749     } else {
3750         event_notifier_set_handler(&vq->guest_notifier, NULL);
3751     }
3752     if (!assign) {
3753         /* Test and clear notifier before closing it,
3754          * in case poll callback didn't have time to run. */
3755         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3756     }
3757 }
3758 
3759 void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
3760                                                  bool assign, bool with_irqfd)
3761 {
3762     EventNotifier *n;
3763     n = &vdev->config_notifier;
3764     if (assign && !with_irqfd) {
3765         event_notifier_set_handler(n, virtio_config_guest_notifier_read);
3766     } else {
3767         event_notifier_set_handler(n, NULL);
3768     }
3769     if (!assign) {
3770         /* Test and clear notifier before closing it,*/
3771         /* in case poll callback didn't have time to run. */
3772         virtio_config_guest_notifier_read(n);
3773     }
3774 }
3775 
3776 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3777 {
3778     return &vq->guest_notifier;
3779 }
3780 
3781 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3782 {
3783     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3784 
3785     virtio_queue_set_notification(vq, 0);
3786 }
3787 
3788 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3789 {
3790     EventNotifier *n = opaque;
3791     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3792 
3793     return vq->vring.desc && !virtio_queue_empty(vq);
3794 }
3795 
3796 static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
3797 {
3798     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3799 
3800     virtio_queue_notify_vq(vq);
3801 }
3802 
3803 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3804 {
3805     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3806 
3807     /* Caller polls once more after this to catch requests that race with us */
3808     virtio_queue_set_notification(vq, 1);
3809 }
3810 
3811 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
3812 {
3813     /*
3814      * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
3815      * Re-enable them.  (And if detach has not been used before, notifications
3816      * being enabled is still the default state while a notifier is attached;
3817      * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
3818      * notifications enabled once the polling section is left.)
3819      */
3820     if (!virtio_queue_get_notification(vq)) {
3821         virtio_queue_set_notification(vq, 1);
3822     }
3823 
3824     aio_set_event_notifier(ctx, &vq->host_notifier,
3825                            virtio_queue_host_notifier_read,
3826                            virtio_queue_host_notifier_aio_poll,
3827                            virtio_queue_host_notifier_aio_poll_ready);
3828     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3829                                 virtio_queue_host_notifier_aio_poll_begin,
3830                                 virtio_queue_host_notifier_aio_poll_end);
3831 
3832     /*
3833      * We will have ignored notifications about new requests from the guest
3834      * while no notifiers were attached, so "kick" the virt queue to process
3835      * those requests now.
3836      */
3837     event_notifier_set(&vq->host_notifier);
3838 }
3839 
3840 /*
3841  * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
3842  * this for rx virtqueues and similar cases where the virtqueue handler
3843  * function does not pop all elements. When the virtqueue is left non-empty
3844  * polling consumes CPU cycles and should not be used.
3845  */
3846 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
3847 {
3848     /* See virtio_queue_aio_attach_host_notifier() */
3849     if (!virtio_queue_get_notification(vq)) {
3850         virtio_queue_set_notification(vq, 1);
3851     }
3852 
3853     aio_set_event_notifier(ctx, &vq->host_notifier,
3854                            virtio_queue_host_notifier_read,
3855                            NULL, NULL);
3856 
3857     /*
3858      * See virtio_queue_aio_attach_host_notifier().
3859      * Note that this may be unnecessary for the type of virtqueues this
3860      * function is used for.  Still, it will not hurt to have a quick look into
3861      * whether we can/should process any of the virtqueue elements.
3862      */
3863     event_notifier_set(&vq->host_notifier);
3864 }
3865 
3866 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
3867 {
3868     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
3869 
3870     /*
3871      * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
3872      * will run after io_poll_begin(), so by removing the notifier, we do not
3873      * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
3874      * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
3875      * notifications are enabled or disabled.  It does not really matter anyway;
3876      * we just removed the notifier, so we do not care about notifications until
3877      * we potentially re-attach it.  The attach_host_notifier functions will
3878      * ensure that notifications are enabled again when they are needed.
3879      */
3880 }
3881 
3882 void virtio_queue_host_notifier_read(EventNotifier *n)
3883 {
3884     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3885     if (event_notifier_test_and_clear(n)) {
3886         virtio_queue_notify_vq(vq);
3887     }
3888 }
3889 
3890 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3891 {
3892     return &vq->host_notifier;
3893 }
3894 
3895 EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
3896 {
3897     return &vdev->config_notifier;
3898 }
3899 
3900 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3901 {
3902     vq->host_notifier_enabled = enabled;
3903 }
3904 
3905 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3906                                       MemoryRegion *mr, bool assign)
3907 {
3908     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3909     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3910 
3911     if (k->set_host_notifier_mr) {
3912         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3913     }
3914 
3915     return -1;
3916 }
3917 
3918 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3919 {
3920     g_free(vdev->bus_name);
3921     vdev->bus_name = g_strdup(bus_name);
3922 }
3923 
3924 void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3925 {
3926     va_list ap;
3927 
3928     va_start(ap, fmt);
3929     error_vreport(fmt, ap);
3930     va_end(ap);
3931 
3932     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3933         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3934         virtio_notify_config(vdev);
3935     }
3936 
3937     vdev->broken = true;
3938 }
3939 
3940 static void virtio_memory_listener_commit(MemoryListener *listener)
3941 {
3942     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3943     int i;
3944 
3945     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3946         if (vdev->vq[i].vring.num == 0) {
3947             break;
3948         }
3949         virtio_init_region_cache(vdev, i);
3950     }
3951 }
3952 
3953 static void virtio_device_realize(DeviceState *dev, Error **errp)
3954 {
3955     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3956     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3957     Error *err = NULL;
3958 
3959     /* Devices should either use vmsd or the load/save methods */
3960     assert(!vdc->vmsd || !vdc->load);
3961 
3962     if (vdc->realize != NULL) {
3963         vdc->realize(dev, &err);
3964         if (err != NULL) {
3965             error_propagate(errp, err);
3966             return;
3967         }
3968     }
3969 
3970     /* Devices should not use both ioeventfd and notification data feature */
3971     virtio_device_check_notification_compatibility(vdev, &err);
3972     if (err != NULL) {
3973         error_propagate(errp, err);
3974         vdc->unrealize(dev);
3975         return;
3976     }
3977 
3978     virtio_bus_device_plugged(vdev, &err);
3979     if (err != NULL) {
3980         error_propagate(errp, err);
3981         vdc->unrealize(dev);
3982         return;
3983     }
3984 
3985     vdev->listener.commit = virtio_memory_listener_commit;
3986     vdev->listener.name = "virtio";
3987     memory_listener_register(&vdev->listener, vdev->dma_as);
3988 }
3989 
3990 static void virtio_device_unrealize(DeviceState *dev)
3991 {
3992     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3993     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3994 
3995     memory_listener_unregister(&vdev->listener);
3996     virtio_bus_device_unplugged(vdev);
3997 
3998     if (vdc->unrealize != NULL) {
3999         vdc->unrealize(dev);
4000     }
4001 
4002     g_free(vdev->bus_name);
4003     vdev->bus_name = NULL;
4004 }
4005 
4006 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
4007 {
4008     int i;
4009     if (!vdev->vq) {
4010         return;
4011     }
4012 
4013     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
4014         if (vdev->vq[i].vring.num == 0) {
4015             break;
4016         }
4017         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
4018     }
4019     g_free(vdev->vq);
4020 }
4021 
4022 static void virtio_device_instance_finalize(Object *obj)
4023 {
4024     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
4025 
4026     virtio_device_free_virtqueues(vdev);
4027 
4028     g_free(vdev->config);
4029     g_free(vdev->vector_queues);
4030 }
4031 
4032 static const Property virtio_properties[] = {
4033     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
4034     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
4035     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
4036     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
4037                      disable_legacy_check, false),
4038 };
4039 
4040 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
4041 {
4042     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4043     int i, n, r, err;
4044 
4045     /*
4046      * Batch all the host notifiers in a single transaction to avoid
4047      * quadratic time complexity in address_space_update_ioeventfds().
4048      */
4049     memory_region_transaction_begin();
4050     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4051         VirtQueue *vq = &vdev->vq[n];
4052         if (!virtio_queue_get_num(vdev, n)) {
4053             continue;
4054         }
4055         r = virtio_bus_set_host_notifier(qbus, n, true);
4056         if (r < 0) {
4057             err = r;
4058             goto assign_error;
4059         }
4060         event_notifier_set_handler(&vq->host_notifier,
4061                                    virtio_queue_host_notifier_read);
4062     }
4063 
4064     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4065         /* Kick right away to begin processing requests already in vring */
4066         VirtQueue *vq = &vdev->vq[n];
4067         if (!vq->vring.num) {
4068             continue;
4069         }
4070         event_notifier_set(&vq->host_notifier);
4071     }
4072     memory_region_transaction_commit();
4073     return 0;
4074 
4075 assign_error:
4076     i = n; /* save n for a second iteration after transaction is committed. */
4077     while (--n >= 0) {
4078         VirtQueue *vq = &vdev->vq[n];
4079         if (!virtio_queue_get_num(vdev, n)) {
4080             continue;
4081         }
4082 
4083         event_notifier_set_handler(&vq->host_notifier, NULL);
4084         r = virtio_bus_set_host_notifier(qbus, n, false);
4085         assert(r >= 0);
4086     }
4087     /*
4088      * The transaction expects the ioeventfds to be open when it
4089      * commits. Do it now, before the cleanup loop.
4090      */
4091     memory_region_transaction_commit();
4092 
4093     while (--i >= 0) {
4094         if (!virtio_queue_get_num(vdev, i)) {
4095             continue;
4096         }
4097         virtio_bus_cleanup_host_notifier(qbus, i);
4098     }
4099     return err;
4100 }
4101 
4102 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
4103 {
4104     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4105     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4106 
4107     return virtio_bus_start_ioeventfd(vbus);
4108 }
4109 
4110 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
4111 {
4112     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
4113     int n, r;
4114 
4115     /*
4116      * Batch all the host notifiers in a single transaction to avoid
4117      * quadratic time complexity in address_space_update_ioeventfds().
4118      */
4119     memory_region_transaction_begin();
4120     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4121         VirtQueue *vq = &vdev->vq[n];
4122 
4123         if (!virtio_queue_get_num(vdev, n)) {
4124             continue;
4125         }
4126         event_notifier_set_handler(&vq->host_notifier, NULL);
4127         r = virtio_bus_set_host_notifier(qbus, n, false);
4128         assert(r >= 0);
4129     }
4130     /*
4131      * The transaction expects the ioeventfds to be open when it
4132      * commits. Do it now, before the cleanup loop.
4133      */
4134     memory_region_transaction_commit();
4135 
4136     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
4137         if (!virtio_queue_get_num(vdev, n)) {
4138             continue;
4139         }
4140         virtio_bus_cleanup_host_notifier(qbus, n);
4141     }
4142 }
4143 
4144 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
4145 {
4146     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4147     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4148 
4149     return virtio_bus_grab_ioeventfd(vbus);
4150 }
4151 
4152 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
4153 {
4154     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4155     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4156 
4157     virtio_bus_release_ioeventfd(vbus);
4158 }
4159 
4160 static void virtio_device_class_init(ObjectClass *klass, const void *data)
4161 {
4162     /* Set the default value here. */
4163     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4164     DeviceClass *dc = DEVICE_CLASS(klass);
4165 
4166     dc->realize = virtio_device_realize;
4167     dc->unrealize = virtio_device_unrealize;
4168     dc->bus_type = TYPE_VIRTIO_BUS;
4169     device_class_set_props(dc, virtio_properties);
4170     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
4171     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
4172 
4173     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
4174 }
4175 
4176 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
4177 {
4178     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
4179     VirtioBusState *vbus = VIRTIO_BUS(qbus);
4180 
4181     return virtio_bus_ioeventfd_enabled(vbus);
4182 }
4183 
4184 VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
4185                                                  uint16_t queue,
4186                                                  Error **errp)
4187 {
4188     VirtIODevice *vdev;
4189     VirtQueueStatus *status;
4190 
4191     vdev = qmp_find_virtio_device(path);
4192     if (vdev == NULL) {
4193         error_setg(errp, "Path %s is not a VirtIODevice", path);
4194         return NULL;
4195     }
4196 
4197     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4198         error_setg(errp, "Invalid virtqueue number %d", queue);
4199         return NULL;
4200     }
4201 
4202     status = g_new0(VirtQueueStatus, 1);
4203     status->name = g_strdup(vdev->name);
4204     status->queue_index = vdev->vq[queue].queue_index;
4205     status->inuse = vdev->vq[queue].inuse;
4206     status->vring_num = vdev->vq[queue].vring.num;
4207     status->vring_num_default = vdev->vq[queue].vring.num_default;
4208     status->vring_align = vdev->vq[queue].vring.align;
4209     status->vring_desc = vdev->vq[queue].vring.desc;
4210     status->vring_avail = vdev->vq[queue].vring.avail;
4211     status->vring_used = vdev->vq[queue].vring.used;
4212     status->used_idx = vdev->vq[queue].used_idx;
4213     status->signalled_used = vdev->vq[queue].signalled_used;
4214     status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;
4215 
4216     if (vdev->vhost_started) {
4217         VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
4218         struct vhost_dev *hdev = vdc->get_vhost(vdev);
4219 
4220         /* check if vq index exists for vhost as well  */
4221         if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
4222             status->has_last_avail_idx = true;
4223 
4224             int vhost_vq_index =
4225                 hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
4226             struct vhost_vring_state state = {
4227                 .index = vhost_vq_index,
4228             };
4229 
4230             status->last_avail_idx =
4231                 hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
4232         }
4233     } else {
4234         status->has_shadow_avail_idx = true;
4235         status->has_last_avail_idx = true;
4236         status->last_avail_idx = vdev->vq[queue].last_avail_idx;
4237         status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
4238     }
4239 
4240     return status;
4241 }
4242 
4243 static strList *qmp_decode_vring_desc_flags(uint16_t flags)
4244 {
4245     strList *list = NULL;
4246     strList *node;
4247     int i;
4248 
4249     struct {
4250         uint16_t flag;
4251         const char *value;
4252     } map[] = {
4253         { VRING_DESC_F_NEXT, "next" },
4254         { VRING_DESC_F_WRITE, "write" },
4255         { VRING_DESC_F_INDIRECT, "indirect" },
4256         { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
4257         { 1 << VRING_PACKED_DESC_F_USED, "used" },
4258         { 0, "" }
4259     };
4260 
4261     for (i = 0; map[i].flag; i++) {
4262         if ((map[i].flag & flags) == 0) {
4263             continue;
4264         }
4265         node = g_malloc0(sizeof(strList));
4266         node->value = g_strdup(map[i].value);
4267         node->next = list;
4268         list = node;
4269     }
4270 
4271     return list;
4272 }
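/*
 * Illustrative, standalone sketch (not part of virtio.c): the same
 * table-driven flag decode as qmp_decode_vring_desc_flags() above, but
 * printing names instead of building a QAPI strList.  The flag values
 * mirror the standard virtio ring definitions (NEXT=1, WRITE=2,
 * INDIRECT=4, packed AVAIL=bit 7, packed USED=bit 15).
 */
#include <stdint.h>
#include <stdio.h>

static void print_desc_flags(uint16_t flags)
{
    static const struct { uint16_t flag; const char *name; } map[] = {
        { 1 << 0,  "next" },
        { 1 << 1,  "write" },
        { 1 << 2,  "indirect" },
        { 1 << 7,  "avail" },
        { 1 << 15, "used" },
        { 0, NULL },
    };

    for (int i = 0; map[i].flag; i++) {
        if (flags & map[i].flag) {
            printf("%s ", map[i].name);
        }
    }
    printf("\n");
}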
4273 
4274 VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
4275                                                      uint16_t queue,
4276                                                      bool has_index,
4277                                                      uint16_t index,
4278                                                      Error **errp)
4279 {
4280     VirtIODevice *vdev;
4281     VirtQueue *vq;
4282     VirtioQueueElement *element = NULL;
4283 
4284     vdev = qmp_find_virtio_device(path);
4285     if (vdev == NULL) {
4286         error_setg(errp, "Path %s is not a VirtIO device", path);
4287         return NULL;
4288     }
4289 
4290     if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
4291         error_setg(errp, "Invalid virtqueue number %d", queue);
4292         return NULL;
4293     }
4294     vq = &vdev->vq[queue];
4295 
4296     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
4297         error_setg(errp, "Packed ring not supported");
4298         return NULL;
4299     } else {
4300         unsigned int head, i, max;
4301         VRingMemoryRegionCaches *caches;
4302         MemoryRegionCache indirect_desc_cache;
4303         MemoryRegionCache *desc_cache;
4304         VRingDesc desc;
4305         VirtioRingDescList *list = NULL;
4306         VirtioRingDescList *node;
4307         int rc; int ndescs;
4308 
4309         address_space_cache_init_empty(&indirect_desc_cache);
4310 
4311         RCU_READ_LOCK_GUARD();
4312 
4313         max = vq->vring.num;
4314 
4315         if (!has_index) {
4316             head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
4317         } else {
4318             head = vring_avail_ring(vq, index % vq->vring.num);
4319         }
4320         i = head;
4321 
4322         caches = vring_get_region_caches(vq);
4323         if (!caches) {
4324             error_setg(errp, "Region caches not initialized");
4325             return NULL;
4326         }
4327         if (caches->desc.len < max * sizeof(VRingDesc)) {
4328             error_setg(errp, "Cannot map descriptor ring");
4329             return NULL;
4330         }
4331 
4332         desc_cache = &caches->desc;
4333         vring_split_desc_read(vdev, &desc, desc_cache, i);
4334         if (desc.flags & VRING_DESC_F_INDIRECT) {
4335             int64_t len;
4336             len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
4337                                            desc.addr, desc.len, false);
4338             desc_cache = &indirect_desc_cache;
4339             if (len < desc.len) {
4340                 error_setg(errp, "Cannot map indirect buffer");
4341                 goto done;
4342             }
4343 
4344             max = desc.len / sizeof(VRingDesc);
4345             i = 0;
4346             vring_split_desc_read(vdev, &desc, desc_cache, i);
4347         }
4348 
4349         element = g_new0(VirtioQueueElement, 1);
4350         element->avail = g_new0(VirtioRingAvail, 1);
4351         element->used = g_new0(VirtioRingUsed, 1);
4352         element->name = g_strdup(vdev->name);
4353         element->index = head;
4354         element->avail->flags = vring_avail_flags(vq);
4355         element->avail->idx = vring_avail_idx(vq);
4356         element->avail->ring = head;
4357         element->used->flags = vring_used_flags(vq);
4358         element->used->idx = vring_used_idx(vq);
4359         ndescs = 0;
4360 
4361         do {
4362             /* A buggy driver may produce an infinite loop */
4363             if (ndescs >= max) {
4364                 break;
4365             }
4366             node = g_new0(VirtioRingDescList, 1);
4367             node->value = g_new0(VirtioRingDesc, 1);
4368             node->value->addr = desc.addr;
4369             node->value->len = desc.len;
4370             node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
4371             node->next = list;
4372             list = node;
4373 
4374             ndescs++;
4375             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
4376         } while (rc == VIRTQUEUE_READ_DESC_MORE);
4377         element->descs = list;
4378 done:
4379         address_space_cache_destroy(&indirect_desc_cache);
4380     }
4381 
4382     return element;
4383 }
4384 
4385 static const TypeInfo virtio_device_info = {
4386     .name = TYPE_VIRTIO_DEVICE,
4387     .parent = TYPE_DEVICE,
4388     .instance_size = sizeof(VirtIODevice),
4389     .class_init = virtio_device_class_init,
4390     .instance_finalize = virtio_device_instance_finalize,
4391     .abstract = true,
4392     .class_size = sizeof(VirtioDeviceClass),
4393 };
4394 
4395 static void virtio_register_types(void)
4396 {
4397     type_register_static(&virtio_device_info);
4398 }
4399 
4400 type_init(virtio_register_types)
4401 
4402 QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
4403                                    QEMUBHFunc *cb, void *opaque,
4404                                    const char *name)
4405 {
4406     DeviceState *transport = qdev_get_parent_bus(dev)->parent;
4407 
4408     return qemu_bh_new_full(cb, opaque, name,
4409                             &transport->mem_reentrancy_guard);
4410 }
4411