/*
 * vhost shadow virtqueue
 *
 * SPDX-FileCopyrightText: Red Hat, Inc. 2021
 * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"

/**
 * Validate the transport device features that both the guest can use with the
 * SVQ and the SVQ can use with the device.
 *
 * @features: The features
 * @errp: Error pointer
 */
bool vhost_svq_valid_features(uint64_t features, Error **errp)
{
    bool ok = true;
    uint64_t svq_features = features;

    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
         ++b) {
        switch (b) {
        case VIRTIO_F_ANY_LAYOUT:
        case VIRTIO_RING_F_EVENT_IDX:
            continue;

        case VIRTIO_F_ACCESS_PLATFORM:
            /* SVQ trusts the host's IOMMU to translate addresses */
        case VIRTIO_F_VERSION_1:
            /* SVQ trusts that the guest vring is little endian */
            if (!(svq_features & BIT_ULL(b))) {
                svq_features |= BIT_ULL(b);
                ok = false;
            }
            continue;

        default:
            if (svq_features & BIT_ULL(b)) {
                svq_features &= ~BIT_ULL(b);
                ok = false;
            }
        }
    }

    if (!ok) {
        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
                         ", ok: 0x%"PRIx64, features, svq_features);
    }
    return ok;
}

/**
 * Number of descriptors that the SVQ can make available from the guest.
 *
 * @svq: The svq
 */
uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
{
    return svq->num_free;
}

/**
 * Translate addresses between QEMU's virtual addresses and the SVQ IOVA
 *
 * @svq: Shadow VirtQueue
 * @addrs: Destination for the translated IOVA addresses
 * @iovec: Source QEMU's VA addresses
 * @num: Length of iovec and minimum length of addrs
 * @gpas: Descriptors' GPAs, if backed by guest memory
 */
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                     hwaddr *addrs, const struct iovec *iovec,
                                     size_t num, const hwaddr *gpas)
{
    if (num == 0) {
        return true;
    }

    for (size_t i = 0; i < num; ++i) {
        Int128 needle_last, map_last;
        size_t off;
        const DMAMap *map;
        DMAMap needle;

        /* Check if the descriptor is backed by guest memory */
        if (gpas) {
            /* Search the GPA->IOVA tree */
            needle = (DMAMap) {
                .translated_addr = gpas[i],
                .size = iovec[i].iov_len,
            };
            map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
        } else {
            /* Search the IOVA->HVA tree */
            needle = (DMAMap) {
                .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
                .size = iovec[i].iov_len,
            };
            map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
        }

        /*
         * The map cannot be NULL since the IOVA tree contains all guest space
         * and QEMU already has a physical address mapped
         */
        if (unlikely(!map)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
                          needle.translated_addr);
            return false;
        }

        off = needle.translated_addr - map->translated_addr;
        addrs[i] = map->iova + off;

        needle_last = int128_add(int128_make64(needle.translated_addr),
                                 int128_makes64(iovec[i].iov_len - 1));
        map_last = int128_make64(map->translated_addr + map->size);
        if (unlikely(int128_gt(needle_last, map_last))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Guest buffer expands over iova range");
            return false;
        }
    }

    return true;
}

/**
 * Write descriptors to the SVQ vring
 *
 * @svq: The shadow virtqueue
 * @sg: Cache for hwaddr
 * @iovec: The iovec from the guest
 * @num: iovec length
 * @addr: Descriptors' GPAs, if backed by guest memory
 * @more_descs: True if more descriptors come in the chain
 * @write: True if they are writable descriptors
 *
 * Returns true on success, false otherwise (and logs the error).
 */
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                        const struct iovec *iovec, size_t num,
                                        const hwaddr *addr, bool more_descs,
                                        bool write)
{
    uint16_t i = svq->free_head, last = svq->free_head;
    unsigned n;
    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
    vring_desc_t *descs = svq->vring.desc;
    bool ok;

    if (num == 0) {
        return true;
    }

    ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
    if (unlikely(!ok)) {
        return false;
    }

    for (n = 0; n < num; n++) {
        if (more_descs || (n + 1 < num)) {
            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            descs[i].next = cpu_to_le16(svq->desc_next[i]);
        } else {
            descs[i].flags = flags;
        }
        descs[i].addr = cpu_to_le64(sg[n]);
        descs[i].len = cpu_to_le32(iovec[n].iov_len);

        last = i;
        i = svq->desc_next[i];
    }

    svq->free_head = svq->desc_next[last];
    return true;
}

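/**
 * Add an element to the SVQ split vring: write the out and in descriptor
 * chains and expose the chain's head in the avail ring.
 *
 * @svq: The shadow virtqueue
 * @out_sg: Device-readable iovec from the guest
 * @out_num: Length of @out_sg
 * @out_addr: GPAs of @out_sg, if backed by guest memory
 * @in_sg: Device-writable iovec from the guest
 * @in_num: Length of @in_sg
 * @in_addr: GPAs of @in_sg, if backed by guest memory
 * @head: Destination for the head index of the added chain
 *
 * Returns true on success, false otherwise (error already logged).
 */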
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                const struct iovec *out_sg, size_t out_num,
                                const hwaddr *out_addr,
                                const struct iovec *in_sg, size_t in_num,
                                const hwaddr *in_addr, unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;
    bool ok;
    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));

    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!out_num && !in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
                                     in_num > 0, false);
    if (unlikely(!ok)) {
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
                                     true);
    if (unlikely(!ok)) {
        return false;
    }

    /*
     * Put the entry in the available array (but don't update avail->idx until
     * the sync below).
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index after writing the descriptors */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}

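/**
 * Kick the vhost device, unless it has suppressed notifications through the
 * avail event index or VRING_USED_F_NO_NOTIFY.
 *
 * @svq: The shadow virtqueue
 */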
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    bool needs_kick;

    /*
     * We need to expose the available array entries before checking the used
     * flags
     */
    smp_mb();

    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t avail_event = le16_to_cpu(
                *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
                                      svq->shadow_avail_idx - 1);
    } else {
        needs_kick =
                !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
    }

    if (!needs_kick) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}

/**
 * Add an element to an SVQ.
 *
 * Returns -EINVAL if the element is invalid, -ENOSPC if the device queue is
 * full, 0 on success.
 */
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
                  size_t out_num, const hwaddr *out_addr,
                  const struct iovec *in_sg, size_t in_num,
                  const hwaddr *in_addr, VirtQueueElement *elem)
{
    unsigned qemu_head;
    unsigned ndescs = in_num + out_num;
    bool ok;

    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
        return -ENOSPC;
    }

    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
                             in_addr, &qemu_head);
    if (unlikely(!ok)) {
        return -EINVAL;
    }

    svq->num_free -= ndescs;
    svq->desc_state[qemu_head].elem = elem;
    svq->desc_state[qemu_head].ndescs = ndescs;
    vhost_svq_kick(svq);
    return 0;
}

/* Convenience wrapper to add a guest's element to SVQ */
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
                         elem->in_sg, elem->in_num, elem->in_addr, elem);
}

/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all of the guest's
 * available buffers are made available to the device in the SVQ avail ring.
 * The guest may have exposed a GPA / GIOVA contiguous buffer, but it may not
 * be contiguous in QEMU's vaddr.
 *
 * If that happens, the guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        virtio_queue_set_notification(svq->vq, false);

        while (true) {
            g_autofree VirtQueueElement *elem = NULL;
            int r;

            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (svq->ops) {
                r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
            } else {
                r = vhost_svq_add_element(svq, elem);
            }
            if (unlikely(r != 0)) {
                if (r == -ENOSPC) {
                    /*
                     * This condition is possible since a contiguous buffer in
                     * GPA does not imply a contiguous buffer in qemu's VA
                     * scatter-gather segments. If that happens, the buffer
                     * exposed to the device needs to be a chain of descriptors
                     * at this moment.
                     *
                     * SVQ cannot hold more available buffers if we are here:
                     * queue the current guest descriptor and ignore kicks
                     * until some elements are used.
                     */
                    svq->next_guest_avail_elem = g_steal_pointer(&elem);
                }

                /* VQ is full or broken, just return and ignore kicks */
                return;
            }
            /* elem belongs to SVQ or external caller now */
            elem = NULL;
        }

        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}

/**
 * Handle the guest's kick.
 *
 * @n: guest kick event notifier, the one that the guest set to notify the SVQ.
 */
static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
    event_notifier_test_and_clear(n);
    vhost_handle_guest_kick(svq);
}

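/**
 * Check whether the device has marked more buffers as used since the last
 * time the SVQ processed them, refreshing the shadow used index if needed.
 *
 * @svq: The shadow virtqueue
 */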
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
    uint16_t *used_idx = &svq->vring.used->idx;
    if (svq->last_used_idx != svq->shadow_used_idx) {
        return true;
    }

    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);

    return svq->last_used_idx != svq->shadow_used_idx;
}

/**
 * Enable vhost device calls after disabling them.
 *
 * @svq: The svq
 *
 * Returns false if there are pending used buffers from the vhost device,
 * avoiding the possible races between SVQ checking for more work and enabling
 * callbacks. Returns true if the SVQ used vring has no more pending buffers.
 */
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
        *used_event = cpu_to_le16(svq->shadow_used_idx);
    } else {
        svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* Make sure the event is enabled before the read of used_idx */
    smp_mb();
    return !vhost_svq_more_used(svq);
}

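/**
 * Disable vhost device calls until vhost_svq_enable_notification() re-enables
 * them. A no-op when the event index feature is in use.
 *
 * @svq: The svq
 */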
static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    /*
     * No need to disable notification in the event idx case, since used event
     * index is already an index too far away.
     */
    if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }
}

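/**
 * Walk a descriptor chain to get the index of its last descriptor.
 *
 * @svq: The svq
 * @num: Number of descriptors in the chain
 * @i: Index of the first descriptor of the chain
 */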
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    for (uint16_t j = 0; j < (num - 1); ++j) {
        i = svq->desc_next[i];
    }

    return i;
}

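/**
 * Get the next buffer that the device marked as used, return its guest
 * element and put its descriptor chain back on the SVQ free list.
 *
 * @svq: The svq
 * @len: Destination for the length written by the device
 *
 * Returns NULL if there are no more used buffers, or if the used entry is
 * malformed (error already logged).
 */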
G_GNUC_WARN_UNUSED_RESULT
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                           uint32_t *len)
{
    const vring_used_t *used = svq->vring.used;
    vring_used_elem_t used_elem;
    uint16_t last_used, last_used_chain, num;

    if (!vhost_svq_more_used(svq)) {
        return NULL;
    }

    /* Only get used array entries after they have been exposed by dev */
    smp_rmb();
    last_used = svq->last_used_idx & (svq->vring.num - 1);
    used_elem.id = le32_to_cpu(used->ring[last_used].id);
    used_elem.len = le32_to_cpu(used->ring[last_used].len);

    svq->last_used_idx++;
    if (unlikely(used_elem.id >= svq->vring.num)) {
        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "Device %s says index %u is used, but it was not available",
            svq->vdev->name, used_elem.id);
        return NULL;
    }

    num = svq->desc_state[used_elem.id].ndescs;
    svq->desc_state[used_elem.id].ndescs = 0;
    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
    svq->desc_next[last_used_chain] = svq->free_head;
    svq->free_head = used_elem.id;
    svq->num_free += num;

    *len = used_elem.len;
    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
}

/**
 * Push an element to SVQ, returning it to the guest.
 */
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
                         const VirtQueueElement *elem, uint32_t len)
{
    virtqueue_push(svq->vq, elem, len);
    if (svq->next_guest_avail_elem) {
        /*
         * Avail ring was full when vhost_svq_flush was called, so it's a
         * good moment to make more descriptors available if possible.
         */
        vhost_handle_guest_kick(svq);
    }
}

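/**
 * Forward used buffers from the device to the guest, signalling the guest
 * call notifier after flushing them to the guest's virtqueue.
 *
 * @svq: The svq
 * @check_for_avail_queue: Also retry a guest element that was left pending
 *                         because the SVQ avail ring was full
 */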
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                qemu_log_mask(LOG_GUEST_ERROR,
                         "More than %u used buffers obtained in a %u size SVQ",
                         i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        virtqueue_flush(vq, i);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}

/**
 * Poll the SVQ to wait for the device to use the specified number
 * of elements and return the total length written by the device.
 *
 * This function races with the main event loop SVQ polling, so extra
 * synchronization is needed.
 *
 * @svq: The svq
 * @num: The number of elements that need to be used
 */
size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num)
{
    size_t len = 0;

    while (num--) {
        g_autofree VirtQueueElement *elem = NULL;
        int64_t start_us = g_get_monotonic_time();
        uint32_t r = 0;

        do {
            if (vhost_svq_more_used(svq)) {
                break;
            }

            if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
                return len;
            }
        } while (true);

        elem = vhost_svq_get_buf(svq, &r);
        len += r;
    }

    return len;
}

/**
 * Forward used buffers.
 *
 * @n: hdev call event notifier, the one that the device set to notify the SVQ.
 *
 * Note that we are not making any buffers available in the loop, so there is
 * no way that it runs more than virtqueue size times.
 */
static void vhost_svq_handle_call(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                             hdev_call);
    event_notifier_test_and_clear(n);
    vhost_svq_flush(svq, true);
}

/**
 * Set the call notifier for the SVQ to call the guest
 *
 * @svq: Shadow virtqueue
 * @call_fd: call notifier
 *
 * Called on BQL context.
 */
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
{
    if (call_fd == VHOST_FILE_UNBIND) {
        /*
         * Make event_notifier_set fail if it is called while handling a
         * device call.
         *
         * SVQ still needs device notifications, since it needs to keep
         * forwarding used buffers even with the unbind.
         */
        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
    } else {
        event_notifier_init_fd(&svq->svq_call, call_fd);
    }
}

/**
 * Get the shadow vq vring address.
 * @svq: Shadow virtqueue
 * @addr: Destination to store address
 */
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                              struct vhost_vring_addr *addr)
{
    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
}

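/**
 * Get the size of the driver (descriptor and avail) area of the SVQ vring,
 * rounded up to the host page size.
 *
 * @svq: The svq
 */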
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
{
    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
    size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
                                                              sizeof(uint16_t);

    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
}

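/**
 * Get the size of the device (used) area of the SVQ vring, rounded up to the
 * host page size.
 *
 * @svq: The svq
 */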
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
{
    size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
                                                              sizeof(uint16_t);
    return ROUND_UP(used_size, qemu_real_host_page_size());
}

/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    event_notifier_init_fd(svq_kick, svq_kick_fd);
    /*
     * event_notifier_set_handler already checks for guest's notifications if
     * they arrive at the new file descriptor in the switch, so there is no
     * need to explicitly check for them.
     */
    if (poll_start) {
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}

/**
 * Start the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 * @vdev: VirtIO device
 * @vq: Virtqueue to shadow
 * @iova_tree: Tree to perform descriptor translations
 */
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq, VhostIOVATree *iova_tree)
{
    size_t desc_size;

    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
    svq->next_guest_avail_elem = NULL;
    svq->shadow_avail_idx = 0;
    svq->shadow_used_idx = 0;
    svq->last_used_idx = 0;
    svq->vdev = vdev;
    svq->vq = vq;
    svq->iova_tree = iova_tree;

    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
    svq->num_free = svq->vring.num;
    svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    desc_size = sizeof(vring_desc_t) * svq->vring.num;
    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
    svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
    svq->desc_next = g_new0(uint16_t, svq->vring.num);
    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
        svq->desc_next[i] = i + 1;
    }
}

/**
 * Stop the shadow virtqueue operation.
 * @svq: Shadow Virtqueue
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->desc_state[i].elem);
        if (elem) {
            /*
             * TODO: This is ok for networking, but other kinds of devices
             * might have problems with just unpopping these.
             */
            virtqueue_unpop(svq->vq, elem, 0);
        }
    }

    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_unpop(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->desc_next);
    g_free(svq->desc_state);
    munmap(svq->vring.desc, vhost_svq_driver_area_size(svq));
    munmap(svq->vring.used, vhost_svq_device_area_size(svq));
    event_notifier_set_handler(&svq->hdev_call, NULL);
}

/**
 * Creates vhost shadow virtqueue, and instructs the vhost device to use the
 * shadow methods and file descriptors.
 *
 * @ops: SVQ owner callbacks
 * @ops_opaque: ops opaque pointer
 */
VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
                                    void *ops_opaque)
{
    VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);

    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
    svq->ops = ops;
    svq->ops_opaque = ops_opaque;
    return svq;
}

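/*
 * Typical lifecycle (a minimal sketch, not code from this file; vdev, vq,
 * iova_tree, kick_fd and call_fd are illustrative names assumed to be
 * provided by the vhost backend, e.g. vhost-vdpa):
 *
 *     VhostShadowVirtqueue *svq = vhost_svq_new(NULL, NULL);
 *     vhost_svq_start(svq, vdev, vq, iova_tree);
 *     vhost_svq_set_svq_kick_fd(svq, kick_fd);
 *     vhost_svq_set_svq_call_fd(svq, call_fd);
 *     ...
 *     vhost_svq_stop(svq);
 *     vhost_svq_free(svq);
 */
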
/**
 * Free the resources of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;
    vhost_svq_stop(vq);
    g_free(vq);
}