xref: /qemu/hw/virtio/vhost-shadow-virtqueue.c (revision b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124)
110857ec0SEugenio Pérez /*
210857ec0SEugenio Pérez  * vhost shadow virtqueue
310857ec0SEugenio Pérez  *
410857ec0SEugenio Pérez  * SPDX-FileCopyrightText: Red Hat, Inc. 2021
510857ec0SEugenio Pérez  * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
610857ec0SEugenio Pérez  *
710857ec0SEugenio Pérez  * SPDX-License-Identifier: GPL-2.0-or-later
810857ec0SEugenio Pérez  */
910857ec0SEugenio Pérez 
1010857ec0SEugenio Pérez #include "qemu/osdep.h"
1110857ec0SEugenio Pérez #include "hw/virtio/vhost-shadow-virtqueue.h"
1210857ec0SEugenio Pérez 
1310857ec0SEugenio Pérez #include "qemu/error-report.h"
144725a418SEugenio Pérez #include "qapi/error.h"
15dff4426fSEugenio Pérez #include "qemu/main-loop.h"
16100890f7SEugenio Pérez #include "qemu/log.h"
17100890f7SEugenio Pérez #include "qemu/memalign.h"
18dff4426fSEugenio Pérez #include "linux-headers/linux/vhost.h"
19dff4426fSEugenio Pérez 
20dff4426fSEugenio Pérez /**
214725a418SEugenio Pérez  * Validate the transport device features that both guests can use with the SVQ
224725a418SEugenio Pérez  * and SVQs can use with the device.
234725a418SEugenio Pérez  *
244725a418SEugenio Pérez  * @dev_features: The features
254725a418SEugenio Pérez  * @errp: Error pointer
264725a418SEugenio Pérez  */
vhost_svq_valid_features(uint64_t features,Error ** errp)274725a418SEugenio Pérez bool vhost_svq_valid_features(uint64_t features, Error **errp)
284725a418SEugenio Pérez {
294725a418SEugenio Pérez     bool ok = true;
304725a418SEugenio Pérez     uint64_t svq_features = features;
314725a418SEugenio Pérez 
324725a418SEugenio Pérez     for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
334725a418SEugenio Pérez          ++b) {
344725a418SEugenio Pérez         switch (b) {
354725a418SEugenio Pérez         case VIRTIO_F_ANY_LAYOUT:
36396d5126SEugenio Pérez         case VIRTIO_RING_F_EVENT_IDX:
374725a418SEugenio Pérez             continue;
384725a418SEugenio Pérez 
394725a418SEugenio Pérez         case VIRTIO_F_ACCESS_PLATFORM:
404725a418SEugenio Pérez             /* SVQ trust in the host's IOMMU to translate addresses */
414725a418SEugenio Pérez         case VIRTIO_F_VERSION_1:
424725a418SEugenio Pérez             /* SVQ trust that the guest vring is little endian */
434725a418SEugenio Pérez             if (!(svq_features & BIT_ULL(b))) {
444725a418SEugenio Pérez                 svq_features |= BIT_ULL(b);
454725a418SEugenio Pérez                 ok = false;
464725a418SEugenio Pérez             }
474725a418SEugenio Pérez             continue;
484725a418SEugenio Pérez 
494725a418SEugenio Pérez         default:
504725a418SEugenio Pérez             if (svq_features & BIT_ULL(b)) {
514725a418SEugenio Pérez                 svq_features &= ~BIT_ULL(b);
524725a418SEugenio Pérez                 ok = false;
534725a418SEugenio Pérez             }
544725a418SEugenio Pérez         }
554725a418SEugenio Pérez     }
564725a418SEugenio Pérez 
574725a418SEugenio Pérez     if (!ok) {
584725a418SEugenio Pérez         error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
594725a418SEugenio Pérez                          ", ok: 0x%"PRIx64, features, svq_features);
604725a418SEugenio Pérez     }
614725a418SEugenio Pérez     return ok;
624725a418SEugenio Pérez }
634725a418SEugenio Pérez 
644725a418SEugenio Pérez /**
65100890f7SEugenio Pérez  * Number of descriptors that the SVQ can make available from the guest.
66dff4426fSEugenio Pérez  *
67100890f7SEugenio Pérez  * @svq: The svq
68dff4426fSEugenio Pérez  */
vhost_svq_available_slots(const VhostShadowVirtqueue * svq)6999d6a324SHawkins Jiawei uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
70dff4426fSEugenio Pérez {
715d410557SHawkins Jiawei     return svq->num_free;
72100890f7SEugenio Pérez }
73100890f7SEugenio Pérez 
7434e3c94eSEugenio Pérez /**
7534e3c94eSEugenio Pérez  * Translate addresses between the qemu's virtual address and the SVQ IOVA
7634e3c94eSEugenio Pérez  *
7734e3c94eSEugenio Pérez  * @svq: Shadow VirtQueue
7834e3c94eSEugenio Pérez  * @vaddr: Translated IOVA addresses
7934e3c94eSEugenio Pérez  * @iovec: Source qemu's VA addresses
8034e3c94eSEugenio Pérez  * @num: Length of iovec and minimum length of vaddr
8105063f55SJonah Palmer  * @gpas: Descriptors' GPAs, if backed by guest memory
8234e3c94eSEugenio Pérez  */
vhost_svq_translate_addr(const VhostShadowVirtqueue * svq,hwaddr * addrs,const struct iovec * iovec,size_t num,const hwaddr * gpas)8334e3c94eSEugenio Pérez static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
8434e3c94eSEugenio Pérez                                      hwaddr *addrs, const struct iovec *iovec,
8505063f55SJonah Palmer                                      size_t num, const hwaddr *gpas)
8634e3c94eSEugenio Pérez {
8734e3c94eSEugenio Pérez     if (num == 0) {
8834e3c94eSEugenio Pérez         return true;
8934e3c94eSEugenio Pérez     }
9034e3c94eSEugenio Pérez 
9134e3c94eSEugenio Pérez     for (size_t i = 0; i < num; ++i) {
9205063f55SJonah Palmer         Int128 needle_last, map_last;
9305063f55SJonah Palmer         size_t off;
9405063f55SJonah Palmer         const DMAMap *map;
9505063f55SJonah Palmer         DMAMap needle;
9605063f55SJonah Palmer 
9705063f55SJonah Palmer         /* Check if the descriptor is backed by guest memory  */
9805063f55SJonah Palmer         if (gpas) {
9905063f55SJonah Palmer             /* Search the GPA->IOVA tree */
10005063f55SJonah Palmer             needle = (DMAMap) {
10105063f55SJonah Palmer                 .translated_addr = gpas[i],
10205063f55SJonah Palmer                 .size = iovec[i].iov_len,
10305063f55SJonah Palmer             };
10405063f55SJonah Palmer             map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
10505063f55SJonah Palmer         } else {
10605063f55SJonah Palmer             /* Search the IOVA->HVA tree */
10705063f55SJonah Palmer             needle = (DMAMap) {
10834e3c94eSEugenio Pérez                 .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
10934e3c94eSEugenio Pérez                 .size = iovec[i].iov_len,
11034e3c94eSEugenio Pérez             };
11105063f55SJonah Palmer             map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
11205063f55SJonah Palmer         }
11334e3c94eSEugenio Pérez 
11434e3c94eSEugenio Pérez         /*
11534e3c94eSEugenio Pérez          * Map cannot be NULL since iova map contains all guest space and
11634e3c94eSEugenio Pérez          * qemu already has a physical address mapped
11734e3c94eSEugenio Pérez          */
11834e3c94eSEugenio Pérez         if (unlikely(!map)) {
11934e3c94eSEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR,
12034e3c94eSEugenio Pérez                           "Invalid address 0x%"HWADDR_PRIx" given by guest",
12134e3c94eSEugenio Pérez                           needle.translated_addr);
12234e3c94eSEugenio Pérez             return false;
12334e3c94eSEugenio Pérez         }
12434e3c94eSEugenio Pérez 
12534e3c94eSEugenio Pérez         off = needle.translated_addr - map->translated_addr;
12634e3c94eSEugenio Pérez         addrs[i] = map->iova + off;
12734e3c94eSEugenio Pérez 
12834e3c94eSEugenio Pérez         needle_last = int128_add(int128_make64(needle.translated_addr),
129b77a5f22SHawkins Jiawei                                  int128_makes64(iovec[i].iov_len - 1));
13034e3c94eSEugenio Pérez         map_last = int128_make64(map->translated_addr + map->size);
13134e3c94eSEugenio Pérez         if (unlikely(int128_gt(needle_last, map_last))) {
13234e3c94eSEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR,
13334e3c94eSEugenio Pérez                           "Guest buffer expands over iova range");
13434e3c94eSEugenio Pérez             return false;
13534e3c94eSEugenio Pérez         }
13634e3c94eSEugenio Pérez     }
13734e3c94eSEugenio Pérez 
13834e3c94eSEugenio Pérez     return true;
13934e3c94eSEugenio Pérez }
14034e3c94eSEugenio Pérez 
141009c2549SEugenio Pérez /**
142009c2549SEugenio Pérez  * Write descriptors to SVQ vring
143009c2549SEugenio Pérez  *
144009c2549SEugenio Pérez  * @svq: The shadow virtqueue
145009c2549SEugenio Pérez  * @sg: Cache for hwaddr
146009c2549SEugenio Pérez  * @iovec: The iovec from the guest
147009c2549SEugenio Pérez  * @num: iovec length
14805063f55SJonah Palmer  * @addr: Descriptors' GPAs, if backed by guest memory
149009c2549SEugenio Pérez  * @more_descs: True if more descriptors come in the chain
150009c2549SEugenio Pérez  * @write: True if they are writeable descriptors
151009c2549SEugenio Pérez  *
152009c2549SEugenio Pérez  * Return true if success, false otherwise and print error.
153009c2549SEugenio Pérez  */
vhost_svq_vring_write_descs(VhostShadowVirtqueue * svq,hwaddr * sg,const struct iovec * iovec,size_t num,const hwaddr * addr,bool more_descs,bool write)154009c2549SEugenio Pérez static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
155100890f7SEugenio Pérez                                         const struct iovec *iovec, size_t num,
15605063f55SJonah Palmer                                         const hwaddr *addr, bool more_descs,
15705063f55SJonah Palmer                                         bool write)
158100890f7SEugenio Pérez {
159100890f7SEugenio Pérez     uint16_t i = svq->free_head, last = svq->free_head;
160100890f7SEugenio Pérez     unsigned n;
161100890f7SEugenio Pérez     uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
162100890f7SEugenio Pérez     vring_desc_t *descs = svq->vring.desc;
163009c2549SEugenio Pérez     bool ok;
164100890f7SEugenio Pérez 
165100890f7SEugenio Pérez     if (num == 0) {
166009c2549SEugenio Pérez         return true;
167009c2549SEugenio Pérez     }
168009c2549SEugenio Pérez 
16905063f55SJonah Palmer     ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
170009c2549SEugenio Pérez     if (unlikely(!ok)) {
171009c2549SEugenio Pérez         return false;
172100890f7SEugenio Pérez     }
173100890f7SEugenio Pérez 
174100890f7SEugenio Pérez     for (n = 0; n < num; n++) {
175100890f7SEugenio Pérez         if (more_descs || (n + 1 < num)) {
176100890f7SEugenio Pérez             descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
177495fe3a7SEugenio Pérez             descs[i].next = cpu_to_le16(svq->desc_next[i]);
178100890f7SEugenio Pérez         } else {
179100890f7SEugenio Pérez             descs[i].flags = flags;
180100890f7SEugenio Pérez         }
18134e3c94eSEugenio Pérez         descs[i].addr = cpu_to_le64(sg[n]);
182100890f7SEugenio Pérez         descs[i].len = cpu_to_le32(iovec[n].iov_len);
183100890f7SEugenio Pérez 
184100890f7SEugenio Pérez         last = i;
185*50e97541SKonstantin Shkolnyy         i = svq->desc_next[i];
186100890f7SEugenio Pérez     }
187100890f7SEugenio Pérez 
188*50e97541SKonstantin Shkolnyy     svq->free_head = svq->desc_next[last];
189009c2549SEugenio Pérez     return true;
190100890f7SEugenio Pérez }
191100890f7SEugenio Pérez 
vhost_svq_add_split(VhostShadowVirtqueue * svq,const struct iovec * out_sg,size_t out_num,const hwaddr * out_addr,const struct iovec * in_sg,size_t in_num,const hwaddr * in_addr,unsigned * head)192100890f7SEugenio Pérez static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
1931f46ae65SEugenio Pérez                                 const struct iovec *out_sg, size_t out_num,
19405063f55SJonah Palmer                                 const hwaddr *out_addr,
1951f46ae65SEugenio Pérez                                 const struct iovec *in_sg, size_t in_num,
19605063f55SJonah Palmer                                 const hwaddr *in_addr, unsigned *head)
197100890f7SEugenio Pérez {
198100890f7SEugenio Pérez     unsigned avail_idx;
199100890f7SEugenio Pérez     vring_avail_t *avail = svq->vring.avail;
20034e3c94eSEugenio Pérez     bool ok;
2011f46ae65SEugenio Pérez     g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
202100890f7SEugenio Pérez 
203100890f7SEugenio Pérez     *head = svq->free_head;
204100890f7SEugenio Pérez 
205100890f7SEugenio Pérez     /* We need some descriptors here */
2061f46ae65SEugenio Pérez     if (unlikely(!out_num && !in_num)) {
207100890f7SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR,
208100890f7SEugenio Pérez                       "Guest provided element with no descriptors");
209100890f7SEugenio Pérez         return false;
210100890f7SEugenio Pérez     }
211100890f7SEugenio Pérez 
21205063f55SJonah Palmer     ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
21305063f55SJonah Palmer                                      in_num > 0, false);
21434e3c94eSEugenio Pérez     if (unlikely(!ok)) {
21534e3c94eSEugenio Pérez         return false;
21634e3c94eSEugenio Pérez     }
21734e3c94eSEugenio Pérez 
21805063f55SJonah Palmer     ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
21905063f55SJonah Palmer                                      true);
220009c2549SEugenio Pérez     if (unlikely(!ok)) {
221009c2549SEugenio Pérez         return false;
222009c2549SEugenio Pérez     }
223100890f7SEugenio Pérez 
224100890f7SEugenio Pérez     /*
225100890f7SEugenio Pérez      * Put the entry in the available array (but don't update avail->idx until
226100890f7SEugenio Pérez      * they do sync).
227100890f7SEugenio Pérez      */
228100890f7SEugenio Pérez     avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
229100890f7SEugenio Pérez     avail->ring[avail_idx] = cpu_to_le16(*head);
230100890f7SEugenio Pérez     svq->shadow_avail_idx++;
231100890f7SEugenio Pérez 
232100890f7SEugenio Pérez     /* Update the avail index after write the descriptor */
233100890f7SEugenio Pérez     smp_wmb();
234100890f7SEugenio Pérez     avail->idx = cpu_to_le16(svq->shadow_avail_idx);
235100890f7SEugenio Pérez 
236100890f7SEugenio Pérez     return true;
237100890f7SEugenio Pérez }
238100890f7SEugenio Pérez 
/**
 * Notify the device about new available buffers, unless the device has asked
 * not to be notified.
 *
 * @svq: The shadow virtqueue
 */
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    /*
     * The available array entries must be exposed before the device's
     * notification suppression state is checked.
     */
    smp_mb();

    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* avail_event is read from the slot right after the used ring */
        uint16_t *event_slot =
            (uint16_t *)(&svq->vring.used->ring[svq->vring.num]);
        uint16_t avail_event = le16_to_cpu(*event_slot);

        if (!vring_need_event(avail_event, svq->shadow_avail_idx,
                              svq->shadow_avail_idx - 1)) {
            return;
        }
    } else if (svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY)) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}
264d93a2405SEugenio Pérez 
2655181db13SEugenio Pérez /**
2665181db13SEugenio Pérez  * Add an element to a SVQ.
2675181db13SEugenio Pérez  *
268f20b70ebSEugenio Pérez  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
2695181db13SEugenio Pérez  */
vhost_svq_add(VhostShadowVirtqueue * svq,const struct iovec * out_sg,size_t out_num,const hwaddr * out_addr,const struct iovec * in_sg,size_t in_num,const hwaddr * in_addr,VirtQueueElement * elem)270d0291f3fSEugenio Pérez int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
27105063f55SJonah Palmer                   size_t out_num, const hwaddr *out_addr,
27205063f55SJonah Palmer                   const struct iovec *in_sg, size_t in_num,
27305063f55SJonah Palmer                   const hwaddr *in_addr, VirtQueueElement *elem)
274100890f7SEugenio Pérez {
275100890f7SEugenio Pérez     unsigned qemu_head;
2761f46ae65SEugenio Pérez     unsigned ndescs = in_num + out_num;
277f20b70ebSEugenio Pérez     bool ok;
278f20b70ebSEugenio Pérez 
279f20b70ebSEugenio Pérez     if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
280f20b70ebSEugenio Pérez         return -ENOSPC;
281f20b70ebSEugenio Pérez     }
282f20b70ebSEugenio Pérez 
28305063f55SJonah Palmer     ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
28405063f55SJonah Palmer                              in_addr, &qemu_head);
285100890f7SEugenio Pérez     if (unlikely(!ok)) {
286f20b70ebSEugenio Pérez         return -EINVAL;
287100890f7SEugenio Pérez     }
288100890f7SEugenio Pérez 
2895d410557SHawkins Jiawei     svq->num_free -= ndescs;
2909e87868fSEugenio Pérez     svq->desc_state[qemu_head].elem = elem;
291ac4cfdc6SEugenio Pérez     svq->desc_state[qemu_head].ndescs = ndescs;
29298b5adefSEugenio Pérez     vhost_svq_kick(svq);
293f20b70ebSEugenio Pérez     return 0;
294100890f7SEugenio Pérez }
295100890f7SEugenio Pérez 
2961f46ae65SEugenio Pérez /* Convenience wrapper to add a guest's element to SVQ */
vhost_svq_add_element(VhostShadowVirtqueue * svq,VirtQueueElement * elem)2971f46ae65SEugenio Pérez static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
2981f46ae65SEugenio Pérez                                  VirtQueueElement *elem)
2991f46ae65SEugenio Pérez {
30005063f55SJonah Palmer     return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
30105063f55SJonah Palmer                          elem->in_sg, elem->in_num, elem->in_addr, elem);
3021f46ae65SEugenio Pérez }
3031f46ae65SEugenio Pérez 
304dff4426fSEugenio Pérez /**
305100890f7SEugenio Pérez  * Forward available buffers.
306100890f7SEugenio Pérez  *
307100890f7SEugenio Pérez  * @svq: Shadow VirtQueue
308100890f7SEugenio Pérez  *
309100890f7SEugenio Pérez  * Note that this function does not guarantee that all guest's available
310100890f7SEugenio Pérez  * buffers are available to the device in SVQ avail ring. The guest may have
311100890f7SEugenio Pérez  * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
312100890f7SEugenio Pérez  * qemu vaddr.
313100890f7SEugenio Pérez  *
314100890f7SEugenio Pérez  * If that happens, guest's kick notifications will be disabled until the
315100890f7SEugenio Pérez  * device uses some buffers.
316100890f7SEugenio Pérez  */
vhost_handle_guest_kick(VhostShadowVirtqueue * svq)317100890f7SEugenio Pérez static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
318100890f7SEugenio Pérez {
319100890f7SEugenio Pérez     /* Clear event notifier */
320100890f7SEugenio Pérez     event_notifier_test_and_clear(&svq->svq_kick);
321100890f7SEugenio Pérez 
322100890f7SEugenio Pérez     /* Forward to the device as many available buffers as possible */
323100890f7SEugenio Pérez     do {
324100890f7SEugenio Pérez         virtio_queue_set_notification(svq->vq, false);
325100890f7SEugenio Pérez 
326100890f7SEugenio Pérez         while (true) {
327ad9f958dSBernhard Beschow             g_autofree VirtQueueElement *elem = NULL;
328f20b70ebSEugenio Pérez             int r;
329100890f7SEugenio Pérez 
330100890f7SEugenio Pérez             if (svq->next_guest_avail_elem) {
331100890f7SEugenio Pérez                 elem = g_steal_pointer(&svq->next_guest_avail_elem);
332100890f7SEugenio Pérez             } else {
333100890f7SEugenio Pérez                 elem = virtqueue_pop(svq->vq, sizeof(*elem));
334100890f7SEugenio Pérez             }
335100890f7SEugenio Pérez 
336100890f7SEugenio Pérez             if (!elem) {
337100890f7SEugenio Pérez                 break;
338100890f7SEugenio Pérez             }
339100890f7SEugenio Pérez 
340e966c0b7SEugenio Pérez             if (svq->ops) {
341e966c0b7SEugenio Pérez                 r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
342e966c0b7SEugenio Pérez             } else {
3431f46ae65SEugenio Pérez                 r = vhost_svq_add_element(svq, elem);
344e966c0b7SEugenio Pérez             }
345f20b70ebSEugenio Pérez             if (unlikely(r != 0)) {
346f20b70ebSEugenio Pérez                 if (r == -ENOSPC) {
347100890f7SEugenio Pérez                     /*
348f20b70ebSEugenio Pérez                      * This condition is possible since a contiguous buffer in
349f20b70ebSEugenio Pérez                      * GPA does not imply a contiguous buffer in qemu's VA
350f20b70ebSEugenio Pérez                      * scatter-gather segments. If that happens, the buffer
351f20b70ebSEugenio Pérez                      * exposed to the device needs to be a chain of descriptors
352f20b70ebSEugenio Pérez                      * at this moment.
353100890f7SEugenio Pérez                      *
354100890f7SEugenio Pérez                      * SVQ cannot hold more available buffers if we are here:
355f20b70ebSEugenio Pérez                      * queue the current guest descriptor and ignore kicks
356100890f7SEugenio Pérez                      * until some elements are used.
357100890f7SEugenio Pérez                      */
3589c2ab2f1SEugenio Pérez                     svq->next_guest_avail_elem = g_steal_pointer(&elem);
359100890f7SEugenio Pérez                 }
360100890f7SEugenio Pérez 
361f20b70ebSEugenio Pérez                 /* VQ is full or broken, just return and ignore kicks */
362100890f7SEugenio Pérez                 return;
363100890f7SEugenio Pérez             }
3649c2ab2f1SEugenio Pérez             /* elem belongs to SVQ or external caller now */
3659c2ab2f1SEugenio Pérez             elem = NULL;
366100890f7SEugenio Pérez         }
367100890f7SEugenio Pérez 
368100890f7SEugenio Pérez         virtio_queue_set_notification(svq->vq, true);
369100890f7SEugenio Pérez     } while (!virtio_queue_empty(svq->vq));
370100890f7SEugenio Pérez }
371100890f7SEugenio Pérez 
372100890f7SEugenio Pérez /**
373100890f7SEugenio Pérez  * Handle guest's kick.
374100890f7SEugenio Pérez  *
375100890f7SEugenio Pérez  * @n: guest kick event notifier, the one that guest set to notify svq.
376100890f7SEugenio Pérez  */
vhost_handle_guest_kick_notifier(EventNotifier * n)377100890f7SEugenio Pérez static void vhost_handle_guest_kick_notifier(EventNotifier *n)
378100890f7SEugenio Pérez {
379100890f7SEugenio Pérez     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
380100890f7SEugenio Pérez     event_notifier_test_and_clear(n);
381100890f7SEugenio Pérez     vhost_handle_guest_kick(svq);
382100890f7SEugenio Pérez }
383100890f7SEugenio Pérez 
vhost_svq_more_used(VhostShadowVirtqueue * svq)384100890f7SEugenio Pérez static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
385100890f7SEugenio Pérez {
386c381abc3SEugenio Pérez     uint16_t *used_idx = &svq->vring.used->idx;
387100890f7SEugenio Pérez     if (svq->last_used_idx != svq->shadow_used_idx) {
388100890f7SEugenio Pérez         return true;
389100890f7SEugenio Pérez     }
390100890f7SEugenio Pérez 
391*50e97541SKonstantin Shkolnyy     svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);
392100890f7SEugenio Pérez 
393100890f7SEugenio Pérez     return svq->last_used_idx != svq->shadow_used_idx;
394100890f7SEugenio Pérez }
395100890f7SEugenio Pérez 
396100890f7SEugenio Pérez /**
397100890f7SEugenio Pérez  * Enable vhost device calls after disable them.
398100890f7SEugenio Pérez  *
399100890f7SEugenio Pérez  * @svq: The svq
400100890f7SEugenio Pérez  *
401100890f7SEugenio Pérez  * It returns false if there are pending used buffers from the vhost device,
402100890f7SEugenio Pérez  * avoiding the possible races between SVQ checking for more work and enabling
403100890f7SEugenio Pérez  * callbacks. True if SVQ used vring has no more pending buffers.
404100890f7SEugenio Pérez  */
vhost_svq_enable_notification(VhostShadowVirtqueue * svq)405100890f7SEugenio Pérez static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
406100890f7SEugenio Pérez {
40701f8beacSEugenio Pérez     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
40801f8beacSEugenio Pérez         uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
409*50e97541SKonstantin Shkolnyy         *used_event = cpu_to_le16(svq->shadow_used_idx);
41001f8beacSEugenio Pérez     } else {
411100890f7SEugenio Pérez         svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
41201f8beacSEugenio Pérez     }
41301f8beacSEugenio Pérez 
41401f8beacSEugenio Pérez     /* Make sure the event is enabled before the read of used_idx */
415100890f7SEugenio Pérez     smp_mb();
416100890f7SEugenio Pérez     return !vhost_svq_more_used(svq);
417100890f7SEugenio Pérez }
418100890f7SEugenio Pérez 
/**
 * Ask the vhost device not to send used buffer notifications.
 *
 * @svq: The svq
 */
static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /*
         * Nothing to do in the event idx case, since used event index is
         * already an index too far away.
         */
        return;
    }

    svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
}
429100890f7SEugenio Pérez 
/**
 * Find the last descriptor of a chain by following the SVQ next links.
 *
 * @svq: The svq
 * @num: Number of descriptors in the chain
 * @i: Index of the first descriptor of the chain
 *
 * Returns @i itself when @num is 0 or 1.
 */
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    /* A chain of num descriptors needs num - 1 hops to reach its tail */
    int hops_left = num - 1;

    while (hops_left-- > 0) {
        i = svq->desc_next[i];
    }

    return i;
}
43981abfa57SEugenio Pérez 
44095eaaa76SMarc-André Lureau G_GNUC_WARN_UNUSED_RESULT
vhost_svq_get_buf(VhostShadowVirtqueue * svq,uint32_t * len)441100890f7SEugenio Pérez static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
442100890f7SEugenio Pérez                                            uint32_t *len)
443100890f7SEugenio Pérez {
444100890f7SEugenio Pérez     const vring_used_t *used = svq->vring.used;
445100890f7SEugenio Pérez     vring_used_elem_t used_elem;
44681abfa57SEugenio Pérez     uint16_t last_used, last_used_chain, num;
447100890f7SEugenio Pérez 
448100890f7SEugenio Pérez     if (!vhost_svq_more_used(svq)) {
449100890f7SEugenio Pérez         return NULL;
450100890f7SEugenio Pérez     }
451100890f7SEugenio Pérez 
452100890f7SEugenio Pérez     /* Only get used array entries after they have been exposed by dev */
453100890f7SEugenio Pérez     smp_rmb();
454100890f7SEugenio Pérez     last_used = svq->last_used_idx & (svq->vring.num - 1);
455100890f7SEugenio Pérez     used_elem.id = le32_to_cpu(used->ring[last_used].id);
456100890f7SEugenio Pérez     used_elem.len = le32_to_cpu(used->ring[last_used].len);
457100890f7SEugenio Pérez 
458100890f7SEugenio Pérez     svq->last_used_idx++;
459100890f7SEugenio Pérez     if (unlikely(used_elem.id >= svq->vring.num)) {
460100890f7SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
461100890f7SEugenio Pérez                       svq->vdev->name, used_elem.id);
462100890f7SEugenio Pérez         return NULL;
463100890f7SEugenio Pérez     }
464100890f7SEugenio Pérez 
46586f5f254SEugenio Pérez     if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
466100890f7SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR,
467100890f7SEugenio Pérez             "Device %s says index %u is used, but it was not available",
468100890f7SEugenio Pérez             svq->vdev->name, used_elem.id);
469100890f7SEugenio Pérez         return NULL;
470100890f7SEugenio Pérez     }
471100890f7SEugenio Pérez 
472ac4cfdc6SEugenio Pérez     num = svq->desc_state[used_elem.id].ndescs;
47386f5f254SEugenio Pérez     svq->desc_state[used_elem.id].ndescs = 0;
47481abfa57SEugenio Pérez     last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
47581abfa57SEugenio Pérez     svq->desc_next[last_used_chain] = svq->free_head;
476100890f7SEugenio Pérez     svq->free_head = used_elem.id;
4775d410557SHawkins Jiawei     svq->num_free += num;
478100890f7SEugenio Pérez 
479100890f7SEugenio Pérez     *len = used_elem.len;
4809e87868fSEugenio Pérez     return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
481100890f7SEugenio Pérez }
482100890f7SEugenio Pérez 
483432efd14SEugenio Pérez /**
484432efd14SEugenio Pérez  * Push an element to SVQ, returning it to the guest.
485432efd14SEugenio Pérez  */
vhost_svq_push_elem(VhostShadowVirtqueue * svq,const VirtQueueElement * elem,uint32_t len)486432efd14SEugenio Pérez void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
487432efd14SEugenio Pérez                          const VirtQueueElement *elem, uint32_t len)
488432efd14SEugenio Pérez {
489432efd14SEugenio Pérez     virtqueue_push(svq->vq, elem, len);
490432efd14SEugenio Pérez     if (svq->next_guest_avail_elem) {
491432efd14SEugenio Pérez         /*
492432efd14SEugenio Pérez          * Avail ring was full when vhost_svq_flush was called, so it's a
493432efd14SEugenio Pérez          * good moment to make more descriptors available if possible.
494432efd14SEugenio Pérez          */
495432efd14SEugenio Pérez         vhost_handle_guest_kick(svq);
496432efd14SEugenio Pérez     }
497432efd14SEugenio Pérez }
498432efd14SEugenio Pérez 
/**
 * Forward the buffers used by the device to the guest's virtqueue.
 *
 * @svq: The svq
 * @check_for_avail_queue: If true, retry forwarding a parked guest element
 *                         after each batch of used buffers
 */
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *guest_vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned filled = 0;

        vhost_svq_disable_notification(svq);

        for (;;) {
            uint32_t dev_len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq,
                                                                  &dev_len);

            if (elem == NULL) {
                break;
            }

            if (unlikely(filled >= svq->vring.num)) {
                /* More used buffers than the ring can hold: stop here */
                qemu_log_mask(LOG_GUEST_ERROR,
                         "More than %u used buffers obtained in a %u size SVQ",
                         filled, svq->vring.num);
                virtqueue_fill(guest_vq, elem, dev_len, filled);
                virtqueue_flush(guest_vq, filled);
                return;
            }

            virtqueue_fill(guest_vq, elem, dev_len, filled);
            filled++;
        }

        virtqueue_flush(guest_vq, filled);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * A guest element is still parked waiting for room, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}
539100890f7SEugenio Pérez 
540100890f7SEugenio Pérez /**
541b0de17a2SHawkins Jiawei  * Poll the SVQ to wait for the device to use the specified number
542b0de17a2SHawkins Jiawei  * of elements and return the total length written by the device.
5433f44d13dSEugenio Pérez  *
5443f44d13dSEugenio Pérez  * This function race with main event loop SVQ polling, so extra
5453f44d13dSEugenio Pérez  * synchronization is needed.
5463f44d13dSEugenio Pérez  *
547b0de17a2SHawkins Jiawei  * @svq: The svq
548b0de17a2SHawkins Jiawei  * @num: The number of elements that need to be used
5493f44d13dSEugenio Pérez  */
vhost_svq_poll(VhostShadowVirtqueue * svq,size_t num)550b0de17a2SHawkins Jiawei size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num)
5513f44d13dSEugenio Pérez {
552b0de17a2SHawkins Jiawei     size_t len = 0;
553b0de17a2SHawkins Jiawei 
554b0de17a2SHawkins Jiawei     while (num--) {
55595eaaa76SMarc-André Lureau         g_autofree VirtQueueElement *elem = NULL;
5563f44d13dSEugenio Pérez         int64_t start_us = g_get_monotonic_time();
5573073c6b9SMarc-André Lureau         uint32_t r = 0;
558d368c0b0SEugenio Pérez 
559d368c0b0SEugenio Pérez         do {
560d368c0b0SEugenio Pérez             if (vhost_svq_more_used(svq)) {
561d368c0b0SEugenio Pérez                 break;
5623f44d13dSEugenio Pérez             }
5633f44d13dSEugenio Pérez 
5643f44d13dSEugenio Pérez             if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
565b0de17a2SHawkins Jiawei                 return len;
5663f44d13dSEugenio Pérez             }
5673f44d13dSEugenio Pérez         } while (true);
568d368c0b0SEugenio Pérez 
56995eaaa76SMarc-André Lureau         elem = vhost_svq_get_buf(svq, &r);
570b0de17a2SHawkins Jiawei         len += r;
571b0de17a2SHawkins Jiawei     }
572b0de17a2SHawkins Jiawei 
573d368c0b0SEugenio Pérez     return len;
5743f44d13dSEugenio Pérez }
5753f44d13dSEugenio Pérez 
5763f44d13dSEugenio Pérez /**
577100890f7SEugenio Pérez  * Forward used buffers.
578a8ac8858SEugenio Pérez  *
579a8ac8858SEugenio Pérez  * @n: hdev call event notifier, the one that device set to notify svq.
580100890f7SEugenio Pérez  *
581100890f7SEugenio Pérez  * Note that we are not making any buffers available in the loop, there is no
582100890f7SEugenio Pérez  * way that it runs more than virtqueue size times.
583a8ac8858SEugenio Pérez  */
vhost_svq_handle_call(EventNotifier * n)584a8ac8858SEugenio Pérez static void vhost_svq_handle_call(EventNotifier *n)
585a8ac8858SEugenio Pérez {
586a8ac8858SEugenio Pérez     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
587a8ac8858SEugenio Pérez                                              hdev_call);
588a8ac8858SEugenio Pérez     event_notifier_test_and_clear(n);
589100890f7SEugenio Pérez     vhost_svq_flush(svq, true);
590a8ac8858SEugenio Pérez }
591a8ac8858SEugenio Pérez 
592a8ac8858SEugenio Pérez /**
593a8ac8858SEugenio Pérez  * Set the call notifier for the SVQ to call the guest
594a8ac8858SEugenio Pérez  *
595a8ac8858SEugenio Pérez  * @svq: Shadow virtqueue
596a8ac8858SEugenio Pérez  * @call_fd: call notifier
597a8ac8858SEugenio Pérez  *
598a8ac8858SEugenio Pérez  * Called on BQL context.
599a8ac8858SEugenio Pérez  */
vhost_svq_set_svq_call_fd(VhostShadowVirtqueue * svq,int call_fd)600a8ac8858SEugenio Pérez void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
601a8ac8858SEugenio Pérez {
602a8ac8858SEugenio Pérez     if (call_fd == VHOST_FILE_UNBIND) {
603a8ac8858SEugenio Pérez         /*
604a8ac8858SEugenio Pérez          * Fail event_notifier_set if called handling device call.
605a8ac8858SEugenio Pérez          *
606a8ac8858SEugenio Pérez          * SVQ still needs device notifications, since it needs to keep
607a8ac8858SEugenio Pérez          * forwarding used buffers even with the unbind.
608a8ac8858SEugenio Pérez          */
609a8ac8858SEugenio Pérez         memset(&svq->svq_call, 0, sizeof(svq->svq_call));
610a8ac8858SEugenio Pérez     } else {
611a8ac8858SEugenio Pérez         event_notifier_init_fd(&svq->svq_call, call_fd);
612a8ac8858SEugenio Pérez     }
613a8ac8858SEugenio Pérez }
614a8ac8858SEugenio Pérez 
615a8ac8858SEugenio Pérez /**
616dafb34c9SEugenio Pérez  * Get the shadow vq vring address.
617dafb34c9SEugenio Pérez  * @svq: Shadow virtqueue
618dafb34c9SEugenio Pérez  * @addr: Destination to store address
619dafb34c9SEugenio Pérez  */
vhost_svq_get_vring_addr(const VhostShadowVirtqueue * svq,struct vhost_vring_addr * addr)620dafb34c9SEugenio Pérez void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
621dafb34c9SEugenio Pérez                               struct vhost_vring_addr *addr)
622dafb34c9SEugenio Pérez {
62334e3c94eSEugenio Pérez     addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
62434e3c94eSEugenio Pérez     addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
62534e3c94eSEugenio Pérez     addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
626dafb34c9SEugenio Pérez }
627dafb34c9SEugenio Pérez 
vhost_svq_driver_area_size(const VhostShadowVirtqueue * svq)628dafb34c9SEugenio Pérez size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
629dafb34c9SEugenio Pérez {
630dafb34c9SEugenio Pérez     size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
631f0c48e05SEugenio Pérez     size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
632f0c48e05SEugenio Pérez                                                               sizeof(uint16_t);
633dafb34c9SEugenio Pérez 
6348e3b0cbbSMarc-André Lureau     return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
635dafb34c9SEugenio Pérez }
636dafb34c9SEugenio Pérez 
vhost_svq_device_area_size(const VhostShadowVirtqueue * svq)637dafb34c9SEugenio Pérez size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
638dafb34c9SEugenio Pérez {
639f0c48e05SEugenio Pérez     size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
640f0c48e05SEugenio Pérez                                                               sizeof(uint16_t);
6418e3b0cbbSMarc-André Lureau     return ROUND_UP(used_size, qemu_real_host_page_size());
642dafb34c9SEugenio Pérez }
643dafb34c9SEugenio Pérez 
644dafb34c9SEugenio Pérez /**
645dff4426fSEugenio Pérez  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
646dff4426fSEugenio Pérez  *
647dff4426fSEugenio Pérez  * @svq: The svq
648dff4426fSEugenio Pérez  * @svq_kick_fd: The svq kick fd
649dff4426fSEugenio Pérez  *
650dff4426fSEugenio Pérez  * Note that the SVQ will never close the old file descriptor.
651dff4426fSEugenio Pérez  */
vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue * svq,int svq_kick_fd)652dff4426fSEugenio Pérez void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
653dff4426fSEugenio Pérez {
654dff4426fSEugenio Pérez     EventNotifier *svq_kick = &svq->svq_kick;
655dff4426fSEugenio Pérez     bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
656dff4426fSEugenio Pérez     bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
657dff4426fSEugenio Pérez 
658dff4426fSEugenio Pérez     if (poll_stop) {
659dff4426fSEugenio Pérez         event_notifier_set_handler(svq_kick, NULL);
660dff4426fSEugenio Pérez     }
661dff4426fSEugenio Pérez 
6628b64e486SEugenio Pérez     event_notifier_init_fd(svq_kick, svq_kick_fd);
663dff4426fSEugenio Pérez     /*
664dff4426fSEugenio Pérez      * event_notifier_set_handler already checks for guest's notifications if
665dff4426fSEugenio Pérez      * they arrive at the new file descriptor in the switch, so there is no
666dff4426fSEugenio Pérez      * need to explicitly check for them.
667dff4426fSEugenio Pérez      */
668dff4426fSEugenio Pérez     if (poll_start) {
669dff4426fSEugenio Pérez         event_notifier_set(svq_kick);
670100890f7SEugenio Pérez         event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
671100890f7SEugenio Pérez     }
672100890f7SEugenio Pérez }
673100890f7SEugenio Pérez 
674100890f7SEugenio Pérez /**
675100890f7SEugenio Pérez  * Start the shadow virtqueue operation.
676100890f7SEugenio Pérez  *
677100890f7SEugenio Pérez  * @svq: Shadow Virtqueue
678100890f7SEugenio Pérez  * @vdev: VirtIO device
679100890f7SEugenio Pérez  * @vq: Virtqueue to shadow
6805fde952bSEugenio Pérez  * @iova_tree: Tree to perform descriptors translations
681100890f7SEugenio Pérez  */
vhost_svq_start(VhostShadowVirtqueue * svq,VirtIODevice * vdev,VirtQueue * vq,VhostIOVATree * iova_tree)682100890f7SEugenio Pérez void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
6835fde952bSEugenio Pérez                      VirtQueue *vq, VhostIOVATree *iova_tree)
684100890f7SEugenio Pérez {
685babf8b87SEugenio Pérez     size_t desc_size;
686100890f7SEugenio Pérez 
68720e7412bSEugenio Pérez     event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
688100890f7SEugenio Pérez     svq->next_guest_avail_elem = NULL;
689100890f7SEugenio Pérez     svq->shadow_avail_idx = 0;
690100890f7SEugenio Pérez     svq->shadow_used_idx = 0;
691100890f7SEugenio Pérez     svq->last_used_idx = 0;
692100890f7SEugenio Pérez     svq->vdev = vdev;
693100890f7SEugenio Pérez     svq->vq = vq;
6945fde952bSEugenio Pérez     svq->iova_tree = iova_tree;
695100890f7SEugenio Pérez 
696100890f7SEugenio Pérez     svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
6975d410557SHawkins Jiawei     svq->num_free = svq->vring.num;
698babf8b87SEugenio Pérez     svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq),
699babf8b87SEugenio Pérez                            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
700babf8b87SEugenio Pérez                            -1, 0);
701100890f7SEugenio Pérez     desc_size = sizeof(vring_desc_t) * svq->vring.num;
702100890f7SEugenio Pérez     svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
703babf8b87SEugenio Pérez     svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq),
704babf8b87SEugenio Pérez                            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
705babf8b87SEugenio Pérez                            -1, 0);
7069e87868fSEugenio Pérez     svq->desc_state = g_new0(SVQDescState, svq->vring.num);
707495fe3a7SEugenio Pérez     svq->desc_next = g_new0(uint16_t, svq->vring.num);
708100890f7SEugenio Pérez     for (unsigned i = 0; i < svq->vring.num - 1; i++) {
709*50e97541SKonstantin Shkolnyy         svq->desc_next[i] = i + 1;
710dff4426fSEugenio Pérez     }
711dff4426fSEugenio Pérez }
712dff4426fSEugenio Pérez 
713dff4426fSEugenio Pérez /**
714dff4426fSEugenio Pérez  * Stop the shadow virtqueue operation.
715dff4426fSEugenio Pérez  * @svq: Shadow Virtqueue
716dff4426fSEugenio Pérez  */
vhost_svq_stop(VhostShadowVirtqueue * svq)717dff4426fSEugenio Pérez void vhost_svq_stop(VhostShadowVirtqueue *svq)
718dff4426fSEugenio Pérez {
7198b64e486SEugenio Pérez     vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
720100890f7SEugenio Pérez     g_autofree VirtQueueElement *next_avail_elem = NULL;
721100890f7SEugenio Pérez 
722100890f7SEugenio Pérez     if (!svq->vq) {
723100890f7SEugenio Pérez         return;
724100890f7SEugenio Pérez     }
725100890f7SEugenio Pérez 
726100890f7SEugenio Pérez     /* Send all pending used descriptors to guest */
727100890f7SEugenio Pérez     vhost_svq_flush(svq, false);
728100890f7SEugenio Pérez 
729100890f7SEugenio Pérez     for (unsigned i = 0; i < svq->vring.num; ++i) {
730100890f7SEugenio Pérez         g_autofree VirtQueueElement *elem = NULL;
7319e87868fSEugenio Pérez         elem = g_steal_pointer(&svq->desc_state[i].elem);
732100890f7SEugenio Pérez         if (elem) {
7334241e8bdSEugenio Pérez             /*
7344241e8bdSEugenio Pérez              * TODO: This is ok for networking, but other kinds of devices
7354241e8bdSEugenio Pérez              * might have problems with just unpop these.
7364241e8bdSEugenio Pérez              */
7374241e8bdSEugenio Pérez             virtqueue_unpop(svq->vq, elem, 0);
738100890f7SEugenio Pérez         }
739100890f7SEugenio Pérez     }
740100890f7SEugenio Pérez 
741100890f7SEugenio Pérez     next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
742100890f7SEugenio Pérez     if (next_avail_elem) {
7434241e8bdSEugenio Pérez         virtqueue_unpop(svq->vq, next_avail_elem, 0);
744100890f7SEugenio Pérez     }
745100890f7SEugenio Pérez     svq->vq = NULL;
746495fe3a7SEugenio Pérez     g_free(svq->desc_next);
7479e87868fSEugenio Pérez     g_free(svq->desc_state);
748babf8b87SEugenio Pérez     munmap(svq->vring.desc, vhost_svq_driver_area_size(svq));
749babf8b87SEugenio Pérez     munmap(svq->vring.used, vhost_svq_device_area_size(svq));
75020e7412bSEugenio Pérez     event_notifier_set_handler(&svq->hdev_call, NULL);
751dff4426fSEugenio Pérez }
75210857ec0SEugenio Pérez 
75310857ec0SEugenio Pérez /**
75410857ec0SEugenio Pérez  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
75510857ec0SEugenio Pérez  * shadow methods and file descriptors.
75610857ec0SEugenio Pérez  *
757e966c0b7SEugenio Pérez  * @ops: SVQ owner callbacks
758e966c0b7SEugenio Pérez  * @ops_opaque: ops opaque pointer
75910857ec0SEugenio Pérez  */
vhost_svq_new(const VhostShadowVirtqueueOps * ops,void * ops_opaque)7605fde952bSEugenio Pérez VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
761e966c0b7SEugenio Pérez                                     void *ops_opaque)
76210857ec0SEugenio Pérez {
7633cfb4d06SEugenio Pérez     VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
76410857ec0SEugenio Pérez 
765dff4426fSEugenio Pérez     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
766e966c0b7SEugenio Pérez     svq->ops = ops;
767e966c0b7SEugenio Pérez     svq->ops_opaque = ops_opaque;
7683cfb4d06SEugenio Pérez     return svq;
76910857ec0SEugenio Pérez }
77010857ec0SEugenio Pérez 
77110857ec0SEugenio Pérez /**
77210857ec0SEugenio Pérez  * Free the resources of the shadow virtqueue.
77310857ec0SEugenio Pérez  *
77410857ec0SEugenio Pérez  * @pvq: gpointer to SVQ so it can be used by autofree functions.
77510857ec0SEugenio Pérez  */
vhost_svq_free(gpointer pvq)77610857ec0SEugenio Pérez void vhost_svq_free(gpointer pvq)
77710857ec0SEugenio Pérez {
77810857ec0SEugenio Pérez     VhostShadowVirtqueue *vq = pvq;
779dff4426fSEugenio Pérez     vhost_svq_stop(vq);
78010857ec0SEugenio Pérez     g_free(vq);
78110857ec0SEugenio Pérez }
782