110857ec0SEugenio Pérez /* 210857ec0SEugenio Pérez * vhost shadow virtqueue 310857ec0SEugenio Pérez * 410857ec0SEugenio Pérez * SPDX-FileCopyrightText: Red Hat, Inc. 2021 510857ec0SEugenio Pérez * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> 610857ec0SEugenio Pérez * 710857ec0SEugenio Pérez * SPDX-License-Identifier: GPL-2.0-or-later 810857ec0SEugenio Pérez */ 910857ec0SEugenio Pérez 1010857ec0SEugenio Pérez #include "qemu/osdep.h" 1110857ec0SEugenio Pérez #include "hw/virtio/vhost-shadow-virtqueue.h" 1210857ec0SEugenio Pérez 1310857ec0SEugenio Pérez #include "qemu/error-report.h" 144725a418SEugenio Pérez #include "qapi/error.h" 15dff4426fSEugenio Pérez #include "qemu/main-loop.h" 16100890f7SEugenio Pérez #include "qemu/log.h" 17100890f7SEugenio Pérez #include "qemu/memalign.h" 18dff4426fSEugenio Pérez #include "linux-headers/linux/vhost.h" 19dff4426fSEugenio Pérez 20dff4426fSEugenio Pérez /** 214725a418SEugenio Pérez * Validate the transport device features that both guests can use with the SVQ 224725a418SEugenio Pérez * and SVQs can use with the device. 234725a418SEugenio Pérez * 244725a418SEugenio Pérez * @dev_features: The features 254725a418SEugenio Pérez * @errp: Error pointer 264725a418SEugenio Pérez */ 274725a418SEugenio Pérez bool vhost_svq_valid_features(uint64_t features, Error **errp) 284725a418SEugenio Pérez { 294725a418SEugenio Pérez bool ok = true; 304725a418SEugenio Pérez uint64_t svq_features = features; 314725a418SEugenio Pérez 324725a418SEugenio Pérez for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END; 334725a418SEugenio Pérez ++b) { 344725a418SEugenio Pérez switch (b) { 354725a418SEugenio Pérez case VIRTIO_F_ANY_LAYOUT: 364725a418SEugenio Pérez continue; 374725a418SEugenio Pérez 384725a418SEugenio Pérez case VIRTIO_F_ACCESS_PLATFORM: 394725a418SEugenio Pérez /* SVQ trust in the host's IOMMU to translate addresses */ 404725a418SEugenio Pérez case VIRTIO_F_VERSION_1: 414725a418SEugenio Pérez /* SVQ trust that the guest vring is little endian */ 424725a418SEugenio Pérez if (!(svq_features & BIT_ULL(b))) { 434725a418SEugenio Pérez svq_features |= BIT_ULL(b); 444725a418SEugenio Pérez ok = false; 454725a418SEugenio Pérez } 464725a418SEugenio Pérez continue; 474725a418SEugenio Pérez 484725a418SEugenio Pérez default: 494725a418SEugenio Pérez if (svq_features & BIT_ULL(b)) { 504725a418SEugenio Pérez svq_features &= ~BIT_ULL(b); 514725a418SEugenio Pérez ok = false; 524725a418SEugenio Pérez } 534725a418SEugenio Pérez } 544725a418SEugenio Pérez } 554725a418SEugenio Pérez 564725a418SEugenio Pérez if (!ok) { 574725a418SEugenio Pérez error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64 584725a418SEugenio Pérez ", ok: 0x%"PRIx64, features, svq_features); 594725a418SEugenio Pérez } 604725a418SEugenio Pérez return ok; 614725a418SEugenio Pérez } 624725a418SEugenio Pérez 634725a418SEugenio Pérez /** 64100890f7SEugenio Pérez * Number of descriptors that the SVQ can make available from the guest. 65dff4426fSEugenio Pérez * 66100890f7SEugenio Pérez * @svq: The svq 67dff4426fSEugenio Pérez */ 68100890f7SEugenio Pérez static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) 69dff4426fSEugenio Pérez { 70100890f7SEugenio Pérez return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); 71100890f7SEugenio Pérez } 72100890f7SEugenio Pérez 73*34e3c94eSEugenio Pérez /** 74*34e3c94eSEugenio Pérez * Translate addresses between the qemu's virtual address and the SVQ IOVA 75*34e3c94eSEugenio Pérez * 76*34e3c94eSEugenio Pérez * @svq: Shadow VirtQueue 77*34e3c94eSEugenio Pérez * @vaddr: Translated IOVA addresses 78*34e3c94eSEugenio Pérez * @iovec: Source qemu's VA addresses 79*34e3c94eSEugenio Pérez * @num: Length of iovec and minimum length of vaddr 80*34e3c94eSEugenio Pérez */ 81*34e3c94eSEugenio Pérez static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, 82*34e3c94eSEugenio Pérez hwaddr *addrs, const struct iovec *iovec, 83*34e3c94eSEugenio Pérez size_t num) 84*34e3c94eSEugenio Pérez { 85*34e3c94eSEugenio Pérez if (num == 0) { 86*34e3c94eSEugenio Pérez return true; 87*34e3c94eSEugenio Pérez } 88*34e3c94eSEugenio Pérez 89*34e3c94eSEugenio Pérez for (size_t i = 0; i < num; ++i) { 90*34e3c94eSEugenio Pérez DMAMap needle = { 91*34e3c94eSEugenio Pérez .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, 92*34e3c94eSEugenio Pérez .size = iovec[i].iov_len, 93*34e3c94eSEugenio Pérez }; 94*34e3c94eSEugenio Pérez Int128 needle_last, map_last; 95*34e3c94eSEugenio Pérez size_t off; 96*34e3c94eSEugenio Pérez 97*34e3c94eSEugenio Pérez const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); 98*34e3c94eSEugenio Pérez /* 99*34e3c94eSEugenio Pérez * Map cannot be NULL since iova map contains all guest space and 100*34e3c94eSEugenio Pérez * qemu already has a physical address mapped 101*34e3c94eSEugenio Pérez */ 102*34e3c94eSEugenio Pérez if (unlikely(!map)) { 103*34e3c94eSEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 104*34e3c94eSEugenio Pérez "Invalid address 0x%"HWADDR_PRIx" given by guest", 105*34e3c94eSEugenio Pérez needle.translated_addr); 106*34e3c94eSEugenio Pérez return false; 107*34e3c94eSEugenio Pérez } 108*34e3c94eSEugenio Pérez 109*34e3c94eSEugenio Pérez off = needle.translated_addr - map->translated_addr; 110*34e3c94eSEugenio Pérez addrs[i] = map->iova + off; 111*34e3c94eSEugenio Pérez 112*34e3c94eSEugenio Pérez needle_last = int128_add(int128_make64(needle.translated_addr), 113*34e3c94eSEugenio Pérez int128_make64(iovec[i].iov_len)); 114*34e3c94eSEugenio Pérez map_last = int128_make64(map->translated_addr + map->size); 115*34e3c94eSEugenio Pérez if (unlikely(int128_gt(needle_last, map_last))) { 116*34e3c94eSEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 117*34e3c94eSEugenio Pérez "Guest buffer expands over iova range"); 118*34e3c94eSEugenio Pérez return false; 119*34e3c94eSEugenio Pérez } 120*34e3c94eSEugenio Pérez } 121*34e3c94eSEugenio Pérez 122*34e3c94eSEugenio Pérez return true; 123*34e3c94eSEugenio Pérez } 124*34e3c94eSEugenio Pérez 125*34e3c94eSEugenio Pérez static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, 126100890f7SEugenio Pérez const struct iovec *iovec, size_t num, 127100890f7SEugenio Pérez bool more_descs, bool write) 128100890f7SEugenio Pérez { 129100890f7SEugenio Pérez uint16_t i = svq->free_head, last = svq->free_head; 130100890f7SEugenio Pérez unsigned n; 131100890f7SEugenio Pérez uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; 132100890f7SEugenio Pérez vring_desc_t *descs = svq->vring.desc; 133100890f7SEugenio Pérez 134100890f7SEugenio Pérez if (num == 0) { 135100890f7SEugenio Pérez return; 136100890f7SEugenio Pérez } 137100890f7SEugenio Pérez 138100890f7SEugenio Pérez for (n = 0; n < num; n++) { 139100890f7SEugenio Pérez if (more_descs || (n + 1 < num)) { 140100890f7SEugenio Pérez descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); 141100890f7SEugenio Pérez } else { 142100890f7SEugenio Pérez descs[i].flags = flags; 143100890f7SEugenio Pérez } 144*34e3c94eSEugenio Pérez descs[i].addr = cpu_to_le64(sg[n]); 145100890f7SEugenio Pérez descs[i].len = cpu_to_le32(iovec[n].iov_len); 146100890f7SEugenio Pérez 147100890f7SEugenio Pérez last = i; 148100890f7SEugenio Pérez i = cpu_to_le16(descs[i].next); 149100890f7SEugenio Pérez } 150100890f7SEugenio Pérez 151100890f7SEugenio Pérez svq->free_head = le16_to_cpu(descs[last].next); 152100890f7SEugenio Pérez } 153100890f7SEugenio Pérez 154100890f7SEugenio Pérez static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, 155100890f7SEugenio Pérez VirtQueueElement *elem, unsigned *head) 156100890f7SEugenio Pérez { 157100890f7SEugenio Pérez unsigned avail_idx; 158100890f7SEugenio Pérez vring_avail_t *avail = svq->vring.avail; 159*34e3c94eSEugenio Pérez bool ok; 160*34e3c94eSEugenio Pérez g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); 161100890f7SEugenio Pérez 162100890f7SEugenio Pérez *head = svq->free_head; 163100890f7SEugenio Pérez 164100890f7SEugenio Pérez /* We need some descriptors here */ 165100890f7SEugenio Pérez if (unlikely(!elem->out_num && !elem->in_num)) { 166100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 167100890f7SEugenio Pérez "Guest provided element with no descriptors"); 168100890f7SEugenio Pérez return false; 169100890f7SEugenio Pérez } 170100890f7SEugenio Pérez 171*34e3c94eSEugenio Pérez ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); 172*34e3c94eSEugenio Pérez if (unlikely(!ok)) { 173*34e3c94eSEugenio Pérez return false; 174*34e3c94eSEugenio Pérez } 175*34e3c94eSEugenio Pérez vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, 176*34e3c94eSEugenio Pérez elem->in_num > 0, false); 177*34e3c94eSEugenio Pérez 178*34e3c94eSEugenio Pérez 179*34e3c94eSEugenio Pérez ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); 180*34e3c94eSEugenio Pérez if (unlikely(!ok)) { 181*34e3c94eSEugenio Pérez return false; 182*34e3c94eSEugenio Pérez } 183*34e3c94eSEugenio Pérez 184*34e3c94eSEugenio Pérez vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); 185100890f7SEugenio Pérez 186100890f7SEugenio Pérez /* 187100890f7SEugenio Pérez * Put the entry in the available array (but don't update avail->idx until 188100890f7SEugenio Pérez * they do sync). 189100890f7SEugenio Pérez */ 190100890f7SEugenio Pérez avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1); 191100890f7SEugenio Pérez avail->ring[avail_idx] = cpu_to_le16(*head); 192100890f7SEugenio Pérez svq->shadow_avail_idx++; 193100890f7SEugenio Pérez 194100890f7SEugenio Pérez /* Update the avail index after write the descriptor */ 195100890f7SEugenio Pérez smp_wmb(); 196100890f7SEugenio Pérez avail->idx = cpu_to_le16(svq->shadow_avail_idx); 197100890f7SEugenio Pérez 198100890f7SEugenio Pérez return true; 199100890f7SEugenio Pérez } 200100890f7SEugenio Pérez 201100890f7SEugenio Pérez static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) 202100890f7SEugenio Pérez { 203100890f7SEugenio Pérez unsigned qemu_head; 204100890f7SEugenio Pérez bool ok = vhost_svq_add_split(svq, elem, &qemu_head); 205100890f7SEugenio Pérez if (unlikely(!ok)) { 206100890f7SEugenio Pérez return false; 207100890f7SEugenio Pérez } 208100890f7SEugenio Pérez 209100890f7SEugenio Pérez svq->ring_id_maps[qemu_head] = elem; 210100890f7SEugenio Pérez return true; 211100890f7SEugenio Pérez } 212100890f7SEugenio Pérez 213100890f7SEugenio Pérez static void vhost_svq_kick(VhostShadowVirtqueue *svq) 214100890f7SEugenio Pérez { 215100890f7SEugenio Pérez /* 216100890f7SEugenio Pérez * We need to expose the available array entries before checking the used 217100890f7SEugenio Pérez * flags 218100890f7SEugenio Pérez */ 219100890f7SEugenio Pérez smp_mb(); 220100890f7SEugenio Pérez if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { 221100890f7SEugenio Pérez return; 222100890f7SEugenio Pérez } 223100890f7SEugenio Pérez 224dff4426fSEugenio Pérez event_notifier_set(&svq->hdev_kick); 225dff4426fSEugenio Pérez } 226dff4426fSEugenio Pérez 227dff4426fSEugenio Pérez /** 228100890f7SEugenio Pérez * Forward available buffers. 229100890f7SEugenio Pérez * 230100890f7SEugenio Pérez * @svq: Shadow VirtQueue 231100890f7SEugenio Pérez * 232100890f7SEugenio Pérez * Note that this function does not guarantee that all guest's available 233100890f7SEugenio Pérez * buffers are available to the device in SVQ avail ring. The guest may have 234100890f7SEugenio Pérez * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in 235100890f7SEugenio Pérez * qemu vaddr. 236100890f7SEugenio Pérez * 237100890f7SEugenio Pérez * If that happens, guest's kick notifications will be disabled until the 238100890f7SEugenio Pérez * device uses some buffers. 239100890f7SEugenio Pérez */ 240100890f7SEugenio Pérez static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) 241100890f7SEugenio Pérez { 242100890f7SEugenio Pérez /* Clear event notifier */ 243100890f7SEugenio Pérez event_notifier_test_and_clear(&svq->svq_kick); 244100890f7SEugenio Pérez 245100890f7SEugenio Pérez /* Forward to the device as many available buffers as possible */ 246100890f7SEugenio Pérez do { 247100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, false); 248100890f7SEugenio Pérez 249100890f7SEugenio Pérez while (true) { 250100890f7SEugenio Pérez VirtQueueElement *elem; 251100890f7SEugenio Pérez bool ok; 252100890f7SEugenio Pérez 253100890f7SEugenio Pérez if (svq->next_guest_avail_elem) { 254100890f7SEugenio Pérez elem = g_steal_pointer(&svq->next_guest_avail_elem); 255100890f7SEugenio Pérez } else { 256100890f7SEugenio Pérez elem = virtqueue_pop(svq->vq, sizeof(*elem)); 257100890f7SEugenio Pérez } 258100890f7SEugenio Pérez 259100890f7SEugenio Pérez if (!elem) { 260100890f7SEugenio Pérez break; 261100890f7SEugenio Pérez } 262100890f7SEugenio Pérez 263100890f7SEugenio Pérez if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { 264100890f7SEugenio Pérez /* 265100890f7SEugenio Pérez * This condition is possible since a contiguous buffer in GPA 266100890f7SEugenio Pérez * does not imply a contiguous buffer in qemu's VA 267100890f7SEugenio Pérez * scatter-gather segments. If that happens, the buffer exposed 268100890f7SEugenio Pérez * to the device needs to be a chain of descriptors at this 269100890f7SEugenio Pérez * moment. 270100890f7SEugenio Pérez * 271100890f7SEugenio Pérez * SVQ cannot hold more available buffers if we are here: 272100890f7SEugenio Pérez * queue the current guest descriptor and ignore further kicks 273100890f7SEugenio Pérez * until some elements are used. 274100890f7SEugenio Pérez */ 275100890f7SEugenio Pérez svq->next_guest_avail_elem = elem; 276100890f7SEugenio Pérez return; 277100890f7SEugenio Pérez } 278100890f7SEugenio Pérez 279100890f7SEugenio Pérez ok = vhost_svq_add(svq, elem); 280100890f7SEugenio Pérez if (unlikely(!ok)) { 281100890f7SEugenio Pérez /* VQ is broken, just return and ignore any other kicks */ 282100890f7SEugenio Pérez return; 283100890f7SEugenio Pérez } 284100890f7SEugenio Pérez vhost_svq_kick(svq); 285100890f7SEugenio Pérez } 286100890f7SEugenio Pérez 287100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, true); 288100890f7SEugenio Pérez } while (!virtio_queue_empty(svq->vq)); 289100890f7SEugenio Pérez } 290100890f7SEugenio Pérez 291100890f7SEugenio Pérez /** 292100890f7SEugenio Pérez * Handle guest's kick. 293100890f7SEugenio Pérez * 294100890f7SEugenio Pérez * @n: guest kick event notifier, the one that guest set to notify svq. 295100890f7SEugenio Pérez */ 296100890f7SEugenio Pérez static void vhost_handle_guest_kick_notifier(EventNotifier *n) 297100890f7SEugenio Pérez { 298100890f7SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); 299100890f7SEugenio Pérez event_notifier_test_and_clear(n); 300100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 301100890f7SEugenio Pérez } 302100890f7SEugenio Pérez 303100890f7SEugenio Pérez static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) 304100890f7SEugenio Pérez { 305100890f7SEugenio Pérez if (svq->last_used_idx != svq->shadow_used_idx) { 306100890f7SEugenio Pérez return true; 307100890f7SEugenio Pérez } 308100890f7SEugenio Pérez 309100890f7SEugenio Pérez svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); 310100890f7SEugenio Pérez 311100890f7SEugenio Pérez return svq->last_used_idx != svq->shadow_used_idx; 312100890f7SEugenio Pérez } 313100890f7SEugenio Pérez 314100890f7SEugenio Pérez /** 315100890f7SEugenio Pérez * Enable vhost device calls after disable them. 316100890f7SEugenio Pérez * 317100890f7SEugenio Pérez * @svq: The svq 318100890f7SEugenio Pérez * 319100890f7SEugenio Pérez * It returns false if there are pending used buffers from the vhost device, 320100890f7SEugenio Pérez * avoiding the possible races between SVQ checking for more work and enabling 321100890f7SEugenio Pérez * callbacks. True if SVQ used vring has no more pending buffers. 322100890f7SEugenio Pérez */ 323100890f7SEugenio Pérez static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) 324100890f7SEugenio Pérez { 325100890f7SEugenio Pérez svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 326100890f7SEugenio Pérez /* Make sure the flag is written before the read of used_idx */ 327100890f7SEugenio Pérez smp_mb(); 328100890f7SEugenio Pérez return !vhost_svq_more_used(svq); 329100890f7SEugenio Pérez } 330100890f7SEugenio Pérez 331100890f7SEugenio Pérez static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) 332100890f7SEugenio Pérez { 333100890f7SEugenio Pérez svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 334100890f7SEugenio Pérez } 335100890f7SEugenio Pérez 336100890f7SEugenio Pérez static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, 337100890f7SEugenio Pérez uint32_t *len) 338100890f7SEugenio Pérez { 339100890f7SEugenio Pérez vring_desc_t *descs = svq->vring.desc; 340100890f7SEugenio Pérez const vring_used_t *used = svq->vring.used; 341100890f7SEugenio Pérez vring_used_elem_t used_elem; 342100890f7SEugenio Pérez uint16_t last_used; 343100890f7SEugenio Pérez 344100890f7SEugenio Pérez if (!vhost_svq_more_used(svq)) { 345100890f7SEugenio Pérez return NULL; 346100890f7SEugenio Pérez } 347100890f7SEugenio Pérez 348100890f7SEugenio Pérez /* Only get used array entries after they have been exposed by dev */ 349100890f7SEugenio Pérez smp_rmb(); 350100890f7SEugenio Pérez last_used = svq->last_used_idx & (svq->vring.num - 1); 351100890f7SEugenio Pérez used_elem.id = le32_to_cpu(used->ring[last_used].id); 352100890f7SEugenio Pérez used_elem.len = le32_to_cpu(used->ring[last_used].len); 353100890f7SEugenio Pérez 354100890f7SEugenio Pérez svq->last_used_idx++; 355100890f7SEugenio Pérez if (unlikely(used_elem.id >= svq->vring.num)) { 356100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", 357100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 358100890f7SEugenio Pérez return NULL; 359100890f7SEugenio Pérez } 360100890f7SEugenio Pérez 361100890f7SEugenio Pérez if (unlikely(!svq->ring_id_maps[used_elem.id])) { 362100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 363100890f7SEugenio Pérez "Device %s says index %u is used, but it was not available", 364100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 365100890f7SEugenio Pérez return NULL; 366100890f7SEugenio Pérez } 367100890f7SEugenio Pérez 368100890f7SEugenio Pérez descs[used_elem.id].next = svq->free_head; 369100890f7SEugenio Pérez svq->free_head = used_elem.id; 370100890f7SEugenio Pérez 371100890f7SEugenio Pérez *len = used_elem.len; 372100890f7SEugenio Pérez return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); 373100890f7SEugenio Pérez } 374100890f7SEugenio Pérez 375100890f7SEugenio Pérez static void vhost_svq_flush(VhostShadowVirtqueue *svq, 376100890f7SEugenio Pérez bool check_for_avail_queue) 377100890f7SEugenio Pérez { 378100890f7SEugenio Pérez VirtQueue *vq = svq->vq; 379100890f7SEugenio Pérez 380100890f7SEugenio Pérez /* Forward as many used buffers as possible. */ 381100890f7SEugenio Pérez do { 382100890f7SEugenio Pérez unsigned i = 0; 383100890f7SEugenio Pérez 384100890f7SEugenio Pérez vhost_svq_disable_notification(svq); 385100890f7SEugenio Pérez while (true) { 386100890f7SEugenio Pérez uint32_t len; 387100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); 388100890f7SEugenio Pérez if (!elem) { 389100890f7SEugenio Pérez break; 390100890f7SEugenio Pérez } 391100890f7SEugenio Pérez 392100890f7SEugenio Pérez if (unlikely(i >= svq->vring.num)) { 393100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 394100890f7SEugenio Pérez "More than %u used buffers obtained in a %u size SVQ", 395100890f7SEugenio Pérez i, svq->vring.num); 396100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i); 397100890f7SEugenio Pérez virtqueue_flush(vq, i); 398100890f7SEugenio Pérez return; 399100890f7SEugenio Pérez } 400100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i++); 401100890f7SEugenio Pérez } 402100890f7SEugenio Pérez 403100890f7SEugenio Pérez virtqueue_flush(vq, i); 404100890f7SEugenio Pérez event_notifier_set(&svq->svq_call); 405100890f7SEugenio Pérez 406100890f7SEugenio Pérez if (check_for_avail_queue && svq->next_guest_avail_elem) { 407100890f7SEugenio Pérez /* 408100890f7SEugenio Pérez * Avail ring was full when vhost_svq_flush was called, so it's a 409100890f7SEugenio Pérez * good moment to make more descriptors available if possible. 410100890f7SEugenio Pérez */ 411100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 412100890f7SEugenio Pérez } 413100890f7SEugenio Pérez } while (!vhost_svq_enable_notification(svq)); 414100890f7SEugenio Pérez } 415100890f7SEugenio Pérez 416100890f7SEugenio Pérez /** 417100890f7SEugenio Pérez * Forward used buffers. 418a8ac8858SEugenio Pérez * 419a8ac8858SEugenio Pérez * @n: hdev call event notifier, the one that device set to notify svq. 420100890f7SEugenio Pérez * 421100890f7SEugenio Pérez * Note that we are not making any buffers available in the loop, there is no 422100890f7SEugenio Pérez * way that it runs more than virtqueue size times. 423a8ac8858SEugenio Pérez */ 424a8ac8858SEugenio Pérez static void vhost_svq_handle_call(EventNotifier *n) 425a8ac8858SEugenio Pérez { 426a8ac8858SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, 427a8ac8858SEugenio Pérez hdev_call); 428a8ac8858SEugenio Pérez event_notifier_test_and_clear(n); 429100890f7SEugenio Pérez vhost_svq_flush(svq, true); 430a8ac8858SEugenio Pérez } 431a8ac8858SEugenio Pérez 432a8ac8858SEugenio Pérez /** 433a8ac8858SEugenio Pérez * Set the call notifier for the SVQ to call the guest 434a8ac8858SEugenio Pérez * 435a8ac8858SEugenio Pérez * @svq: Shadow virtqueue 436a8ac8858SEugenio Pérez * @call_fd: call notifier 437a8ac8858SEugenio Pérez * 438a8ac8858SEugenio Pérez * Called on BQL context. 439a8ac8858SEugenio Pérez */ 440a8ac8858SEugenio Pérez void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) 441a8ac8858SEugenio Pérez { 442a8ac8858SEugenio Pérez if (call_fd == VHOST_FILE_UNBIND) { 443a8ac8858SEugenio Pérez /* 444a8ac8858SEugenio Pérez * Fail event_notifier_set if called handling device call. 445a8ac8858SEugenio Pérez * 446a8ac8858SEugenio Pérez * SVQ still needs device notifications, since it needs to keep 447a8ac8858SEugenio Pérez * forwarding used buffers even with the unbind. 448a8ac8858SEugenio Pérez */ 449a8ac8858SEugenio Pérez memset(&svq->svq_call, 0, sizeof(svq->svq_call)); 450a8ac8858SEugenio Pérez } else { 451a8ac8858SEugenio Pérez event_notifier_init_fd(&svq->svq_call, call_fd); 452a8ac8858SEugenio Pérez } 453a8ac8858SEugenio Pérez } 454a8ac8858SEugenio Pérez 455a8ac8858SEugenio Pérez /** 456dafb34c9SEugenio Pérez * Get the shadow vq vring address. 457dafb34c9SEugenio Pérez * @svq: Shadow virtqueue 458dafb34c9SEugenio Pérez * @addr: Destination to store address 459dafb34c9SEugenio Pérez */ 460dafb34c9SEugenio Pérez void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, 461dafb34c9SEugenio Pérez struct vhost_vring_addr *addr) 462dafb34c9SEugenio Pérez { 463*34e3c94eSEugenio Pérez addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc; 464*34e3c94eSEugenio Pérez addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail; 465*34e3c94eSEugenio Pérez addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used; 466dafb34c9SEugenio Pérez } 467dafb34c9SEugenio Pérez 468dafb34c9SEugenio Pérez size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) 469dafb34c9SEugenio Pérez { 470dafb34c9SEugenio Pérez size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; 471dafb34c9SEugenio Pérez size_t avail_size = offsetof(vring_avail_t, ring) + 472dafb34c9SEugenio Pérez sizeof(uint16_t) * svq->vring.num; 473dafb34c9SEugenio Pérez 474dafb34c9SEugenio Pérez return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size); 475dafb34c9SEugenio Pérez } 476dafb34c9SEugenio Pérez 477dafb34c9SEugenio Pérez size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) 478dafb34c9SEugenio Pérez { 479dafb34c9SEugenio Pérez size_t used_size = offsetof(vring_used_t, ring) + 480dafb34c9SEugenio Pérez sizeof(vring_used_elem_t) * svq->vring.num; 481dafb34c9SEugenio Pérez return ROUND_UP(used_size, qemu_real_host_page_size); 482dafb34c9SEugenio Pérez } 483dafb34c9SEugenio Pérez 484dafb34c9SEugenio Pérez /** 485dff4426fSEugenio Pérez * Set a new file descriptor for the guest to kick the SVQ and notify for avail 486dff4426fSEugenio Pérez * 487dff4426fSEugenio Pérez * @svq: The svq 488dff4426fSEugenio Pérez * @svq_kick_fd: The svq kick fd 489dff4426fSEugenio Pérez * 490dff4426fSEugenio Pérez * Note that the SVQ will never close the old file descriptor. 491dff4426fSEugenio Pérez */ 492dff4426fSEugenio Pérez void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) 493dff4426fSEugenio Pérez { 494dff4426fSEugenio Pérez EventNotifier *svq_kick = &svq->svq_kick; 495dff4426fSEugenio Pérez bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick); 496dff4426fSEugenio Pérez bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND; 497dff4426fSEugenio Pérez 498dff4426fSEugenio Pérez if (poll_stop) { 499dff4426fSEugenio Pérez event_notifier_set_handler(svq_kick, NULL); 500dff4426fSEugenio Pérez } 501dff4426fSEugenio Pérez 502dff4426fSEugenio Pérez /* 503dff4426fSEugenio Pérez * event_notifier_set_handler already checks for guest's notifications if 504dff4426fSEugenio Pérez * they arrive at the new file descriptor in the switch, so there is no 505dff4426fSEugenio Pérez * need to explicitly check for them. 506dff4426fSEugenio Pérez */ 507dff4426fSEugenio Pérez if (poll_start) { 508dff4426fSEugenio Pérez event_notifier_init_fd(svq_kick, svq_kick_fd); 509dff4426fSEugenio Pérez event_notifier_set(svq_kick); 510100890f7SEugenio Pérez event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); 511100890f7SEugenio Pérez } 512100890f7SEugenio Pérez } 513100890f7SEugenio Pérez 514100890f7SEugenio Pérez /** 515100890f7SEugenio Pérez * Start the shadow virtqueue operation. 516100890f7SEugenio Pérez * 517100890f7SEugenio Pérez * @svq: Shadow Virtqueue 518100890f7SEugenio Pérez * @vdev: VirtIO device 519100890f7SEugenio Pérez * @vq: Virtqueue to shadow 520100890f7SEugenio Pérez */ 521100890f7SEugenio Pérez void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, 522100890f7SEugenio Pérez VirtQueue *vq) 523100890f7SEugenio Pérez { 524100890f7SEugenio Pérez size_t desc_size, driver_size, device_size; 525100890f7SEugenio Pérez 526100890f7SEugenio Pérez svq->next_guest_avail_elem = NULL; 527100890f7SEugenio Pérez svq->shadow_avail_idx = 0; 528100890f7SEugenio Pérez svq->shadow_used_idx = 0; 529100890f7SEugenio Pérez svq->last_used_idx = 0; 530100890f7SEugenio Pérez svq->vdev = vdev; 531100890f7SEugenio Pérez svq->vq = vq; 532100890f7SEugenio Pérez 533100890f7SEugenio Pérez svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); 534100890f7SEugenio Pérez driver_size = vhost_svq_driver_area_size(svq); 535100890f7SEugenio Pérez device_size = vhost_svq_device_area_size(svq); 536100890f7SEugenio Pérez svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size); 537100890f7SEugenio Pérez desc_size = sizeof(vring_desc_t) * svq->vring.num; 538100890f7SEugenio Pérez svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); 539100890f7SEugenio Pérez memset(svq->vring.desc, 0, driver_size); 540100890f7SEugenio Pérez svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); 541100890f7SEugenio Pérez memset(svq->vring.used, 0, device_size); 542100890f7SEugenio Pérez svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 543100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num - 1; i++) { 544100890f7SEugenio Pérez svq->vring.desc[i].next = cpu_to_le16(i + 1); 545dff4426fSEugenio Pérez } 546dff4426fSEugenio Pérez } 547dff4426fSEugenio Pérez 548dff4426fSEugenio Pérez /** 549dff4426fSEugenio Pérez * Stop the shadow virtqueue operation. 550dff4426fSEugenio Pérez * @svq: Shadow Virtqueue 551dff4426fSEugenio Pérez */ 552dff4426fSEugenio Pérez void vhost_svq_stop(VhostShadowVirtqueue *svq) 553dff4426fSEugenio Pérez { 554dff4426fSEugenio Pérez event_notifier_set_handler(&svq->svq_kick, NULL); 555100890f7SEugenio Pérez g_autofree VirtQueueElement *next_avail_elem = NULL; 556100890f7SEugenio Pérez 557100890f7SEugenio Pérez if (!svq->vq) { 558100890f7SEugenio Pérez return; 559100890f7SEugenio Pérez } 560100890f7SEugenio Pérez 561100890f7SEugenio Pérez /* Send all pending used descriptors to guest */ 562100890f7SEugenio Pérez vhost_svq_flush(svq, false); 563100890f7SEugenio Pérez 564100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num; ++i) { 565100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = NULL; 566100890f7SEugenio Pérez elem = g_steal_pointer(&svq->ring_id_maps[i]); 567100890f7SEugenio Pérez if (elem) { 568100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, elem, 0); 569100890f7SEugenio Pérez } 570100890f7SEugenio Pérez } 571100890f7SEugenio Pérez 572100890f7SEugenio Pérez next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem); 573100890f7SEugenio Pérez if (next_avail_elem) { 574100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, next_avail_elem, 0); 575100890f7SEugenio Pérez } 576100890f7SEugenio Pérez svq->vq = NULL; 577100890f7SEugenio Pérez g_free(svq->ring_id_maps); 578100890f7SEugenio Pérez qemu_vfree(svq->vring.desc); 579100890f7SEugenio Pérez qemu_vfree(svq->vring.used); 580dff4426fSEugenio Pérez } 58110857ec0SEugenio Pérez 58210857ec0SEugenio Pérez /** 58310857ec0SEugenio Pérez * Creates vhost shadow virtqueue, and instructs the vhost device to use the 58410857ec0SEugenio Pérez * shadow methods and file descriptors. 58510857ec0SEugenio Pérez * 586*34e3c94eSEugenio Pérez * @iova_tree: Tree to perform descriptors translations 587*34e3c94eSEugenio Pérez * 58810857ec0SEugenio Pérez * Returns the new virtqueue or NULL. 58910857ec0SEugenio Pérez * 59010857ec0SEugenio Pérez * In case of error, reason is reported through error_report. 59110857ec0SEugenio Pérez */ 592*34e3c94eSEugenio Pérez VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) 59310857ec0SEugenio Pérez { 59410857ec0SEugenio Pérez g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); 59510857ec0SEugenio Pérez int r; 59610857ec0SEugenio Pérez 59710857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_kick, 0); 59810857ec0SEugenio Pérez if (r != 0) { 59910857ec0SEugenio Pérez error_report("Couldn't create kick event notifier: %s (%d)", 60010857ec0SEugenio Pérez g_strerror(errno), errno); 60110857ec0SEugenio Pérez goto err_init_hdev_kick; 60210857ec0SEugenio Pérez } 60310857ec0SEugenio Pérez 60410857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_call, 0); 60510857ec0SEugenio Pérez if (r != 0) { 60610857ec0SEugenio Pérez error_report("Couldn't create call event notifier: %s (%d)", 60710857ec0SEugenio Pérez g_strerror(errno), errno); 60810857ec0SEugenio Pérez goto err_init_hdev_call; 60910857ec0SEugenio Pérez } 61010857ec0SEugenio Pérez 611dff4426fSEugenio Pérez event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); 612a8ac8858SEugenio Pérez event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); 613*34e3c94eSEugenio Pérez svq->iova_tree = iova_tree; 61410857ec0SEugenio Pérez return g_steal_pointer(&svq); 61510857ec0SEugenio Pérez 61610857ec0SEugenio Pérez err_init_hdev_call: 61710857ec0SEugenio Pérez event_notifier_cleanup(&svq->hdev_kick); 61810857ec0SEugenio Pérez 61910857ec0SEugenio Pérez err_init_hdev_kick: 62010857ec0SEugenio Pérez return NULL; 62110857ec0SEugenio Pérez } 62210857ec0SEugenio Pérez 62310857ec0SEugenio Pérez /** 62410857ec0SEugenio Pérez * Free the resources of the shadow virtqueue. 62510857ec0SEugenio Pérez * 62610857ec0SEugenio Pérez * @pvq: gpointer to SVQ so it can be used by autofree functions. 62710857ec0SEugenio Pérez */ 62810857ec0SEugenio Pérez void vhost_svq_free(gpointer pvq) 62910857ec0SEugenio Pérez { 63010857ec0SEugenio Pérez VhostShadowVirtqueue *vq = pvq; 631dff4426fSEugenio Pérez vhost_svq_stop(vq); 63210857ec0SEugenio Pérez event_notifier_cleanup(&vq->hdev_kick); 633a8ac8858SEugenio Pérez event_notifier_set_handler(&vq->hdev_call, NULL); 63410857ec0SEugenio Pérez event_notifier_cleanup(&vq->hdev_call); 63510857ec0SEugenio Pérez g_free(vq); 63610857ec0SEugenio Pérez } 637