xref: /qemu/hw/virtio/vhost-vdpa.c (revision 34e3c94edaef2e0748ebb7bc6bb83a970345a6ad)
1 /*
2  * vhost-vdpa
3  *
4  *  Copyright(c) 2017-2018 Intel Corporation.
5  *  Copyright(c) 2020 Red Hat, Inc.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include <linux/vhost.h>
14 #include <linux/vfio.h>
15 #include <sys/eventfd.h>
16 #include <sys/ioctl.h>
17 #include "hw/virtio/vhost.h"
18 #include "hw/virtio/vhost-backend.h"
19 #include "hw/virtio/virtio-net.h"
20 #include "hw/virtio/vhost-shadow-virtqueue.h"
21 #include "hw/virtio/vhost-vdpa.h"
22 #include "exec/address-spaces.h"
23 #include "qemu/main-loop.h"
24 #include "cpu.h"
25 #include "trace.h"
26 #include "qemu-common.h"
27 #include "qapi/error.h"
28 
29 /*
30  * Return one past the end of the section. Be careful with uint64_t
31  * conversions!
32  */
33 static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
34 {
35     Int128 llend = int128_make64(section->offset_within_address_space);
36     llend = int128_add(llend, section->size);
37     llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
38 
39     return llend;
40 }
41 
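/*
 * Decide whether the memory listener should skip this section: regions that
 * are neither RAM nor IOMMU, protected memory, ram_device (MMIO) regions,
 * and sections outside the device's usable IOVA range are not mapped.
 */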
42 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
43                                                 uint64_t iova_min,
44                                                 uint64_t iova_max)
45 {
46     Int128 llend;
47 
48     if ((!memory_region_is_ram(section->mr) &&
49          !memory_region_is_iommu(section->mr)) ||
50         memory_region_is_protected(section->mr) ||
51         /* vhost-vDPA doesn't allow MMIO to be mapped */
52         memory_region_is_ram_device(section->mr)) {
53         return true;
54     }
55 
56     if (section->offset_within_address_space < iova_min) {
57         error_report("RAM section out of device range (min=0x%" PRIx64
58                      ", addr=0x%" HWADDR_PRIx ")",
59                      iova_min, section->offset_within_address_space);
60         return true;
61     }
62 
63     llend = vhost_vdpa_section_end(section);
64     if (int128_gt(llend, int128_make64(iova_max))) {
65         error_report("RAM section out of device range (max=0x%" PRIx64
66                      ", end addr=0x%" PRIx64 ")",
67                      iova_max, int128_get64(llend));
68         return true;
69     }
70 
71     return false;
72 }
73 
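/*
 * Install a single IOTLB mapping in the device: send a VHOST_IOTLB_UPDATE
 * message over the vhost-vdpa device fd, mapping [iova, iova + size) to the
 * host virtual address vaddr with RO or RW permission.
 */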
74 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
75                               void *vaddr, bool readonly)
76 {
77     struct vhost_msg_v2 msg = {};
78     int fd = v->device_fd;
79     int ret = 0;
80 
81     msg.type = v->msg_type;
82     msg.iotlb.iova = iova;
83     msg.iotlb.size = size;
84     msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
85     msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
86     msg.iotlb.type = VHOST_IOTLB_UPDATE;
87 
88     trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
89                              msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
90 
91     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
92         error_report("failed to write, fd=%d, errno=%d (%s)",
93             fd, errno, strerror(errno));
94         return -EIO;
95     }
96 
97     return ret;
98 }
99 
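/*
 * Remove an IOTLB mapping by sending a VHOST_IOTLB_INVALIDATE message for
 * the [iova, iova + size) range over the device fd.
 */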
100 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
101                                 hwaddr size)
102 {
103     struct vhost_msg_v2 msg = {};
104     int fd = v->device_fd;
105     int ret = 0;
106 
107     msg.type = v->msg_type;
108     msg.iotlb.iova = iova;
109     msg.iotlb.size = size;
110     msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
111 
112     trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
113                                msg.iotlb.size, msg.iotlb.type);
114 
115     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
116         error_report("failed to write, fd=%d, errno=%d (%s)",
117             fd, errno, strerror(errno));
118         return -EIO;
119     }
120 
121     return ret;
122 }
123 
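/* Open an IOTLB update batch by writing a VHOST_IOTLB_BATCH_BEGIN message. */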
124 static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
125 {
126     int fd = v->device_fd;
127     struct vhost_msg_v2 msg = {
128         .type = v->msg_type,
129         .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
130     };
131 
132     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
133         error_report("failed to write, fd=%d, errno=%d (%s)",
134                      fd, errno, strerror(errno));
135     }
136 }
137 
138 static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
139 {
140     if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
141         !v->iotlb_batch_begin_sent) {
142         vhost_vdpa_listener_begin_batch(v);
143     }
144 
145     v->iotlb_batch_begin_sent = true;
146 }
147 
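/*
 * MemoryListener commit callback: if the backend supports IOTLB batching and
 * a batch is currently open, close it with VHOST_IOTLB_BATCH_END and mark
 * that no batch is in flight.
 */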
148 static void vhost_vdpa_listener_commit(MemoryListener *listener)
149 {
150     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
151     struct vhost_dev *dev = v->dev;
152     struct vhost_msg_v2 msg = {};
153     int fd = v->device_fd;
154 
155     if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
156         return;
157     }
158 
159     if (!v->iotlb_batch_begin_sent) {
160         return;
161     }
162 
163     msg.type = v->msg_type;
164     msg.iotlb.type = VHOST_IOTLB_BATCH_END;
165 
166     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
167         error_report("failed to write, fd=%d, errno=%d (%s)",
168                      fd, errno, strerror(errno));
169     }
170 
171     v->iotlb_batch_begin_sent = false;
172 }
173 
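/*
 * Map a newly added RAM section into the device. When shadow virtqueues are
 * enabled, the IOVA is allocated from the IOVA tree rather than reusing the
 * section's guest physical address, so guest memory and SVQ ring mappings
 * share a single IOVA space.
 */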
174 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
175                                            MemoryRegionSection *section)
176 {
177     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
178     hwaddr iova;
179     Int128 llend, llsize;
180     void *vaddr;
181     int ret;
182 
183     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
184                                             v->iova_range.last)) {
185         return;
186     }
187 
188     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
189                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
190         error_report("%s received unaligned region", __func__);
191         return;
192     }
193 
194     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
195     llend = vhost_vdpa_section_end(section);
196     if (int128_ge(int128_make64(iova), llend)) {
197         return;
198     }
199 
200     memory_region_ref(section->mr);
201 
202     /* Here we assume that memory_region_is_ram(section->mr)==true */
203 
204     vaddr = memory_region_get_ram_ptr(section->mr) +
205             section->offset_within_region +
206             (iova - section->offset_within_address_space);
207 
208     trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
209                                          vaddr, section->readonly);
210 
211     llsize = int128_sub(llend, int128_make64(iova));
212     if (v->shadow_vqs_enabled) {
213         DMAMap mem_region = {
214             .translated_addr = (hwaddr)(uintptr_t)vaddr,
215             .size = int128_get64(llsize) - 1,
216             .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
217         };
218 
219         int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
220         if (unlikely(r != IOVA_OK)) {
221             error_report("Can't allocate a mapping (%d)", r);
222             goto fail;
223         }
224 
225         iova = mem_region.iova;
226     }
227 
228     vhost_vdpa_iotlb_batch_begin_once(v);
229     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
230                              vaddr, section->readonly);
231     if (ret) {
232         error_report("vhost vdpa map fail!");
233         goto fail;
234     }
235 
236     return;
237 
238 fail:
239     /*
240      * On the init path we could report the error and fail gracefully;
241      * at runtime there is not much we can do other than report a
242      * hardware error.
243      */
244     error_report("vhost-vdpa: DMA mapping failed, unable to continue");
245     return;
246 
247 }
248 
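/*
 * Unmap a RAM section from the device. With shadow virtqueues enabled, the
 * IOVA to invalidate is looked up in the IOVA tree (and the entry removed)
 * instead of being derived from the guest physical address.
 */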
249 static void vhost_vdpa_listener_region_del(MemoryListener *listener,
250                                            MemoryRegionSection *section)
251 {
252     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
253     hwaddr iova;
254     Int128 llend, llsize;
255     int ret;
256 
257     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
258                                             v->iova_range.last)) {
259         return;
260     }
261 
262     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
263                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
264         error_report("%s received unaligned region", __func__);
265         return;
266     }
267 
268     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
269     llend = vhost_vdpa_section_end(section);
270 
271     trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
272 
273     if (int128_ge(int128_make64(iova), llend)) {
274         return;
275     }
276 
277     llsize = int128_sub(llend, int128_make64(iova));
278 
279     if (v->shadow_vqs_enabled) {
280         const DMAMap *result;
281         const void *vaddr = memory_region_get_ram_ptr(section->mr) +
282             section->offset_within_region +
283             (iova - section->offset_within_address_space);
284         DMAMap mem_region = {
285             .translated_addr = (hwaddr)(uintptr_t)vaddr,
286             .size = int128_get64(llsize) - 1,
287         };
288 
289         result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
290         iova = result->iova;
291         vhost_iova_tree_remove(v->iova_tree, &mem_region);
292     }
293     vhost_vdpa_iotlb_batch_begin_once(v);
294     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
295     if (ret) {
296         error_report("vhost_vdpa dma unmap error!");
297     }
298 
299     memory_region_unref(section->mr);
300 }
301 /*
302  * The IOTLB API is used by vhost-vDPA, which requires incremental updates
303  * of the mapping. So we cannot use the generic vhost memory listener,
304  * which depends on the addnop().
305  */
306 static const MemoryListener vhost_vdpa_memory_listener = {
307     .name = "vhost-vdpa",
308     .commit = vhost_vdpa_listener_commit,
309     .region_add = vhost_vdpa_listener_region_add,
310     .region_del = vhost_vdpa_listener_region_del,
311 };
312 
313 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
314                              void *arg)
315 {
316     struct vhost_vdpa *v = dev->opaque;
317     int fd = v->device_fd;
318     int ret;
319 
320     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
321 
322     ret = ioctl(fd, request, arg);
323     return ret < 0 ? -errno : ret;
324 }
325 
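/*
 * OR the given bits into the device status: read the current status, set the
 * new bits, write it back, and re-read to verify the device accepted them.
 */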
326 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
327 {
328     uint8_t s;
329     int ret;
330 
331     trace_vhost_vdpa_add_status(dev, status);
332     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
333     if (ret < 0) {
334         return ret;
335     }
336 
337     s |= status;
338 
339     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
340     if (ret < 0) {
341         return ret;
342     }
343 
344     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
345     if (ret < 0) {
346         return ret;
347     }
348 
349     if (!(s & status)) {
350         return -EIO;
351     }
352 
353     return 0;
354 }
355 
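/*
 * Query the usable IOVA range from the device; if the ioctl is not
 * supported, fall back to the whole 64-bit address space.
 */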
356 static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
357 {
358     int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
359                               &v->iova_range);
360     if (ret != 0) {
361         v->iova_range.first = 0;
362         v->iova_range.last = UINT64_MAX;
363     }
364 
365     trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
366                                     v->iova_range.last);
367 }
368 
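/*
 * A multiqueue device is backed by several vhost_dev structures sharing one
 * vhost-vdpa fd; device-wide requests only need to be issued by the first
 * one (index 0), so this returns true when the request should be skipped.
 */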
369 static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
370 {
371     struct vhost_vdpa *v = dev->opaque;
372 
373     return v->index != 0;
374 }
375 
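/*
 * When shadow virtqueues are enabled, validate the device features against
 * what SVQ supports and allocate one VhostShadowVirtqueue per queue of this
 * vhost_dev.
 */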
376 static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
377                                Error **errp)
378 {
379     g_autoptr(GPtrArray) shadow_vqs = NULL;
380     uint64_t dev_features, svq_features;
381     int r;
382     bool ok;
383 
384     if (!v->shadow_vqs_enabled) {
385         return 0;
386     }
387 
388     r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
389     if (r != 0) {
390         error_setg_errno(errp, -r, "Can't get vdpa device features");
391         return r;
392     }
393 
394     svq_features = dev_features;
395     ok = vhost_svq_valid_features(svq_features, errp);
396     if (unlikely(!ok)) {
397         return -1;
398     }
399 
400     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
401     for (unsigned n = 0; n < hdev->nvqs; ++n) {
402         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
403 
404         if (unlikely(!svq)) {
405             error_setg(errp, "Cannot create svq %u", n);
406             return -1;
407         }
408         g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
409     }
410 
411     v->shadow_vqs = g_steal_pointer(&shadow_vqs);
412     return 0;
413 }
414 
415 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
416 {
417     struct vhost_vdpa *v;
418     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
419     trace_vhost_vdpa_init(dev, opaque);
420     int ret;
421 
422     /*
423      * Similar to VFIO, we end up pinning all guest memory and have to
424      * disable discarding of RAM.
425      */
426     ret = ram_block_discard_disable(true);
427     if (ret) {
428         error_report("Cannot set discarding of RAM broken");
429         return ret;
430     }
431 
432     v = opaque;
433     v->dev = dev;
434     dev->opaque = opaque;
435     v->listener = vhost_vdpa_memory_listener;
436     v->msg_type = VHOST_IOTLB_MSG_V2;
437     ret = vhost_vdpa_init_svq(dev, v, errp);
438     if (ret) {
439         goto err;
440     }
441 
442     vhost_vdpa_get_iova_range(v);
443 
444     if (vhost_vdpa_one_time_request(dev)) {
445         return 0;
446     }
447 
448     vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
449                                VIRTIO_CONFIG_S_DRIVER);
450 
451     return 0;
452 
453 err:
454     ram_block_discard_disable(false);
455     return ret;
456 }
457 
458 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
459                                             int queue_index)
460 {
461     size_t page_size = qemu_real_host_page_size;
462     struct vhost_vdpa *v = dev->opaque;
463     VirtIODevice *vdev = dev->vdev;
464     VhostVDPAHostNotifier *n;
465 
466     n = &v->notifier[queue_index];
467 
468     if (n->addr) {
469         virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
470         object_unparent(OBJECT(&n->mr));
471         munmap(n->addr, page_size);
472         n->addr = NULL;
473     }
474 }
475 
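/*
 * mmap() the notification (doorbell) page of the given queue from the
 * vhost-vdpa fd and expose it to the guest as a ram-device memory region,
 * so guest kicks can reach the device directly through that page.
 */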
476 static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
477 {
478     size_t page_size = qemu_real_host_page_size;
479     struct vhost_vdpa *v = dev->opaque;
480     VirtIODevice *vdev = dev->vdev;
481     VhostVDPAHostNotifier *n;
482     int fd = v->device_fd;
483     void *addr;
484     char *name;
485 
486     vhost_vdpa_host_notifier_uninit(dev, queue_index);
487 
488     n = &v->notifier[queue_index];
489 
490     addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
491                 queue_index * page_size);
492     if (addr == MAP_FAILED) {
493         goto err;
494     }
495 
496     name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
497                            v, queue_index);
498     memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
499                                       page_size, addr);
500     g_free(name);
501 
502     if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
503         object_unparent(OBJECT(&n->mr));
504         munmap(addr, page_size);
505         goto err;
506     }
507     n->addr = addr;
508 
509     return 0;
510 
511 err:
512     return -1;
513 }
514 
515 static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
516 {
517     int i;
518 
519     for (i = dev->vq_index; i < dev->vq_index + n; i++) {
520         vhost_vdpa_host_notifier_uninit(dev, i);
521     }
522 }
523 
524 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
525 {
526     struct vhost_vdpa *v = dev->opaque;
527     int i;
528 
529     if (v->shadow_vqs_enabled) {
530         /* FIXME SVQ is not compatible with host notifiers mr */
531         return;
532     }
533 
534     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
535         if (vhost_vdpa_host_notifier_init(dev, i)) {
536             goto err;
537         }
538     }
539 
540     return;
541 
542 err:
543     vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
544     return;
545 }
546 
547 static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
548 {
549     struct vhost_vdpa *v = dev->opaque;
550     size_t idx;
551 
552     if (!v->shadow_vqs) {
553         return;
554     }
555 
556     for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
557         vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
558     }
559     g_ptr_array_free(v->shadow_vqs, true);
560 }
561 
562 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
563 {
564     struct vhost_vdpa *v;
565     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
566     v = dev->opaque;
567     trace_vhost_vdpa_cleanup(dev, v);
568     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
569     memory_listener_unregister(&v->listener);
570     vhost_vdpa_svq_cleanup(dev);
571 
572     dev->opaque = NULL;
573     ram_block_discard_disable(false);
574 
575     return 0;
576 }
577 
578 static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
579 {
580     trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
581     return INT_MAX;
582 }
583 
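/*
 * vhost-vdpa does not use the memory table: mappings are installed through
 * the IOTLB memory listener instead. This only traces the regions and
 * rejects a non-zero padding field.
 */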
584 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
585                                     struct vhost_memory *mem)
586 {
587     if (vhost_vdpa_one_time_request(dev)) {
588         return 0;
589     }
590 
591     trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
592     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
593         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
594         int i;
595         for (i = 0; i < mem->nregions; i++) {
596             trace_vhost_vdpa_dump_regions(dev, i,
597                                           mem->regions[i].guest_phys_addr,
598                                           mem->regions[i].memory_size,
599                                           mem->regions[i].userspace_addr,
600                                           mem->regions[i].flags_padding);
601         }
602     }
603     if (mem->padding) {
604         return -EINVAL;
605     }
606 
607     return 0;
608 }
609 
610 static int vhost_vdpa_set_features(struct vhost_dev *dev,
611                                    uint64_t features)
612 {
613     int ret;
614 
615     if (vhost_vdpa_one_time_request(dev)) {
616         return 0;
617     }
618 
619     trace_vhost_vdpa_set_features(dev, features);
620     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
621     if (ret) {
622         return ret;
623     }
624 
625     return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
626 }
627 
628 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
629 {
630     uint64_t features;
631     uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
632         0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
633     int r;
634 
635     if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
636         return -EFAULT;
637     }
638 
639     features &= f;
640 
641     if (vhost_vdpa_one_time_request(dev)) {
642         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
643         if (r) {
644             return -EFAULT;
645         }
646     }
647 
648     dev->backend_cap = features;
649 
650     return 0;
651 }
652 
653 static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
654                                     uint32_t *device_id)
655 {
656     int ret;
657     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
658     trace_vhost_vdpa_get_device_id(dev, *device_id);
659     return ret;
660 }
661 
662 static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
663 {
664     if (!v->shadow_vqs_enabled) {
665         return;
666     }
667 
668     for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
669         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
670         vhost_svq_stop(svq);
671     }
672 }
673 
674 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
675 {
676     struct vhost_vdpa *v = dev->opaque;
677     int ret;
678     uint8_t status = 0;
679 
680     vhost_vdpa_reset_svq(v);
681 
682     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
683     trace_vhost_vdpa_reset_device(dev, status);
684     return ret;
685 }
686 
687 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
688 {
689     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
690 
691     trace_vhost_vdpa_get_vq_index(dev, idx, idx);
692     return idx;
693 }
694 
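/* Enable every vring of this vhost_dev with VHOST_VDPA_SET_VRING_ENABLE. */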
695 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
696 {
697     int i;
698     trace_vhost_vdpa_set_vring_ready(dev);
699     for (i = 0; i < dev->nvqs; ++i) {
700         struct vhost_vring_state state = {
701             .index = dev->vq_index + i,
702             .num = 1,
703         };
704         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
705     }
706     return 0;
707 }
708 
709 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
710                                    uint32_t config_len)
711 {
712     int b, len;
713     char line[QEMU_HEXDUMP_LINE_LEN];
714 
715     for (b = 0; b < config_len; b += 16) {
716         len = config_len - b;
717         qemu_hexdump_line(line, b, config, len, false);
718         trace_vhost_vdpa_dump_config(dev, line);
719     }
720 }
721 
722 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
723                                    uint32_t offset, uint32_t size,
724                                    uint32_t flags)
725 {
726     struct vhost_vdpa_config *config;
727     int ret;
728     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
729 
730     trace_vhost_vdpa_set_config(dev, offset, size, flags);
731     config = g_malloc(size + config_size);
732     config->off = offset;
733     config->len = size;
734     memcpy(config->buf, data, size);
735     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
736         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
737         vhost_vdpa_dump_config(dev, data, size);
738     }
739     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
740     g_free(config);
741     return ret;
742 }
743 
744 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
745                                    uint32_t config_len, Error **errp)
746 {
747     struct vhost_vdpa_config *v_config;
748     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
749     int ret;
750 
751     trace_vhost_vdpa_get_config(dev, config, config_len);
752     v_config = g_malloc(config_len + config_size);
753     v_config->len = config_len;
754     v_config->off = 0;
755     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
756     memcpy(config, v_config->buf, config_len);
757     g_free(v_config);
758     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
759         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
760         vhost_vdpa_dump_config(dev, config, config_len);
761     }
762     return ret;
763 }
764 
765 static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
766                                          struct vhost_vring_state *ring)
767 {
768     trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
769     return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
770 }
771 
772 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
773                                          struct vhost_vring_file *file)
774 {
775     trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
776     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
777 }
778 
779 static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
780                                          struct vhost_vring_file *file)
781 {
782     trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
783     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
784 }
785 
786 static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
787                                          struct vhost_vring_addr *addr)
788 {
789     trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
790                                 addr->desc_user_addr, addr->used_user_addr,
791                                 addr->avail_user_addr,
792                                 addr->log_guest_addr);
793 
794     return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
795 
796 }
797 
798 /**
799  * Set the shadow virtqueue descriptors to the device
800  *
801  * @dev: The vhost device model
802  * @svq: The shadow virtqueue
803  * @idx: The index of the virtqueue in the vhost device
804  * @errp: Error
805  *
806  * Note that this function does not rewind the kick file descriptor if it
807  * cannot set the call one.
808  */
809 static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
810                                   VhostShadowVirtqueue *svq, unsigned idx,
811                                   Error **errp)
812 {
813     struct vhost_vring_file file = {
814         .index = dev->vq_index + idx,
815     };
816     const EventNotifier *event_notifier = &svq->hdev_kick;
817     int r;
818 
819     file.fd = event_notifier_get_fd(event_notifier);
820     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
821     if (unlikely(r != 0)) {
822         error_setg_errno(errp, -r, "Can't set device kick fd");
823         return r;
824     }
825 
826     event_notifier = &svq->hdev_call;
827     file.fd = event_notifier_get_fd(event_notifier);
828     r = vhost_vdpa_set_vring_dev_call(dev, &file);
829     if (unlikely(r != 0)) {
830         error_setg_errno(errp, -r, "Can't set device call fd");
831     }
832 
833     return r;
834 }
835 
836 /**
837  * Unmap a SVQ area in the device
838  */
839 static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
840                                       const DMAMap *needle)
841 {
842     const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
843     hwaddr size;
844     int r;
845 
846     if (unlikely(!result)) {
847         error_report("Unable to find SVQ address to unmap");
848         return false;
849     }
850 
851     size = ROUND_UP(result->size, qemu_real_host_page_size);
852     r = vhost_vdpa_dma_unmap(v, result->iova, size);
853     return r == 0;
854 }
855 
856 static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
857                                        const VhostShadowVirtqueue *svq)
858 {
859     DMAMap needle = {};
860     struct vhost_vdpa *v = dev->opaque;
861     struct vhost_vring_addr svq_addr;
862     bool ok;
863 
864     vhost_svq_get_vring_addr(svq, &svq_addr);
865 
866     needle.translated_addr = svq_addr.desc_user_addr;
867     ok = vhost_vdpa_svq_unmap_ring(v, &needle);
868     if (unlikely(!ok)) {
869         return false;
870     }
871 
872     needle.translated_addr = svq_addr.used_user_addr;
873     return vhost_vdpa_svq_unmap_ring(v, &needle);
874 }
875 
876 /**
877  * Map the SVQ area in the device
878  *
879  * @v: Vhost-vdpa device
880  * @needle: The area to search an IOVA for
881  * @errp: Error pointer
882  */
883 static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
884                                     Error **errp)
885 {
886     int r;
887 
888     r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
889     if (unlikely(r != IOVA_OK)) {
890         error_setg(errp, "Cannot allocate iova (%d)", r);
891         return false;
892     }
893 
894     r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
895                            (void *)(uintptr_t)needle->translated_addr,
896                            needle->perm == IOMMU_RO);
897     if (unlikely(r != 0)) {
898         error_setg_errno(errp, -r, "Cannot map region to device");
899         vhost_iova_tree_remove(v->iova_tree, needle);
900     }
901 
902     return r == 0;
903 }
904 
905 /**
906  * Map the shadow virtqueue rings in the device
907  *
908  * @dev: The vhost device
909  * @svq: The shadow virtqueue
910  * @addr: Assigned IOVA addresses
911  * @errp: Error pointer
912  */
913 static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
914                                      const VhostShadowVirtqueue *svq,
915                                      struct vhost_vring_addr *addr,
916                                      Error **errp)
917 {
918     DMAMap device_region, driver_region;
919     struct vhost_vring_addr svq_addr;
920     struct vhost_vdpa *v = dev->opaque;
921     size_t device_size = vhost_svq_device_area_size(svq);
922     size_t driver_size = vhost_svq_driver_area_size(svq);
923     size_t avail_offset;
924     bool ok;
925 
926     ERRP_GUARD();
927     vhost_svq_get_vring_addr(svq, &svq_addr);
928 
929     driver_region = (DMAMap) {
930         .translated_addr = svq_addr.desc_user_addr,
931         .size = driver_size - 1,
932         .perm = IOMMU_RO,
933     };
934     ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
935     if (unlikely(!ok)) {
936         error_prepend(errp, "Cannot create vq driver region: ");
937         return false;
938     }
939     addr->desc_user_addr = driver_region.iova;
940     avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
941     addr->avail_user_addr = driver_region.iova + avail_offset;
942 
943     device_region = (DMAMap) {
944         .translated_addr = svq_addr.used_user_addr,
945         .size = device_size - 1,
946         .perm = IOMMU_RW,
947     };
948     ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
949     if (unlikely(!ok)) {
950         error_prepend(errp, "Cannot create vq device region: ");
951         vhost_vdpa_svq_unmap_ring(v, &driver_region);
952     }
953     addr->used_user_addr = device_region.iova;
954 
955     return ok;
956 }
957 
958 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
959                                  VhostShadowVirtqueue *svq, unsigned idx,
960                                  Error **errp)
961 {
962     uint16_t vq_index = dev->vq_index + idx;
963     struct vhost_vring_state s = {
964         .index = vq_index,
965     };
966     int r;
967 
968     r = vhost_vdpa_set_dev_vring_base(dev, &s);
969     if (unlikely(r)) {
970         error_setg_errno(errp, -r, "Cannot set vring base");
971         return false;
972     }
973 
974     r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
975     return r == 0;
976 }
977 
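/*
 * Start all shadow virtqueues: set the device vring base and kick/call fds,
 * start each SVQ, map its rings through the IOVA tree, and point the device
 * at the SVQ ring addresses. On failure, already-started queues are unwound.
 */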
978 static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
979 {
980     struct vhost_vdpa *v = dev->opaque;
981     Error *err = NULL;
982     unsigned i;
983 
984     if (!v->shadow_vqs) {
985         return true;
986     }
987 
988     for (i = 0; i < v->shadow_vqs->len; ++i) {
989         VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
990         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
991         struct vhost_vring_addr addr = {
992             .index = i,
993         };
994         int r;
995         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
996         if (unlikely(!ok)) {
997             goto err;
998         }
999 
1000         vhost_svq_start(svq, dev->vdev, vq);
1001         ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
1002         if (unlikely(!ok)) {
1003             goto err_map;
1004         }
1005 
1006         /* Override vring GPA set by vhost subsystem */
1007         r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
1008         if (unlikely(r != 0)) {
1009             error_setg_errno(&err, -r, "Cannot set device address");
1010             goto err_set_addr;
1011         }
1012     }
1013 
1014     return true;
1015 
1016 err_set_addr:
1017     vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
1018 
1019 err_map:
1020     vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
1021 
1022 err:
1023     error_reportf_err(err, "Cannot setup SVQ %u: ", i);
1024     for (unsigned j = 0; j < i; ++j) {
1025         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
1026         vhost_vdpa_svq_unmap_rings(dev, svq);
1027         vhost_svq_stop(svq);
1028     }
1029 
1030     return false;
1031 }
1032 
1033 static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
1034 {
1035     struct vhost_vdpa *v = dev->opaque;
1036 
1037     if (!v->shadow_vqs) {
1038         return true;
1039     }
1040 
1041     for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
1042         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
1043         bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
1044         if (unlikely(!ok)) {
1045             return false;
1046         }
1047     }
1048 
1049     return true;
1050 }
1051 
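/*
 * Start or stop the device. On start: install host notifiers, start the
 * shadow virtqueues (if any) and enable the vrings; once the last vhost_dev
 * of the device is started, register the memory listener and set DRIVER_OK.
 * On stop the sequence is reversed and the device is reset.
 */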
1052 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
1053 {
1054     struct vhost_vdpa *v = dev->opaque;
1055     bool ok;
1056     trace_vhost_vdpa_dev_start(dev, started);
1057 
1058     if (started) {
1059         vhost_vdpa_host_notifiers_init(dev);
1060         ok = vhost_vdpa_svqs_start(dev);
1061         if (unlikely(!ok)) {
1062             return -1;
1063         }
1064         vhost_vdpa_set_vring_ready(dev);
1065     } else {
1066         ok = vhost_vdpa_svqs_stop(dev);
1067         if (unlikely(!ok)) {
1068             return -1;
1069         }
1070         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
1071     }
1072 
1073     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
1074         return 0;
1075     }
1076 
1077     if (started) {
1078         memory_listener_register(&v->listener, &address_space_memory);
1079         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
1080     } else {
1081         vhost_vdpa_reset_device(dev);
1082         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
1083                                    VIRTIO_CONFIG_S_DRIVER);
1084         memory_listener_unregister(&v->listener);
1085 
1086         return 0;
1087     }
1088 }
1089 
1090 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
1091                                      struct vhost_log *log)
1092 {
1093     if (vhost_vdpa_one_time_request(dev)) {
1094         return 0;
1095     }
1096 
1097     trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
1098                                   log->log);
1099     return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
1100 }
1101 
1102 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
1103                                        struct vhost_vring_addr *addr)
1104 {
1105     struct vhost_vdpa *v = dev->opaque;
1106 
1107     if (v->shadow_vqs_enabled) {
1108         /*
1109          * Device vring addr was set at device start. SVQ base is handled by
1110          * VirtQueue code.
1111          */
1112         return 0;
1113     }
1114 
1115     return vhost_vdpa_set_vring_dev_addr(dev, addr);
1116 }
1117 
1118 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
1119                                       struct vhost_vring_state *ring)
1120 {
1121     trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
1122     return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
1123 }
1124 
1125 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
1126                                        struct vhost_vring_state *ring)
1127 {
1128     struct vhost_vdpa *v = dev->opaque;
1129 
1130     if (v->shadow_vqs_enabled) {
1131         /*
1132          * Device vring base was set at device start. SVQ base is handled by
1133          * VirtQueue code.
1134          */
1135         return 0;
1136     }
1137 
1138     return vhost_vdpa_set_dev_vring_base(dev, ring);
1139 }
1140 
1141 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
1142                                        struct vhost_vring_state *ring)
1143 {
1144     int ret;
1145 
1146     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
1147     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
1148     return ret;
1149 }
1150 
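/*
 * With shadow virtqueues enabled the guest's kick fd is handed to the SVQ,
 * which forwards notifications to the device; otherwise the fd is passed
 * straight to the device.
 */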
1151 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
1152                                        struct vhost_vring_file *file)
1153 {
1154     struct vhost_vdpa *v = dev->opaque;
1155     int vdpa_idx = file->index - dev->vq_index;
1156 
1157     if (v->shadow_vqs_enabled) {
1158         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
1159         vhost_svq_set_svq_kick_fd(svq, file->fd);
1160         return 0;
1161     } else {
1162         return vhost_vdpa_set_vring_dev_kick(dev, file);
1163     }
1164 }
1165 
1166 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
1167                                        struct vhost_vring_file *file)
1168 {
1169     struct vhost_vdpa *v = dev->opaque;
1170 
1171     if (v->shadow_vqs_enabled) {
1172         int vdpa_idx = file->index - dev->vq_index;
1173         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
1174 
1175         vhost_svq_set_svq_call_fd(svq, file->fd);
1176         return 0;
1177     } else {
1178         return vhost_vdpa_set_vring_dev_call(dev, file);
1179     }
1180 }
1181 
1182 static int vhost_vdpa_get_features(struct vhost_dev *dev,
1183                                      uint64_t *features)
1184 {
1185     int ret;
1186 
1187     ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
1188     trace_vhost_vdpa_get_features(dev, *features);
1189     return ret;
1190 }
1191 
1192 static int vhost_vdpa_set_owner(struct vhost_dev *dev)
1193 {
1194     if (vhost_vdpa_one_time_request(dev)) {
1195         return 0;
1196     }
1197 
1198     trace_vhost_vdpa_set_owner(dev);
1199     return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
1200 }
1201 
1202 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
1203                     struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
1204 {
1205     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
1206     addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
1207     addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
1208     addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
1209     trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
1210                                  addr->avail_user_addr, addr->used_user_addr);
1211     return 0;
1212 }
1213 
1214 static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
1215 {
1216     return true;
1217 }
1218 
1219 const VhostOps vdpa_ops = {
1220         .backend_type = VHOST_BACKEND_TYPE_VDPA,
1221         .vhost_backend_init = vhost_vdpa_init,
1222         .vhost_backend_cleanup = vhost_vdpa_cleanup,
1223         .vhost_set_log_base = vhost_vdpa_set_log_base,
1224         .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
1225         .vhost_set_vring_num = vhost_vdpa_set_vring_num,
1226         .vhost_set_vring_base = vhost_vdpa_set_vring_base,
1227         .vhost_get_vring_base = vhost_vdpa_get_vring_base,
1228         .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
1229         .vhost_set_vring_call = vhost_vdpa_set_vring_call,
1230         .vhost_get_features = vhost_vdpa_get_features,
1231         .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
1232         .vhost_set_owner = vhost_vdpa_set_owner,
1233         .vhost_set_vring_endian = NULL,
1234         .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
1235         .vhost_set_mem_table = vhost_vdpa_set_mem_table,
1236         .vhost_set_features = vhost_vdpa_set_features,
1237         .vhost_reset_device = vhost_vdpa_reset_device,
1238         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
1239         .vhost_get_config  = vhost_vdpa_get_config,
1240         .vhost_set_config = vhost_vdpa_set_config,
1241         .vhost_requires_shm_log = NULL,
1242         .vhost_migration_done = NULL,
1243         .vhost_backend_can_merge = NULL,
1244         .vhost_net_set_mtu = NULL,
1245         .vhost_set_iotlb_callback = NULL,
1246         .vhost_send_device_iotlb_msg = NULL,
1247         .vhost_dev_start = vhost_vdpa_dev_start,
1248         .vhost_get_device_id = vhost_vdpa_get_device_id,
1249         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
1250         .vhost_force_iommu = vhost_vdpa_force_iommu,
1251 };
1252