xref: /qemu/hw/virtio/vhost-vdpa.c (revision d96be4c8942a9f983f8a1642b397150d0d80c6ea)
1 /*
2  * vhost-vdpa
3  *
4  *  Copyright(c) 2017-2018 Intel Corporation.
5  *  Copyright(c) 2020 Red Hat, Inc.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include <linux/vhost.h>
14 #include <linux/vfio.h>
15 #include <sys/eventfd.h>
16 #include <sys/ioctl.h>
17 #include "hw/virtio/vhost.h"
18 #include "hw/virtio/vhost-backend.h"
19 #include "hw/virtio/virtio-net.h"
20 #include "hw/virtio/vhost-shadow-virtqueue.h"
21 #include "hw/virtio/vhost-vdpa.h"
22 #include "exec/address-spaces.h"
23 #include "qemu/main-loop.h"
24 #include "cpu.h"
25 #include "trace.h"
26 #include "qemu-common.h"
27 #include "qapi/error.h"
28 
29 /*
30  * Return one past the end of the section. Be careful with uint64_t
31  * conversions!
32  */
33 static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
34 {
35     Int128 llend = int128_make64(section->offset_within_address_space);
36     llend = int128_add(llend, section->size);
37     llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
38 
39     return llend;
40 }
41 
42 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
43                                                 uint64_t iova_min,
44                                                 uint64_t iova_max)
45 {
46     Int128 llend;
47 
48     if ((!memory_region_is_ram(section->mr) &&
49          !memory_region_is_iommu(section->mr)) ||
50         memory_region_is_protected(section->mr) ||
51         /* vhost-vDPA doesn't allow MMIO to be mapped */
52         memory_region_is_ram_device(section->mr)) {
53         return true;
54     }
55 
56     if (section->offset_within_address_space < iova_min) {
57         error_report("RAM section out of device range (min=0x%" PRIx64
58                      ", addr=0x%" HWADDR_PRIx ")",
59                      iova_min, section->offset_within_address_space);
60         return true;
61     }
62 
63     llend = vhost_vdpa_section_end(section);
64     if (int128_gt(llend, int128_make64(iova_max))) {
65         error_report("RAM section out of device range (max=0x%" PRIx64
66                      ", end addr=0x%" PRIx64 ")",
67                      iova_max, int128_get64(llend));
68         return true;
69     }
70 
71     return false;
72 }
73 
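/*
 * Map [iova, iova + size) to the host virtual address vaddr by sending a
 * VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd, with read-only or
 * read-write permission depending on @readonly.
 */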
74 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
75                               void *vaddr, bool readonly)
76 {
77     struct vhost_msg_v2 msg = {};
78     int fd = v->device_fd;
79     int ret = 0;
80 
81     msg.type = v->msg_type;
82     msg.iotlb.iova = iova;
83     msg.iotlb.size = size;
84     msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
85     msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
86     msg.iotlb.type = VHOST_IOTLB_UPDATE;
87 
88     trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
89                              msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
90 
91     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
92         error_report("failed to write, fd=%d, errno=%d (%s)",
93             fd, errno, strerror(errno));
94         return -EIO;
95     }
96 
97     return ret;
98 }
99 
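/*
 * Remove the mapping for [iova, iova + size) by sending a
 * VHOST_IOTLB_INVALIDATE message to the vhost-vdpa device fd.
 */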
100 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
101                                 hwaddr size)
102 {
103     struct vhost_msg_v2 msg = {};
104     int fd = v->device_fd;
105     int ret = 0;
106 
107     msg.type = v->msg_type;
108     msg.iotlb.iova = iova;
109     msg.iotlb.size = size;
110     msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
111 
112     trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
113                                msg.iotlb.size, msg.iotlb.type);
114 
115     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
116         error_report("failed to write, fd=%d, errno=%d (%s)",
117             fd, errno, strerror(errno));
118         return -EIO;
119     }
120 
121     return ret;
122 }
123 
124 static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
125 {
126     int fd = v->device_fd;
127     struct vhost_msg_v2 msg = {
128         .type = v->msg_type,
129         .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
130     };
131 
132     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
133         error_report("failed to write, fd=%d, errno=%d (%s)",
134                      fd, errno, strerror(errno));
135     }
136 }
137 
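/*
 * Send VHOST_IOTLB_BATCH_BEGIN once per batch, and only if the backend
 * advertises VHOST_BACKEND_F_IOTLB_BATCH.
 */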
138 static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
139 {
140     if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
141         !v->iotlb_batch_begin_sent) {
142         vhost_vdpa_listener_begin_batch(v);
143     }
144 
145     v->iotlb_batch_begin_sent = true;
146 }
147 
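/*
 * Close the current IOTLB batch with VHOST_IOTLB_BATCH_END, provided batching
 * is supported and a batch was actually started.
 */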
148 static void vhost_vdpa_listener_commit(MemoryListener *listener)
149 {
150     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
151     struct vhost_dev *dev = v->dev;
152     struct vhost_msg_v2 msg = {};
153     int fd = v->device_fd;
154 
155     if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
156         return;
157     }
158 
159     if (!v->iotlb_batch_begin_sent) {
160         return;
161     }
162 
163     msg.type = v->msg_type;
164     msg.iotlb.type = VHOST_IOTLB_BATCH_END;
165 
166     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
167         error_report("failed to write, fd=%d, errno=%d (%s)",
168                      fd, errno, strerror(errno));
169     }
170 
171     v->iotlb_batch_begin_sent = false;
172 }
173 
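/*
 * Map the host memory backing a newly added RAM section into the device IOVA
 * space, after page-aligning the section boundaries.
 */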
174 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
175                                            MemoryRegionSection *section)
176 {
177     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
178     hwaddr iova;
179     Int128 llend, llsize;
180     void *vaddr;
181     int ret;
182 
183     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
184                                             v->iova_range.last)) {
185         return;
186     }
187 
188     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
189                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
190         error_report("%s received unaligned region", __func__);
191         return;
192     }
193 
194     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
195     llend = vhost_vdpa_section_end(section);
196     if (int128_ge(int128_make64(iova), llend)) {
197         return;
198     }
199 
200     memory_region_ref(section->mr);
201 
202     /* Here we assume that memory_region_is_ram(section->mr)==true */
203 
204     vaddr = memory_region_get_ram_ptr(section->mr) +
205             section->offset_within_region +
206             (iova - section->offset_within_address_space);
207 
208     trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
209                                          vaddr, section->readonly);
210 
211     llsize = int128_sub(llend, int128_make64(iova));
212 
213     vhost_vdpa_iotlb_batch_begin_once(v);
214     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
215                              vaddr, section->readonly);
216     if (ret) {
217         error_report("vhost-vdpa: DMA map failed");
218         goto fail;
219     }
220 
221     return;
222 
223 fail:
224     /*
225      * On the initfn path, store the first error in the container so we
226      * can gracefully fail.  At runtime, there's not much we can do other
227      * than throw a hardware error.
228      */
229     error_report("vhost-vdpa: DMA mapping failed, unable to continue");
230     return;
231 
232 }
233 
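/*
 * Unmap a RAM section that is being removed from the device IOVA space and
 * drop the reference taken in vhost_vdpa_listener_region_add().
 */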
234 static void vhost_vdpa_listener_region_del(MemoryListener *listener,
235                                            MemoryRegionSection *section)
236 {
237     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
238     hwaddr iova;
239     Int128 llend, llsize;
240     int ret;
241 
242     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
243                                             v->iova_range.last)) {
244         return;
245     }
246 
247     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
248                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
249         error_report("%s received unaligned region", __func__);
250         return;
251     }
252 
253     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
254     llend = vhost_vdpa_section_end(section);
255 
256     trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
257 
258     if (int128_ge(int128_make64(iova), llend)) {
259         return;
260     }
261 
262     llsize = int128_sub(llend, int128_make64(iova));
263 
264     vhost_vdpa_iotlb_batch_begin_once(v);
265     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
266     if (ret) {
267         error_report("vhost-vdpa: DMA unmap failed");
268     }
269 
270     memory_region_unref(section->mr);
271 }
272 /*
273  * The IOTLB API is used by vhost-vdpa, which requires incremental updating
274  * of the mapping. So we cannot use the generic vhost memory listener, which
275  * depends on the addnop().
276  */
277 static const MemoryListener vhost_vdpa_memory_listener = {
278     .name = "vhost-vdpa",
279     .commit = vhost_vdpa_listener_commit,
280     .region_add = vhost_vdpa_listener_region_add,
281     .region_del = vhost_vdpa_listener_region_del,
282 };
283 
284 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
285                              void *arg)
286 {
287     struct vhost_vdpa *v = dev->opaque;
288     int fd = v->device_fd;
289     int ret;
290 
291     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
292 
293     ret = ioctl(fd, request, arg);
294     return ret < 0 ? -errno : ret;
295 }
296 
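/*
 * OR the given bits into the device status and read it back to verify that
 * the device accepted them.
 */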
297 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
298 {
299     uint8_t s;
300     int ret;
301 
302     trace_vhost_vdpa_add_status(dev, status);
303     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
304     if (ret < 0) {
305         return ret;
306     }
307 
308     s |= status;
309 
310     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
311     if (ret < 0) {
312         return ret;
313     }
314 
315     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
316     if (ret < 0) {
317         return ret;
318     }
319 
320     if (!(s & status)) {
321         return -EIO;
322     }
323 
324     return 0;
325 }
326 
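/*
 * Query the usable IOVA range from the device; on failure, fall back to the
 * full 64-bit range.
 */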
327 static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
328 {
329     int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
330                               &v->iova_range);
331     if (ret != 0) {
332         v->iova_range.first = 0;
333         v->iova_range.last = UINT64_MAX;
334     }
335 
336     trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
337                                     v->iova_range.last);
338 }
339 
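/*
 * Some requests only need to be issued once per device; they are performed on
 * the first vhost_vdpa instance (index 0) and skipped on the others.
 */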
340 static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
341 {
342     struct vhost_vdpa *v = dev->opaque;
343 
344     return v->index != 0;
345 }
346 
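/*
 * When shadow virtqueues are enabled, check that the device features are
 * compatible with SVQ and allocate one shadow virtqueue per device queue.
 */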
347 static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
348                                Error **errp)
349 {
350     g_autoptr(GPtrArray) shadow_vqs = NULL;
351     uint64_t dev_features, svq_features;
352     int r;
353     bool ok;
354 
355     if (!v->shadow_vqs_enabled) {
356         return 0;
357     }
358 
359     r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
360     if (r != 0) {
361         error_setg_errno(errp, -r, "Can't get vdpa device features");
362         return r;
363     }
364 
365     svq_features = dev_features;
366     ok = vhost_svq_valid_features(svq_features, errp);
367     if (unlikely(!ok)) {
368         return -1;
369     }
370 
371     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
372     for (unsigned n = 0; n < hdev->nvqs; ++n) {
373         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
374 
375         if (unlikely(!svq)) {
376             error_setg(errp, "Cannot create svq %u", n);
377             return -1;
378         }
379         g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
380     }
381 
382     v->shadow_vqs = g_steal_pointer(&shadow_vqs);
383     return 0;
384 }
385 
386 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
387 {
388     struct vhost_vdpa *v;
389     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
390     trace_vhost_vdpa_init(dev, opaque);
391     int ret;
392 
393     /*
394      * Similar to VFIO, we end up pinning all guest memory and have to
395      * disable discarding of RAM.
396      */
397     ret = ram_block_discard_disable(true);
398     if (ret) {
399         error_report("Cannot disable discarding of RAM");
400         return ret;
401     }
402 
403     v = opaque;
404     v->dev = dev;
405     dev->opaque = opaque;
406     v->listener = vhost_vdpa_memory_listener;
407     v->msg_type = VHOST_IOTLB_MSG_V2;
408     ret = vhost_vdpa_init_svq(dev, v, errp);
409     if (ret) {
410         goto err;
411     }
412 
413     vhost_vdpa_get_iova_range(v);
414 
415     if (vhost_vdpa_one_time_request(dev)) {
416         return 0;
417     }
418 
419     vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
420                                VIRTIO_CONFIG_S_DRIVER);
421 
422     return 0;
423 
424 err:
425     ram_block_discard_disable(false);
426     return ret;
427 }
428 
429 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
430                                             int queue_index)
431 {
432     size_t page_size = qemu_real_host_page_size;
433     struct vhost_vdpa *v = dev->opaque;
434     VirtIODevice *vdev = dev->vdev;
435     VhostVDPAHostNotifier *n;
436 
437     n = &v->notifier[queue_index];
438 
439     if (n->addr) {
440         virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
441         object_unparent(OBJECT(&n->mr));
442         munmap(n->addr, page_size);
443         n->addr = NULL;
444     }
445 }
446 
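/*
 * Map the notification area of the given queue from the vhost-vdpa device fd
 * and register it with the virtio device as a host notifier memory region.
 */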
447 static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
448 {
449     size_t page_size = qemu_real_host_page_size;
450     struct vhost_vdpa *v = dev->opaque;
451     VirtIODevice *vdev = dev->vdev;
452     VhostVDPAHostNotifier *n;
453     int fd = v->device_fd;
454     void *addr;
455     char *name;
456 
457     vhost_vdpa_host_notifier_uninit(dev, queue_index);
458 
459     n = &v->notifier[queue_index];
460 
461     addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
462                 queue_index * page_size);
463     if (addr == MAP_FAILED) {
464         goto err;
465     }
466 
467     name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
468                            v, queue_index);
469     memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
470                                       page_size, addr);
471     g_free(name);
472 
473     if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
474         object_unparent(OBJECT(&n->mr));
475         munmap(addr, page_size);
476         goto err;
477     }
478     n->addr = addr;
479 
480     return 0;
481 
482 err:
483     return -1;
484 }
485 
486 static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
487 {
488     int i;
489 
490     for (i = dev->vq_index; i < dev->vq_index + n; i++) {
491         vhost_vdpa_host_notifier_uninit(dev, i);
492     }
493 }
494 
495 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
496 {
497     struct vhost_vdpa *v = dev->opaque;
498     int i;
499 
500     if (v->shadow_vqs_enabled) {
501         /* FIXME SVQ is not compatible with host notifiers mr */
502         return;
503     }
504 
505     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
506         if (vhost_vdpa_host_notifier_init(dev, i)) {
507             goto err;
508         }
509     }
510 
511     return;
512 
513 err:
514     vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
515     return;
516 }
517 
518 static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
519 {
520     struct vhost_vdpa *v = dev->opaque;
521     size_t idx;
522 
523     if (!v->shadow_vqs) {
524         return;
525     }
526 
527     for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
528         vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
529     }
530     g_ptr_array_free(v->shadow_vqs, true);
531 }
532 
533 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
534 {
535     struct vhost_vdpa *v;
536     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
537     v = dev->opaque;
538     trace_vhost_vdpa_cleanup(dev, v);
539     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
540     memory_listener_unregister(&v->listener);
541     vhost_vdpa_svq_cleanup(dev);
542 
543     dev->opaque = NULL;
544     ram_block_discard_disable(false);
545 
546     return 0;
547 }
548 
549 static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
550 {
551     trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
552     return INT_MAX;
553 }
554 
555 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
556                                     struct vhost_memory *mem)
557 {
558     if (vhost_vdpa_one_time_request(dev)) {
559         return 0;
560     }
561 
562     trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
563     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
564         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
565         int i;
566         for (i = 0; i < mem->nregions; i++) {
567             trace_vhost_vdpa_dump_regions(dev, i,
568                                           mem->regions[i].guest_phys_addr,
569                                           mem->regions[i].memory_size,
570                                           mem->regions[i].userspace_addr,
571                                           mem->regions[i].flags_padding);
572         }
573     }
574     if (mem->padding) {
575         return -EINVAL;
576     }
577 
578     return 0;
579 }
580 
581 static int vhost_vdpa_set_features(struct vhost_dev *dev,
582                                    uint64_t features)
583 {
584     int ret;
585 
586     if (vhost_vdpa_one_time_request(dev)) {
587         return 0;
588     }
589 
590     trace_vhost_vdpa_set_features(dev, features);
591     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
592     if (ret) {
593         return ret;
594     }
595 
596     return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
597 }
598 
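/*
 * Negotiate the backend features usable by QEMU (IOTLB message v2 and IOTLB
 * batching) and cache the result in dev->backend_cap.
 */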
599 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
600 {
601     uint64_t features;
602     uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
603         0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
604     int r;
605 
606     if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
607         return -EFAULT;
608     }
609 
610     features &= f;
611 
612     if (vhost_vdpa_one_time_request(dev)) {
613         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
614         if (r) {
615             return -EFAULT;
616         }
617     }
618 
619     dev->backend_cap = features;
620 
621     return 0;
622 }
623 
624 static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
625                                     uint32_t *device_id)
626 {
627     int ret;
628     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
629     trace_vhost_vdpa_get_device_id(dev, *device_id);
630     return ret;
631 }
632 
633 static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
634 {
635     if (!v->shadow_vqs_enabled) {
636         return;
637     }
638 
639     for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
640         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
641         vhost_svq_stop(svq);
642     }
643 }
644 
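/*
 * Stop any shadow virtqueues and reset the device by writing a zero status.
 */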
645 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
646 {
647     struct vhost_vdpa *v = dev->opaque;
648     int ret;
649     uint8_t status = 0;
650 
651     vhost_vdpa_reset_svq(v);
652 
653     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
654     trace_vhost_vdpa_reset_device(dev, status);
655     return ret;
656 }
657 
658 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
659 {
660     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
661 
662     trace_vhost_vdpa_get_vq_index(dev, idx, idx);
663     return idx;
664 }
665 
666 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
667 {
668     int i;
669     trace_vhost_vdpa_set_vring_ready(dev);
670     for (i = 0; i < dev->nvqs; ++i) {
671         struct vhost_vring_state state = {
672             .index = dev->vq_index + i,
673             .num = 1,
674         };
675         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
676     }
677     return 0;
678 }
679 
680 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
681                                    uint32_t config_len)
682 {
683     int b, len;
684     char line[QEMU_HEXDUMP_LINE_LEN];
685 
686     for (b = 0; b < config_len; b += 16) {
687         len = config_len - b;
688         qemu_hexdump_line(line, b, config, len, false);
689         trace_vhost_vdpa_dump_config(dev, line);
690     }
691 }
692 
693 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
694                                    uint32_t offset, uint32_t size,
695                                    uint32_t flags)
696 {
697     struct vhost_vdpa_config *config;
698     int ret;
699     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
700 
701     trace_vhost_vdpa_set_config(dev, offset, size, flags);
702     config = g_malloc(size + config_size);
703     config->off = offset;
704     config->len = size;
705     memcpy(config->buf, data, size);
706     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
707         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
708         vhost_vdpa_dump_config(dev, data, size);
709     }
710     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
711     g_free(config);
712     return ret;
713 }
714 
715 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
716                                    uint32_t config_len, Error **errp)
717 {
718     struct vhost_vdpa_config *v_config;
719     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
720     int ret;
721 
722     trace_vhost_vdpa_get_config(dev, config, config_len);
723     v_config = g_malloc(config_len + config_size);
724     v_config->len = config_len;
725     v_config->off = 0;
726     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
727     memcpy(config, v_config->buf, config_len);
728     g_free(v_config);
729     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
730         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
731         vhost_vdpa_dump_config(dev, config, config_len);
732     }
733     return ret;
734 }
735 
736 static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
737                                          struct vhost_vring_state *ring)
738 {
739     trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
740     return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
741 }
742 
743 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
744                                          struct vhost_vring_file *file)
745 {
746     trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
747     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
748 }
749 
750 static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
751                                          struct vhost_vring_file *file)
752 {
753     trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
754     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
755 }
756 
757 static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
758                                          struct vhost_vring_addr *addr)
759 {
760     trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
761                                 addr->desc_user_addr, addr->used_user_addr,
762                                 addr->avail_user_addr,
763                                 addr->log_guest_addr);
764 
765     return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
766 
767 }
768 
769 /**
770  * Set the shadow virtqueue descriptors to the device
771  *
772  * @dev: The vhost device model
773  * @svq: The shadow virtqueue
774  * @idx: The index of the virtqueue in the vhost device
775  * @errp: Error
776  *
777  * Note that this function does not rewind the kick file descriptor if it
778  * cannot set the call one.
779  */
780 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
781                                  VhostShadowVirtqueue *svq, unsigned idx,
782                                  Error **errp)
783 {
784     struct vhost_vring_file file = {
785         .index = dev->vq_index + idx,
786     };
787     const EventNotifier *event_notifier = &svq->hdev_kick;
788     int r;
789 
790     file.fd = event_notifier_get_fd(event_notifier);
791     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
792     if (unlikely(r != 0)) {
793         error_setg_errno(errp, -r, "Can't set device kick fd");
794         return false;
795     }
796 
797     event_notifier = &svq->hdev_call;
798     file.fd = event_notifier_get_fd(event_notifier);
799     r = vhost_vdpa_set_vring_dev_call(dev, &file);
800     if (unlikely(r != 0)) {
801         error_setg_errno(errp, -r, "Can't set device call fd");
802     }
803 
804     return r == 0;
805 }
806 
807 static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
808 {
809     struct vhost_vdpa *v = dev->opaque;
810     Error *err = NULL;
811     unsigned i;
812 
813     if (!v->shadow_vqs) {
814         return true;
815     }
816 
817     for (i = 0; i < v->shadow_vqs->len; ++i) {
818         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
819         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
820         if (unlikely(!ok)) {
821             error_reportf_err(err, "Cannot setup SVQ %u: ", i);
822             return false;
823         }
824     }
825 
826     return true;
827 }
828 
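/*
 * Start or stop the device. Host notifiers, shadow virtqueues and vring
 * enabling are handled per virtqueue group; the memory listener and device
 * status transitions are only applied for the last group of the device.
 */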
829 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
830 {
831     struct vhost_vdpa *v = dev->opaque;
832     bool ok;
833     trace_vhost_vdpa_dev_start(dev, started);
834 
835     if (started) {
836         vhost_vdpa_host_notifiers_init(dev);
837         ok = vhost_vdpa_svqs_start(dev);
838         if (unlikely(!ok)) {
839             return -1;
840         }
841         vhost_vdpa_set_vring_ready(dev);
842     } else {
843         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
844     }
845 
846     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
847         return 0;
848     }
849 
850     if (started) {
851         memory_listener_register(&v->listener, &address_space_memory);
852         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
853     } else {
854         vhost_vdpa_reset_device(dev);
855         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
856                                    VIRTIO_CONFIG_S_DRIVER);
857         memory_listener_unregister(&v->listener);
858 
859         return 0;
860     }
861 }
862 
863 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
864                                      struct vhost_log *log)
865 {
866     if (vhost_vdpa_one_time_request(dev)) {
867         return 0;
868     }
869 
870     trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
871                                   log->log);
872     return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
873 }
874 
875 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
876                                        struct vhost_vring_addr *addr)
877 {
878     struct vhost_vdpa *v = dev->opaque;
879 
880     if (v->shadow_vqs_enabled) {
881         /*
882          * Device vring addr was set at device start. SVQ base is handled by
883          * VirtQueue code.
884          */
885         return 0;
886     }
887 
888     return vhost_vdpa_set_vring_dev_addr(dev, addr);
889 }
890 
891 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
892                                       struct vhost_vring_state *ring)
893 {
894     trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
895     return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
896 }
897 
898 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
899                                        struct vhost_vring_state *ring)
900 {
901     struct vhost_vdpa *v = dev->opaque;
902 
903     if (v->shadow_vqs_enabled) {
904         /*
905          * Device vring base was set at device start. SVQ base is handled by
906          * VirtQueue code.
907          */
908         return 0;
909     }
910 
911     return vhost_vdpa_set_dev_vring_base(dev, ring);
912 }
913 
914 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
915                                        struct vhost_vring_state *ring)
916 {
917     int ret;
918 
919     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
920     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
921     return ret;
922 }
923 
924 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
925                                        struct vhost_vring_file *file)
926 {
927     struct vhost_vdpa *v = dev->opaque;
928     int vdpa_idx = file->index - dev->vq_index;
929 
930     if (v->shadow_vqs_enabled) {
931         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
932         vhost_svq_set_svq_kick_fd(svq, file->fd);
933         return 0;
934     } else {
935         return vhost_vdpa_set_vring_dev_kick(dev, file);
936     }
937 }
938 
939 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
940                                        struct vhost_vring_file *file)
941 {
942     struct vhost_vdpa *v = dev->opaque;
943 
944     if (v->shadow_vqs_enabled) {
945         int vdpa_idx = file->index - dev->vq_index;
946         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
947 
948         vhost_svq_set_svq_call_fd(svq, file->fd);
949         return 0;
950     } else {
951         return vhost_vdpa_set_vring_dev_call(dev, file);
952     }
953 }
954 
955 static int vhost_vdpa_get_features(struct vhost_dev *dev,
956                                      uint64_t *features)
957 {
958     int ret;
959 
960     ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
961     trace_vhost_vdpa_get_features(dev, *features);
962     return ret;
963 }
964 
965 static int vhost_vdpa_set_owner(struct vhost_dev *dev)
966 {
967     if (vhost_vdpa_one_time_request(dev)) {
968         return 0;
969     }
970 
971     trace_vhost_vdpa_set_owner(dev);
972     return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
973 }
974 
975 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
976                     struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
977 {
978     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
979     addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
980     addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
981     addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
982     trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
983                                  addr->avail_user_addr, addr->used_user_addr);
984     return 0;
985 }
986 
987 static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
988 {
989     return true;
990 }
991 
992 const VhostOps vdpa_ops = {
993         .backend_type = VHOST_BACKEND_TYPE_VDPA,
994         .vhost_backend_init = vhost_vdpa_init,
995         .vhost_backend_cleanup = vhost_vdpa_cleanup,
996         .vhost_set_log_base = vhost_vdpa_set_log_base,
997         .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
998         .vhost_set_vring_num = vhost_vdpa_set_vring_num,
999         .vhost_set_vring_base = vhost_vdpa_set_vring_base,
1000         .vhost_get_vring_base = vhost_vdpa_get_vring_base,
1001         .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
1002         .vhost_set_vring_call = vhost_vdpa_set_vring_call,
1003         .vhost_get_features = vhost_vdpa_get_features,
1004         .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
1005         .vhost_set_owner = vhost_vdpa_set_owner,
1006         .vhost_set_vring_endian = NULL,
1007         .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
1008         .vhost_set_mem_table = vhost_vdpa_set_mem_table,
1009         .vhost_set_features = vhost_vdpa_set_features,
1010         .vhost_reset_device = vhost_vdpa_reset_device,
1011         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
1012         .vhost_get_config  = vhost_vdpa_get_config,
1013         .vhost_set_config = vhost_vdpa_set_config,
1014         .vhost_requires_shm_log = NULL,
1015         .vhost_migration_done = NULL,
1016         .vhost_backend_can_merge = NULL,
1017         .vhost_net_set_mtu = NULL,
1018         .vhost_set_iotlb_callback = NULL,
1019         .vhost_send_device_iotlb_msg = NULL,
1020         .vhost_dev_start = vhost_vdpa_dev_start,
1021         .vhost_get_device_id = vhost_vdpa_get_device_id,
1022         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
1023         .vhost_force_iommu = vhost_vdpa_force_iommu,
1024 };
1025