xref: /qemu/hw/virtio/vhost-vdpa.c (revision 4725a4181b0fd5b646c51f079d7eac753b14b094)
1 /*
2  * vhost-vdpa
3  *
4  *  Copyright(c) 2017-2018 Intel Corporation.
5  *  Copyright(c) 2020 Red Hat, Inc.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include <linux/vhost.h>
14 #include <linux/vfio.h>
15 #include <sys/eventfd.h>
16 #include <sys/ioctl.h>
17 #include "hw/virtio/vhost.h"
18 #include "hw/virtio/vhost-backend.h"
19 #include "hw/virtio/virtio-net.h"
20 #include "hw/virtio/vhost-shadow-virtqueue.h"
21 #include "hw/virtio/vhost-vdpa.h"
22 #include "exec/address-spaces.h"
23 #include "qemu/main-loop.h"
24 #include "cpu.h"
25 #include "trace.h"
26 #include "qemu-common.h"
27 #include "qapi/error.h"
28 
/*
 * Return one past the last byte of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    /* 128-bit math so a section ending at the top of the 64-bit space
     * cannot wrap around. */
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    /* Truncate the exclusive end down to a target-page boundary. */
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}
41 
42 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
43                                                 uint64_t iova_min,
44                                                 uint64_t iova_max)
45 {
46     Int128 llend;
47 
48     if ((!memory_region_is_ram(section->mr) &&
49          !memory_region_is_iommu(section->mr)) ||
50         memory_region_is_protected(section->mr) ||
51         /* vhost-vDPA doesn't allow MMIO to be mapped  */
52         memory_region_is_ram_device(section->mr)) {
53         return true;
54     }
55 
56     if (section->offset_within_address_space < iova_min) {
57         error_report("RAM section out of device range (min=0x%" PRIx64
58                      ", addr=0x%" HWADDR_PRIx ")",
59                      iova_min, section->offset_within_address_space);
60         return true;
61     }
62 
63     llend = vhost_vdpa_section_end(section);
64     if (int128_gt(llend, int128_make64(iova_max))) {
65         error_report("RAM section out of device range (max=0x%" PRIx64
66                      ", end addr=0x%" PRIx64 ")",
67                      iova_max, int128_get64(llend));
68         return true;
69     }
70 
71     return false;
72 }
73 
74 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
75                               void *vaddr, bool readonly)
76 {
77     struct vhost_msg_v2 msg = {};
78     int fd = v->device_fd;
79     int ret = 0;
80 
81     msg.type = v->msg_type;
82     msg.iotlb.iova = iova;
83     msg.iotlb.size = size;
84     msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
85     msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
86     msg.iotlb.type = VHOST_IOTLB_UPDATE;
87 
88    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
89                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
90 
91     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
92         error_report("failed to write, fd=%d, errno=%d (%s)",
93             fd, errno, strerror(errno));
94         return -EIO ;
95     }
96 
97     return ret;
98 }
99 
100 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
101                                 hwaddr size)
102 {
103     struct vhost_msg_v2 msg = {};
104     int fd = v->device_fd;
105     int ret = 0;
106 
107     msg.type = v->msg_type;
108     msg.iotlb.iova = iova;
109     msg.iotlb.size = size;
110     msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
111 
112     trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
113                                msg.iotlb.size, msg.iotlb.type);
114 
115     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
116         error_report("failed to write, fd=%d, errno=%d (%s)",
117             fd, errno, strerror(errno));
118         return -EIO ;
119     }
120 
121     return ret;
122 }
123 
124 static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
125 {
126     int fd = v->device_fd;
127     struct vhost_msg_v2 msg = {
128         .type = v->msg_type,
129         .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
130     };
131 
132     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
133         error_report("failed to write, fd=%d, errno=%d (%s)",
134                      fd, errno, strerror(errno));
135     }
136 }
137 
/*
 * Emit a single BATCH_BEGIN before the first map/unmap of a transaction.
 * Note the flag is set even when the backend lacks
 * VHOST_BACKEND_F_IOTLB_BATCH; vhost_vdpa_listener_commit() re-checks the
 * capability before acting on it, so this is harmless.
 */
static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}
147 
148 static void vhost_vdpa_listener_commit(MemoryListener *listener)
149 {
150     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
151     struct vhost_dev *dev = v->dev;
152     struct vhost_msg_v2 msg = {};
153     int fd = v->device_fd;
154 
155     if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
156         return;
157     }
158 
159     if (!v->iotlb_batch_begin_sent) {
160         return;
161     }
162 
163     msg.type = v->msg_type;
164     msg.iotlb.type = VHOST_IOTLB_BATCH_END;
165 
166     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
167         error_report("failed to write, fd=%d, errno=%d (%s)",
168                      fd, errno, strerror(errno));
169     }
170 
171     v->iotlb_batch_begin_sent = false;
172 }
173 
/* Memory listener hook: map a newly-added RAM section into the device. */
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    /* Skip regions the device cannot (or need not) map. */
    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    /* Guest and region offsets must share the same sub-page alignment. */
    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    /* The section may collapse to nothing after page alignment. */
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    /* Keep the region alive for as long as the mapping exists;
     * released in vhost_vdpa_listener_region_del(). */
    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr)==true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail;
    }

    return;

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;

}
233 
/* Memory listener hook: unmap a removed RAM section from the device. */
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    /* Must mirror the filtering done in region_add. */
    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    /* Nothing was mapped for sections that collapse after alignment. */
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    /* Drop the reference taken in region_add. */
    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we can not use the generic vhost memory listener,
 * which depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,           /* flush IOTLB batch */
    .region_add = vhost_vdpa_listener_region_add,   /* map new RAM */
    .region_del = vhost_vdpa_listener_region_del,   /* unmap removed RAM */
};
283 
284 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
285                              void *arg)
286 {
287     struct vhost_vdpa *v = dev->opaque;
288     int fd = v->device_fd;
289     int ret;
290 
291     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
292 
293     ret = ioctl(fd, request, arg);
294     return ret < 0 ? -errno : ret;
295 }
296 
297 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
298 {
299     uint8_t s;
300     int ret;
301 
302     trace_vhost_vdpa_add_status(dev, status);
303     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
304     if (ret < 0) {
305         return ret;
306     }
307 
308     s |= status;
309 
310     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
311     if (ret < 0) {
312         return ret;
313     }
314 
315     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
316     if (ret < 0) {
317         return ret;
318     }
319 
320     if (!(s & status)) {
321         return -EIO;
322     }
323 
324     return 0;
325 }
326 
327 static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
328 {
329     int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
330                               &v->iova_range);
331     if (ret != 0) {
332         v->iova_range.first = 0;
333         v->iova_range.last = UINT64_MAX;
334     }
335 
336     trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
337                                     v->iova_range.last);
338 }
339 
/*
 * Return true when this vhost_vdpa instance is NOT the first queue pair
 * of the device.  Device-wide ("one time") requests are only issued via
 * index 0; callers use this predicate to skip them elsewhere.
 */
static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index != 0;
}
346 
/*
 * Allocate the shadow virtqueues for @hdev when SVQ mode is enabled.
 *
 * Validates the device features against what SVQ supports, then creates
 * one VhostShadowVirtqueue per device virtqueue and hands the array to
 * @v->shadow_vqs.
 *
 * Returns 0 on success (or when SVQ is disabled), negative on failure
 * with @errp set.
 */
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                               Error **errp)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;
    uint64_t dev_features, svq_features;
    int r;
    bool ok;

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
    if (r != 0) {
        error_setg_errno(errp, -r, "Can't get vdpa device features");
        return r;
    }

    svq_features = dev_features;
    ok = vhost_svq_valid_features(svq_features, errp);
    if (unlikely(!ok)) {
        return -1;
    }

    /* vhost_svq_free runs on each element if the array is destroyed. */
    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();

        if (unlikely(!svq)) {
            error_setg(errp, "Cannot create svq %u", n);
            /* shadow_vqs autoptr frees the queues created so far. */
            return -1;
        }
        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
    }

    /* Transfer ownership out of the autoptr so the array survives return. */
    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
    return 0;
}
385 
/*
 * Backend init: wire @opaque (struct vhost_vdpa) to @dev, set up SVQ,
 * query the IOVA range, and push the initial virtio status bits.
 * Returns 0 on success, negative errno on failure.
 */
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot set discarding of RAM broken");
        return ret;
    }

    /* Link the backend-private state and the generic vhost device. */
    v = opaque;
    v->dev = dev;
    dev->opaque =  opaque ;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    ret = vhost_vdpa_init_svq(dev, v, errp);
    if (ret) {
        goto err;
    }

    vhost_vdpa_get_iova_range(v);

    /* Device-wide status setup happens via the first queue pair only. */
    if (vhost_vdpa_one_time_request(dev)) {
        return 0;
    }

    /* NOTE(review): the add_status return value is ignored here — confirm
     * that failing to ack ACKNOWLEDGE|DRIVER is intentionally non-fatal. */
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;

err:
    /* Balance the ram_block_discard_disable(true) above. */
    ram_block_discard_disable(false);
    return ret;
}
428 
429 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
430                                             int queue_index)
431 {
432     size_t page_size = qemu_real_host_page_size;
433     struct vhost_vdpa *v = dev->opaque;
434     VirtIODevice *vdev = dev->vdev;
435     VhostVDPAHostNotifier *n;
436 
437     n = &v->notifier[queue_index];
438 
439     if (n->addr) {
440         virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
441         object_unparent(OBJECT(&n->mr));
442         munmap(n->addr, page_size);
443         n->addr = NULL;
444     }
445 }
446 
447 static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
448 {
449     size_t page_size = qemu_real_host_page_size;
450     struct vhost_vdpa *v = dev->opaque;
451     VirtIODevice *vdev = dev->vdev;
452     VhostVDPAHostNotifier *n;
453     int fd = v->device_fd;
454     void *addr;
455     char *name;
456 
457     vhost_vdpa_host_notifier_uninit(dev, queue_index);
458 
459     n = &v->notifier[queue_index];
460 
461     addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
462                 queue_index * page_size);
463     if (addr == MAP_FAILED) {
464         goto err;
465     }
466 
467     name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
468                            v, queue_index);
469     memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
470                                       page_size, addr);
471     g_free(name);
472 
473     if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
474         object_unparent(OBJECT(&n->mr));
475         munmap(addr, page_size);
476         goto err;
477     }
478     n->addr = addr;
479 
480     return 0;
481 
482 err:
483     return -1;
484 }
485 
486 static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
487 {
488     int i;
489 
490     for (i = dev->vq_index; i < dev->vq_index + n; i++) {
491         vhost_vdpa_host_notifier_uninit(dev, i);
492     }
493 }
494 
495 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
496 {
497     struct vhost_vdpa *v = dev->opaque;
498     int i;
499 
500     if (v->shadow_vqs_enabled) {
501         /* FIXME SVQ is not compatible with host notifiers mr */
502         return;
503     }
504 
505     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
506         if (vhost_vdpa_host_notifier_init(dev, i)) {
507             goto err;
508         }
509     }
510 
511     return;
512 
513 err:
514     vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
515     return;
516 }
517 
518 static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
519 {
520     struct vhost_vdpa *v = dev->opaque;
521     size_t idx;
522 
523     if (!v->shadow_vqs) {
524         return;
525     }
526 
527     for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
528         vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
529     }
530     g_ptr_array_free(v->shadow_vqs, true);
531 }
532 
/* Backend cleanup: undo everything vhost_vdpa_init() set up. */
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    /* Balance the ram_block_discard_disable(true) done in init. */
    ram_block_discard_disable(false);

    return 0;
}
548 
/* vDPA places no limit on memory slots; report the maximum. */
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    int limit = INT_MAX;

    trace_vhost_vdpa_memslots_limit(dev, limit);
    return limit;
}
554 
555 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
556                                     struct vhost_memory *mem)
557 {
558     if (vhost_vdpa_one_time_request(dev)) {
559         return 0;
560     }
561 
562     trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
563     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
564         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
565         int i;
566         for (i = 0; i < mem->nregions; i++) {
567             trace_vhost_vdpa_dump_regions(dev, i,
568                                           mem->regions[i].guest_phys_addr,
569                                           mem->regions[i].memory_size,
570                                           mem->regions[i].userspace_addr,
571                                           mem->regions[i].flags_padding);
572         }
573     }
574     if (mem->padding) {
575         return -EINVAL;
576     }
577 
578     return 0;
579 }
580 
581 static int vhost_vdpa_set_features(struct vhost_dev *dev,
582                                    uint64_t features)
583 {
584     int ret;
585 
586     if (vhost_vdpa_one_time_request(dev)) {
587         return 0;
588     }
589 
590     trace_vhost_vdpa_set_features(dev, features);
591     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
592     if (ret) {
593         return ret;
594     }
595 
596     return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
597 }
598 
/*
 * Negotiate the vhost backend features this code relies on (IOTLB msg v2
 * and IOTLB batching) and cache the result in dev->backend_cap.
 *
 * NOTE(review): errors are collapsed to -EFAULT, discarding the ioctl's
 * errno.  Also, SET_BACKEND_FEATURES is issued when
 * vhost_vdpa_one_time_request() is true, the opposite polarity of the
 * other device-wide callers in this file — confirm this is intended.
 */
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    /* Keep only the features this code knows how to use. */
    features &= f;

    if (vhost_vdpa_one_time_request(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}
623 
624 static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
625                                     uint32_t *device_id)
626 {
627     int ret;
628     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
629     trace_vhost_vdpa_get_device_id(dev, *device_id);
630     return ret;
631 }
632 
633 static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
634 {
635     if (!v->shadow_vqs_enabled) {
636         return;
637     }
638 
639     for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
640         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
641         vhost_svq_stop(svq);
642     }
643 }
644 
645 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
646 {
647     struct vhost_vdpa *v = dev->opaque;
648     int ret;
649     uint8_t status = 0;
650 
651     vhost_vdpa_reset_svq(v);
652 
653     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
654     trace_vhost_vdpa_reset_device(dev, status);
655     return ret;
656 }
657 
/*
 * vDPA virtqueue indexes are global to the device, so the mapping is the
 * identity; only validate that @idx belongs to this vhost_dev.
 */
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}
665 
666 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
667 {
668     int i;
669     trace_vhost_vdpa_set_vring_ready(dev);
670     for (i = 0; i < dev->nvqs; ++i) {
671         struct vhost_vring_state state = {
672             .index = dev->vq_index + i,
673             .num = 1,
674         };
675         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
676     }
677     return 0;
678 }
679 
/*
 * Trace the device config space as hexdump lines, 16 bytes at a time.
 * NOTE(review): the remaining length (not 16) is passed to
 * qemu_hexdump_line(); this relies on that helper clamping to one line's
 * worth of bytes — confirm against its implementation.
 */
static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}
692 
693 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
694                                    uint32_t offset, uint32_t size,
695                                    uint32_t flags)
696 {
697     struct vhost_vdpa_config *config;
698     int ret;
699     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
700 
701     trace_vhost_vdpa_set_config(dev, offset, size, flags);
702     config = g_malloc(size + config_size);
703     config->off = offset;
704     config->len = size;
705     memcpy(config->buf, data, size);
706     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
707         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
708         vhost_vdpa_dump_config(dev, data, size);
709     }
710     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
711     g_free(config);
712     return ret;
713 }
714 
715 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
716                                    uint32_t config_len, Error **errp)
717 {
718     struct vhost_vdpa_config *v_config;
719     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
720     int ret;
721 
722     trace_vhost_vdpa_get_config(dev, config, config_len);
723     v_config = g_malloc(config_len + config_size);
724     v_config->len = config_len;
725     v_config->off = 0;
726     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
727     memcpy(config, v_config->buf, config_len);
728     g_free(v_config);
729     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
730         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
731         vhost_vdpa_dump_config(dev, config, config_len);
732     }
733     return ret;
734  }
735 
736 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
737                                          struct vhost_vring_file *file)
738 {
739     trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
740     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
741 }
742 
743 static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
744                                          struct vhost_vring_file *file)
745 {
746     trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
747     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
748 }
749 
750 /**
751  * Set the shadow virtqueue descriptors to the device
752  *
753  * @dev: The vhost device model
754  * @svq: The shadow virtqueue
755  * @idx: The index of the virtqueue in the vhost device
756  * @errp: Error
757  *
758  * Note that this function does not rewind kick file descriptor if cannot set
759  * call one.
760  */
761 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
762                                  VhostShadowVirtqueue *svq, unsigned idx,
763                                  Error **errp)
764 {
765     struct vhost_vring_file file = {
766         .index = dev->vq_index + idx,
767     };
768     const EventNotifier *event_notifier = &svq->hdev_kick;
769     int r;
770 
771     file.fd = event_notifier_get_fd(event_notifier);
772     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
773     if (unlikely(r != 0)) {
774         error_setg_errno(errp, -r, "Can't set device kick fd");
775         return false;
776     }
777 
778     event_notifier = &svq->hdev_call;
779     file.fd = event_notifier_get_fd(event_notifier);
780     r = vhost_vdpa_set_vring_dev_call(dev, &file);
781     if (unlikely(r != 0)) {
782         error_setg_errno(errp, -r, "Can't set device call fd");
783     }
784 
785     return r == 0;
786 }
787 
788 static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
789 {
790     struct vhost_vdpa *v = dev->opaque;
791     Error *err = NULL;
792     unsigned i;
793 
794     if (!v->shadow_vqs) {
795         return true;
796     }
797 
798     for (i = 0; i < v->shadow_vqs->len; ++i) {
799         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
800         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
801         if (unlikely(!ok)) {
802             error_reportf_err(err, "Cannot setup SVQ %u: ", i);
803             return false;
804         }
805     }
806 
807     return true;
808 }
809 
/*
 * Start/stop hook for the whole vhost device.
 *
 * Per-virtqueue work (host notifiers, SVQ setup, vring enable) runs on
 * every call; the device-wide actions (memory listener registration,
 * DRIVER_OK / reset) run only when this vhost_dev covers the device's
 * last virtqueues (vq_index + nvqs == vq_index_end).
 */
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        /* Establish DMA mappings via the listener, then announce DRIVER_OK. */
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}
843 
844 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
845                                      struct vhost_log *log)
846 {
847     if (vhost_vdpa_one_time_request(dev)) {
848         return 0;
849     }
850 
851     trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
852                                   log->log);
853     return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
854 }
855 
856 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
857                                        struct vhost_vring_addr *addr)
858 {
859     trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
860                                     addr->desc_user_addr, addr->used_user_addr,
861                                     addr->avail_user_addr,
862                                     addr->log_guest_addr);
863     return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
864 }
865 
866 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
867                                       struct vhost_vring_state *ring)
868 {
869     trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
870     return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
871 }
872 
873 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
874                                        struct vhost_vring_state *ring)
875 {
876     trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
877     return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
878 }
879 
880 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
881                                        struct vhost_vring_state *ring)
882 {
883     int ret;
884 
885     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
886     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
887     return ret;
888 }
889 
890 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
891                                        struct vhost_vring_file *file)
892 {
893     struct vhost_vdpa *v = dev->opaque;
894     int vdpa_idx = file->index - dev->vq_index;
895 
896     if (v->shadow_vqs_enabled) {
897         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
898         vhost_svq_set_svq_kick_fd(svq, file->fd);
899         return 0;
900     } else {
901         return vhost_vdpa_set_vring_dev_kick(dev, file);
902     }
903 }
904 
905 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
906                                        struct vhost_vring_file *file)
907 {
908     struct vhost_vdpa *v = dev->opaque;
909 
910     if (v->shadow_vqs_enabled) {
911         int vdpa_idx = file->index - dev->vq_index;
912         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
913 
914         vhost_svq_set_svq_call_fd(svq, file->fd);
915         return 0;
916     } else {
917         return vhost_vdpa_set_vring_dev_call(dev, file);
918     }
919 }
920 
921 static int vhost_vdpa_get_features(struct vhost_dev *dev,
922                                      uint64_t *features)
923 {
924     int ret;
925 
926     ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
927     trace_vhost_vdpa_get_features(dev, *features);
928     return ret;
929 }
930 
931 static int vhost_vdpa_set_owner(struct vhost_dev *dev)
932 {
933     if (vhost_vdpa_one_time_request(dev)) {
934         return 0;
935     }
936 
937     trace_vhost_vdpa_set_owner(dev);
938     return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
939 }
940 
/*
 * Fill @addr with this vring's addresses.  The *_phys values are copied
 * into the *_user_addr fields — for vDPA the device consumes guest
 * physical addresses rather than userspace VAs.
 */
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}
952 
/* vhost-vDPA always routes DMA through the IOTLB, so force IOMMU mode. */
static bool  vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
957 
/*
 * Backend operation table registered for VHOST_BACKEND_TYPE_VDPA.
 * NULL entries are operations that do not apply to vDPA (memory mapping
 * is handled by the IOTLB memory listener instead).
 */
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config  = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};
991