xref: /qemu/hw/virtio/vhost-vdpa.c (revision dff4426fa6562b6837ddc662a61836ae11ac4fae)
1 /*
2  * vhost-vdpa
3  *
4  *  Copyright(c) 2017-2018 Intel Corporation.
5  *  Copyright(c) 2020 Red Hat, Inc.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include <linux/vhost.h>
14 #include <linux/vfio.h>
15 #include <sys/eventfd.h>
16 #include <sys/ioctl.h>
17 #include "hw/virtio/vhost.h"
18 #include "hw/virtio/vhost-backend.h"
19 #include "hw/virtio/virtio-net.h"
20 #include "hw/virtio/vhost-shadow-virtqueue.h"
21 #include "hw/virtio/vhost-vdpa.h"
22 #include "exec/address-spaces.h"
23 #include "qemu/main-loop.h"
24 #include "cpu.h"
25 #include "trace.h"
26 #include "qemu-common.h"
27 #include "qapi/error.h"
28 
29 /*
30  * Return one past the end of the end of section. Be careful with uint64_t
31  * conversions!
32  */
33 static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
34 {
35     Int128 llend = int128_make64(section->offset_within_address_space);
36     llend = int128_add(llend, section->size);
37     llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
38 
39     return llend;
40 }
41 
42 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
43                                                 uint64_t iova_min,
44                                                 uint64_t iova_max)
45 {
46     Int128 llend;
47 
48     if ((!memory_region_is_ram(section->mr) &&
49          !memory_region_is_iommu(section->mr)) ||
50         memory_region_is_protected(section->mr) ||
51         /* vhost-vDPA doesn't allow MMIO to be mapped  */
52         memory_region_is_ram_device(section->mr)) {
53         return true;
54     }
55 
56     if (section->offset_within_address_space < iova_min) {
57         error_report("RAM section out of device range (min=0x%" PRIx64
58                      ", addr=0x%" HWADDR_PRIx ")",
59                      iova_min, section->offset_within_address_space);
60         return true;
61     }
62 
63     llend = vhost_vdpa_section_end(section);
64     if (int128_gt(llend, int128_make64(iova_max))) {
65         error_report("RAM section out of device range (max=0x%" PRIx64
66                      ", end addr=0x%" PRIx64 ")",
67                      iova_max, int128_get64(llend));
68         return true;
69     }
70 
71     return false;
72 }
73 
74 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
75                               void *vaddr, bool readonly)
76 {
77     struct vhost_msg_v2 msg = {};
78     int fd = v->device_fd;
79     int ret = 0;
80 
81     msg.type = v->msg_type;
82     msg.iotlb.iova = iova;
83     msg.iotlb.size = size;
84     msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
85     msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
86     msg.iotlb.type = VHOST_IOTLB_UPDATE;
87 
88    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
89                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
90 
91     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
92         error_report("failed to write, fd=%d, errno=%d (%s)",
93             fd, errno, strerror(errno));
94         return -EIO ;
95     }
96 
97     return ret;
98 }
99 
100 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
101                                 hwaddr size)
102 {
103     struct vhost_msg_v2 msg = {};
104     int fd = v->device_fd;
105     int ret = 0;
106 
107     msg.type = v->msg_type;
108     msg.iotlb.iova = iova;
109     msg.iotlb.size = size;
110     msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
111 
112     trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
113                                msg.iotlb.size, msg.iotlb.type);
114 
115     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
116         error_report("failed to write, fd=%d, errno=%d (%s)",
117             fd, errno, strerror(errno));
118         return -EIO ;
119     }
120 
121     return ret;
122 }
123 
124 static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
125 {
126     int fd = v->device_fd;
127     struct vhost_msg_v2 msg = {
128         .type = v->msg_type,
129         .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
130     };
131 
132     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
133         error_report("failed to write, fd=%d, errno=%d (%s)",
134                      fd, errno, strerror(errno));
135     }
136 }
137 
138 static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
139 {
140     if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
141         !v->iotlb_batch_begin_sent) {
142         vhost_vdpa_listener_begin_batch(v);
143     }
144 
145     v->iotlb_batch_begin_sent = true;
146 }
147 
148 static void vhost_vdpa_listener_commit(MemoryListener *listener)
149 {
150     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
151     struct vhost_dev *dev = v->dev;
152     struct vhost_msg_v2 msg = {};
153     int fd = v->device_fd;
154 
155     if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
156         return;
157     }
158 
159     if (!v->iotlb_batch_begin_sent) {
160         return;
161     }
162 
163     msg.type = v->msg_type;
164     msg.iotlb.type = VHOST_IOTLB_BATCH_END;
165 
166     if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
167         error_report("failed to write, fd=%d, errno=%d (%s)",
168                      fd, errno, strerror(errno));
169     }
170 
171     v->iotlb_batch_begin_sent = false;
172 }
173 
174 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
175                                            MemoryRegionSection *section)
176 {
177     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
178     hwaddr iova;
179     Int128 llend, llsize;
180     void *vaddr;
181     int ret;
182 
183     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
184                                             v->iova_range.last)) {
185         return;
186     }
187 
188     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
189                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
190         error_report("%s received unaligned region", __func__);
191         return;
192     }
193 
194     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
195     llend = vhost_vdpa_section_end(section);
196     if (int128_ge(int128_make64(iova), llend)) {
197         return;
198     }
199 
200     memory_region_ref(section->mr);
201 
202     /* Here we assume that memory_region_is_ram(section->mr)==true */
203 
204     vaddr = memory_region_get_ram_ptr(section->mr) +
205             section->offset_within_region +
206             (iova - section->offset_within_address_space);
207 
208     trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
209                                          vaddr, section->readonly);
210 
211     llsize = int128_sub(llend, int128_make64(iova));
212 
213     vhost_vdpa_iotlb_batch_begin_once(v);
214     ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
215                              vaddr, section->readonly);
216     if (ret) {
217         error_report("vhost vdpa map fail!");
218         goto fail;
219     }
220 
221     return;
222 
223 fail:
224     /*
225      * On the initfn path, store the first error in the container so we
226      * can gracefully fail.  Runtime, there's not much we can do other
227      * than throw a hardware error.
228      */
229     error_report("vhost-vdpa: DMA mapping failed, unable to continue");
230     return;
231 
232 }
233 
234 static void vhost_vdpa_listener_region_del(MemoryListener *listener,
235                                            MemoryRegionSection *section)
236 {
237     struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
238     hwaddr iova;
239     Int128 llend, llsize;
240     int ret;
241 
242     if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
243                                             v->iova_range.last)) {
244         return;
245     }
246 
247     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
248                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
249         error_report("%s received unaligned region", __func__);
250         return;
251     }
252 
253     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
254     llend = vhost_vdpa_section_end(section);
255 
256     trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
257 
258     if (int128_ge(int128_make64(iova), llend)) {
259         return;
260     }
261 
262     llsize = int128_sub(llend, int128_make64(iova));
263 
264     vhost_vdpa_iotlb_batch_begin_once(v);
265     ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
266     if (ret) {
267         error_report("vhost_vdpa dma unmap error!");
268     }
269 
270     memory_region_unref(section->mr);
271 }
272 /*
273  * IOTLB API is used by vhost-vpda which requires incremental updating
274  * of the mapping. So we can not use generic vhost memory listener which
275  * depends on the addnop().
276  */
277 static const MemoryListener vhost_vdpa_memory_listener = {
278     .name = "vhost-vdpa",
279     .commit = vhost_vdpa_listener_commit,
280     .region_add = vhost_vdpa_listener_region_add,
281     .region_del = vhost_vdpa_listener_region_del,
282 };
283 
284 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
285                              void *arg)
286 {
287     struct vhost_vdpa *v = dev->opaque;
288     int fd = v->device_fd;
289     int ret;
290 
291     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
292 
293     ret = ioctl(fd, request, arg);
294     return ret < 0 ? -errno : ret;
295 }
296 
297 static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
298 {
299     uint8_t s;
300     int ret;
301 
302     trace_vhost_vdpa_add_status(dev, status);
303     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
304     if (ret < 0) {
305         return ret;
306     }
307 
308     s |= status;
309 
310     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
311     if (ret < 0) {
312         return ret;
313     }
314 
315     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
316     if (ret < 0) {
317         return ret;
318     }
319 
320     if (!(s & status)) {
321         return -EIO;
322     }
323 
324     return 0;
325 }
326 
327 static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
328 {
329     int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
330                               &v->iova_range);
331     if (ret != 0) {
332         v->iova_range.first = 0;
333         v->iova_range.last = UINT64_MAX;
334     }
335 
336     trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
337                                     v->iova_range.last);
338 }
339 
340 static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
341 {
342     struct vhost_vdpa *v = dev->opaque;
343 
344     return v->index != 0;
345 }
346 
347 static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
348                                Error **errp)
349 {
350     g_autoptr(GPtrArray) shadow_vqs = NULL;
351 
352     if (!v->shadow_vqs_enabled) {
353         return 0;
354     }
355 
356     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
357     for (unsigned n = 0; n < hdev->nvqs; ++n) {
358         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
359 
360         if (unlikely(!svq)) {
361             error_setg(errp, "Cannot create svq %u", n);
362             return -1;
363         }
364         g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
365     }
366 
367     v->shadow_vqs = g_steal_pointer(&shadow_vqs);
368     return 0;
369 }
370 
371 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
372 {
373     struct vhost_vdpa *v;
374     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
375     trace_vhost_vdpa_init(dev, opaque);
376     int ret;
377 
378     /*
379      * Similar to VFIO, we end up pinning all guest memory and have to
380      * disable discarding of RAM.
381      */
382     ret = ram_block_discard_disable(true);
383     if (ret) {
384         error_report("Cannot set discarding of RAM broken");
385         return ret;
386     }
387 
388     v = opaque;
389     v->dev = dev;
390     dev->opaque =  opaque ;
391     v->listener = vhost_vdpa_memory_listener;
392     v->msg_type = VHOST_IOTLB_MSG_V2;
393     ret = vhost_vdpa_init_svq(dev, v, errp);
394     if (ret) {
395         goto err;
396     }
397 
398     vhost_vdpa_get_iova_range(v);
399 
400     if (vhost_vdpa_one_time_request(dev)) {
401         return 0;
402     }
403 
404     vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
405                                VIRTIO_CONFIG_S_DRIVER);
406 
407     return 0;
408 
409 err:
410     ram_block_discard_disable(false);
411     return ret;
412 }
413 
414 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
415                                             int queue_index)
416 {
417     size_t page_size = qemu_real_host_page_size;
418     struct vhost_vdpa *v = dev->opaque;
419     VirtIODevice *vdev = dev->vdev;
420     VhostVDPAHostNotifier *n;
421 
422     n = &v->notifier[queue_index];
423 
424     if (n->addr) {
425         virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
426         object_unparent(OBJECT(&n->mr));
427         munmap(n->addr, page_size);
428         n->addr = NULL;
429     }
430 }
431 
432 static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
433 {
434     size_t page_size = qemu_real_host_page_size;
435     struct vhost_vdpa *v = dev->opaque;
436     VirtIODevice *vdev = dev->vdev;
437     VhostVDPAHostNotifier *n;
438     int fd = v->device_fd;
439     void *addr;
440     char *name;
441 
442     vhost_vdpa_host_notifier_uninit(dev, queue_index);
443 
444     n = &v->notifier[queue_index];
445 
446     addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
447                 queue_index * page_size);
448     if (addr == MAP_FAILED) {
449         goto err;
450     }
451 
452     name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
453                            v, queue_index);
454     memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
455                                       page_size, addr);
456     g_free(name);
457 
458     if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
459         object_unparent(OBJECT(&n->mr));
460         munmap(addr, page_size);
461         goto err;
462     }
463     n->addr = addr;
464 
465     return 0;
466 
467 err:
468     return -1;
469 }
470 
471 static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
472 {
473     int i;
474 
475     for (i = dev->vq_index; i < dev->vq_index + n; i++) {
476         vhost_vdpa_host_notifier_uninit(dev, i);
477     }
478 }
479 
480 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
481 {
482     struct vhost_vdpa *v = dev->opaque;
483     int i;
484 
485     if (v->shadow_vqs_enabled) {
486         /* FIXME SVQ is not compatible with host notifiers mr */
487         return;
488     }
489 
490     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
491         if (vhost_vdpa_host_notifier_init(dev, i)) {
492             goto err;
493         }
494     }
495 
496     return;
497 
498 err:
499     vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
500     return;
501 }
502 
503 static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
504 {
505     struct vhost_vdpa *v = dev->opaque;
506     size_t idx;
507 
508     if (!v->shadow_vqs) {
509         return;
510     }
511 
512     for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
513         vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
514     }
515     g_ptr_array_free(v->shadow_vqs, true);
516 }
517 
518 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
519 {
520     struct vhost_vdpa *v;
521     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
522     v = dev->opaque;
523     trace_vhost_vdpa_cleanup(dev, v);
524     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
525     memory_listener_unregister(&v->listener);
526     vhost_vdpa_svq_cleanup(dev);
527 
528     dev->opaque = NULL;
529     ram_block_discard_disable(false);
530 
531     return 0;
532 }
533 
/* Report the memory slot limit of this backend, which is INT_MAX. */
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    const int limit = INT_MAX;

    trace_vhost_vdpa_memslots_limit(dev, limit);
    return limit;
}
539 
540 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
541                                     struct vhost_memory *mem)
542 {
543     if (vhost_vdpa_one_time_request(dev)) {
544         return 0;
545     }
546 
547     trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
548     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
549         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
550         int i;
551         for (i = 0; i < mem->nregions; i++) {
552             trace_vhost_vdpa_dump_regions(dev, i,
553                                           mem->regions[i].guest_phys_addr,
554                                           mem->regions[i].memory_size,
555                                           mem->regions[i].userspace_addr,
556                                           mem->regions[i].flags_padding);
557         }
558     }
559     if (mem->padding) {
560         return -EINVAL;
561     }
562 
563     return 0;
564 }
565 
566 static int vhost_vdpa_set_features(struct vhost_dev *dev,
567                                    uint64_t features)
568 {
569     int ret;
570 
571     if (vhost_vdpa_one_time_request(dev)) {
572         return 0;
573     }
574 
575     trace_vhost_vdpa_set_features(dev, features);
576     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
577     if (ret) {
578         return ret;
579     }
580 
581     return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
582 }
583 
584 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
585 {
586     uint64_t features;
587     uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
588         0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
589     int r;
590 
591     if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
592         return -EFAULT;
593     }
594 
595     features &= f;
596 
597     if (vhost_vdpa_one_time_request(dev)) {
598         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
599         if (r) {
600             return -EFAULT;
601         }
602     }
603 
604     dev->backend_cap = features;
605 
606     return 0;
607 }
608 
609 static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
610                                     uint32_t *device_id)
611 {
612     int ret;
613     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
614     trace_vhost_vdpa_get_device_id(dev, *device_id);
615     return ret;
616 }
617 
618 static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
619 {
620     if (!v->shadow_vqs_enabled) {
621         return;
622     }
623 
624     for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
625         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
626         vhost_svq_stop(svq);
627     }
628 }
629 
630 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
631 {
632     struct vhost_vdpa *v = dev->opaque;
633     int ret;
634     uint8_t status = 0;
635 
636     vhost_vdpa_reset_svq(v);
637 
638     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
639     trace_vhost_vdpa_reset_device(dev, status);
640     return ret;
641 }
642 
643 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
644 {
645     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
646 
647     trace_vhost_vdpa_get_vq_index(dev, idx, idx);
648     return idx;
649 }
650 
651 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
652 {
653     int i;
654     trace_vhost_vdpa_set_vring_ready(dev);
655     for (i = 0; i < dev->nvqs; ++i) {
656         struct vhost_vring_state state = {
657             .index = dev->vq_index + i,
658             .num = 1,
659         };
660         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
661     }
662     return 0;
663 }
664 
665 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
666                                    uint32_t config_len)
667 {
668     int b, len;
669     char line[QEMU_HEXDUMP_LINE_LEN];
670 
671     for (b = 0; b < config_len; b += 16) {
672         len = config_len - b;
673         qemu_hexdump_line(line, b, config, len, false);
674         trace_vhost_vdpa_dump_config(dev, line);
675     }
676 }
677 
678 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
679                                    uint32_t offset, uint32_t size,
680                                    uint32_t flags)
681 {
682     struct vhost_vdpa_config *config;
683     int ret;
684     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
685 
686     trace_vhost_vdpa_set_config(dev, offset, size, flags);
687     config = g_malloc(size + config_size);
688     config->off = offset;
689     config->len = size;
690     memcpy(config->buf, data, size);
691     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
692         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
693         vhost_vdpa_dump_config(dev, data, size);
694     }
695     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
696     g_free(config);
697     return ret;
698 }
699 
700 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
701                                    uint32_t config_len, Error **errp)
702 {
703     struct vhost_vdpa_config *v_config;
704     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
705     int ret;
706 
707     trace_vhost_vdpa_get_config(dev, config, config_len);
708     v_config = g_malloc(config_len + config_size);
709     v_config->len = config_len;
710     v_config->off = 0;
711     ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
712     memcpy(config, v_config->buf, config_len);
713     g_free(v_config);
714     if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
715         trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
716         vhost_vdpa_dump_config(dev, config, config_len);
717     }
718     return ret;
719  }
720 
721 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
722                                          struct vhost_vring_file *file)
723 {
724     trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
725     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
726 }
727 
728 /**
729  * Set the shadow virtqueue descriptors to the device
730  *
731  * @dev: The vhost device model
732  * @svq: The shadow virtqueue
733  * @idx: The index of the virtqueue in the vhost device
734  * @errp: Error
735  */
736 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
737                                  VhostShadowVirtqueue *svq, unsigned idx,
738                                  Error **errp)
739 {
740     struct vhost_vring_file file = {
741         .index = dev->vq_index + idx,
742     };
743     const EventNotifier *event_notifier = &svq->hdev_kick;
744     int r;
745 
746     file.fd = event_notifier_get_fd(event_notifier);
747     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
748     if (unlikely(r != 0)) {
749         error_setg_errno(errp, -r, "Can't set device kick fd");
750     }
751 
752     return r == 0;
753 }
754 
755 static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
756 {
757     struct vhost_vdpa *v = dev->opaque;
758     Error *err = NULL;
759     unsigned i;
760 
761     if (!v->shadow_vqs) {
762         return true;
763     }
764 
765     for (i = 0; i < v->shadow_vqs->len; ++i) {
766         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
767         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
768         if (unlikely(!ok)) {
769             error_reportf_err(err, "Cannot setup SVQ %u: ", i);
770             return false;
771         }
772     }
773 
774     return true;
775 }
776 
777 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
778 {
779     struct vhost_vdpa *v = dev->opaque;
780     bool ok;
781     trace_vhost_vdpa_dev_start(dev, started);
782 
783     if (started) {
784         vhost_vdpa_host_notifiers_init(dev);
785         ok = vhost_vdpa_svqs_start(dev);
786         if (unlikely(!ok)) {
787             return -1;
788         }
789         vhost_vdpa_set_vring_ready(dev);
790     } else {
791         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
792     }
793 
794     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
795         return 0;
796     }
797 
798     if (started) {
799         memory_listener_register(&v->listener, &address_space_memory);
800         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
801     } else {
802         vhost_vdpa_reset_device(dev);
803         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
804                                    VIRTIO_CONFIG_S_DRIVER);
805         memory_listener_unregister(&v->listener);
806 
807         return 0;
808     }
809 }
810 
811 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
812                                      struct vhost_log *log)
813 {
814     if (vhost_vdpa_one_time_request(dev)) {
815         return 0;
816     }
817 
818     trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
819                                   log->log);
820     return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
821 }
822 
823 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
824                                        struct vhost_vring_addr *addr)
825 {
826     trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
827                                     addr->desc_user_addr, addr->used_user_addr,
828                                     addr->avail_user_addr,
829                                     addr->log_guest_addr);
830     return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
831 }
832 
833 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
834                                       struct vhost_vring_state *ring)
835 {
836     trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
837     return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
838 }
839 
840 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
841                                        struct vhost_vring_state *ring)
842 {
843     trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
844     return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
845 }
846 
847 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
848                                        struct vhost_vring_state *ring)
849 {
850     int ret;
851 
852     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
853     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
854     return ret;
855 }
856 
857 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
858                                        struct vhost_vring_file *file)
859 {
860     struct vhost_vdpa *v = dev->opaque;
861     int vdpa_idx = file->index - dev->vq_index;
862 
863     if (v->shadow_vqs_enabled) {
864         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
865         vhost_svq_set_svq_kick_fd(svq, file->fd);
866         return 0;
867     } else {
868         return vhost_vdpa_set_vring_dev_kick(dev, file);
869     }
870 }
871 
872 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
873                                        struct vhost_vring_file *file)
874 {
875     trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
876     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
877 }
878 
879 static int vhost_vdpa_get_features(struct vhost_dev *dev,
880                                      uint64_t *features)
881 {
882     int ret;
883 
884     ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
885     trace_vhost_vdpa_get_features(dev, *features);
886     return ret;
887 }
888 
889 static int vhost_vdpa_set_owner(struct vhost_dev *dev)
890 {
891     if (vhost_vdpa_one_time_request(dev)) {
892         return 0;
893     }
894 
895     trace_vhost_vdpa_set_owner(dev);
896     return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
897 }
898 
899 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
900                     struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
901 {
902     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
903     addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
904     addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
905     addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
906     trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
907                                  addr->avail_user_addr, addr->used_user_addr);
908     return 0;
909 }
910 
/* force_iommu hook: this backend always answers true. */
static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
915 
916 const VhostOps vdpa_ops = {
917         .backend_type = VHOST_BACKEND_TYPE_VDPA,
918         .vhost_backend_init = vhost_vdpa_init,
919         .vhost_backend_cleanup = vhost_vdpa_cleanup,
920         .vhost_set_log_base = vhost_vdpa_set_log_base,
921         .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
922         .vhost_set_vring_num = vhost_vdpa_set_vring_num,
923         .vhost_set_vring_base = vhost_vdpa_set_vring_base,
924         .vhost_get_vring_base = vhost_vdpa_get_vring_base,
925         .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
926         .vhost_set_vring_call = vhost_vdpa_set_vring_call,
927         .vhost_get_features = vhost_vdpa_get_features,
928         .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
929         .vhost_set_owner = vhost_vdpa_set_owner,
930         .vhost_set_vring_endian = NULL,
931         .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
932         .vhost_set_mem_table = vhost_vdpa_set_mem_table,
933         .vhost_set_features = vhost_vdpa_set_features,
934         .vhost_reset_device = vhost_vdpa_reset_device,
935         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
936         .vhost_get_config  = vhost_vdpa_get_config,
937         .vhost_set_config = vhost_vdpa_set_config,
938         .vhost_requires_shm_log = NULL,
939         .vhost_migration_done = NULL,
940         .vhost_backend_can_merge = NULL,
941         .vhost_net_set_mtu = NULL,
942         .vhost_set_iotlb_callback = NULL,
943         .vhost_send_device_iotlb_msg = NULL,
944         .vhost_dev_start = vhost_vdpa_dev_start,
945         .vhost_get_device_id = vhost_vdpa_get_device_id,
946         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
947         .vhost_force_iommu = vhost_vdpa_force_iommu,
948 };
949