xref: /qemu/hw/virtio/vhost-vdpa.c (revision 9e32d4ead3531f70d26e374219884d42e5d8b170)
/*
 * vhost-vdpa
 *
 *  Copyright(c) 2017-2018 Intel Corporation.
 *  Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "cpu.h"

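/*
 * Decide whether the memory listener should ignore a section: only RAM and
 * IOMMU regions are mapped into the device IOTLB, and addresses with bit 63
 * set are skipped as spurious 64-bit BAR sizing artifacts (see below).
 */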
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

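/*
 * Add an IOTLB mapping by writing a VHOST_IOTLB_UPDATE message of the
 * version stored in v->msg_type to the vhost-vdpa device fd.
 */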
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

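/*
 * Remove the IOTLB mapping for [iova, iova + size) by writing a
 * VHOST_IOTLB_INVALIDATE message to the vhost-vdpa device fd.
 */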
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
            fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

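/*
 * Memory listener callback: clip the new section to the target page size,
 * then map its guest-physical range to the host virtual address of the
 * backing RAM so the vDPA device can DMA into it.
 */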
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: DMA map failed");
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("vhost-vdpa: failed to map RAM device region, "
                     "PCI peer-to-peer may not work");
        return;
    }
    /*
     * There is not much we can do at runtime beyond reporting the
     * failure; guest DMA through this region will not work.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
}

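/*
 * Memory listener callback: mirror of region_add.  Clip the departing
 * section to the target page size and invalidate its IOTLB range before
 * dropping the memory region reference.
 */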
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed");
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping, so we cannot use the generic vhost memory listener, which
 * depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

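/*
 * Forward a vhost request to the kernel as an ioctl on the vhost-vdpa
 * character device fd; used by all the thin wrapper ops below.
 */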
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

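/*
 * Read-modify-write helper for the virtio device status register:
 * OR the given bits into the current status via the vDPA status ioctls.
 */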
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

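/*
 * Backend init: stash the vhost_vdpa state in dev->opaque, cache the
 * backend features, prepare the IOTLB memory listener (registered later in
 * vhost_vdpa_dev_start) and announce ACKNOWLEDGE | DRIVER to the device.
 */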
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    /* The device may still reject the features: verify FEATURES_OK sticks. */
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

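/*
 * Enable every virtqueue owned by this vhost_dev via
 * VHOST_VDPA_SET_VRING_ENABLE (num = 1 means "ready").
 */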
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

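/*
 * Device config space accessors: wrap the caller's buffer in a
 * struct vhost_vdpa_config (offset + length + payload) and pass it to the
 * VHOST_VDPA_SET_CONFIG / VHOST_VDPA_GET_CONFIG ioctls.
 */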
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

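/*
 * Start/stop the device.  On start, register the memory listener so the
 * current guest memory map is pushed to the device IOTLB, enable the
 * vrings, set DRIVER_OK and verify that the device accepted it.  On stop,
 * reset the device, re-announce ACKNOWLEDGE | DRIVER and unregister the
 * listener.
 */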
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

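/*
 * vDPA devices do their own address translation, so the vring "user"
 * addresses reported to the kernel are the guest physical addresses of
 * the descriptor, available and used rings rather than host virtual
 * addresses.
 */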
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

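/*
 * VhostOps table registered with the generic vhost layer for
 * VHOST_BACKEND_TYPE_VDPA; entries left NULL are optional callbacks that
 * this backend does not implement.
 */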
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};
467