xref: /qemu/hw/vfio-user/pci.c (revision 3358d926addda99e9f29f57b40d6fd22d2c29472)
/*
 * vfio PCI device over a UNIX socket.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include "qapi/qapi-visit-sockets.h"
#include "qemu/error-report.h"

#include "hw/qdev-properties.h"
#include "hw/vfio/pci.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/proxy.h"

#define TYPE_VFIO_USER_PCI "vfio-user-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)

struct VFIOUserPCIDevice {
    VFIOPCIDevice device;
    SocketAddress *socket;
    bool send_queued;   /* all sends are queued */
    uint32_t wait_time; /* timeout for message replies */
};

/*
 * The server maintains the device's pending interrupts via its MSIX
 * table and PBA, so we treat these accesses like PCI config space
 * and forward them.
 */
static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr,
                                   unsigned size)
{
    VFIOPCIDevice *vdev = opaque;
    VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region;
    uint64_t data;

    /* server copy is what matters */
    data = vfio_region_read(region, addr + vdev->msix->pba_offset, size);
    return data;
}

static void vfio_user_pba_write(void *opaque, hwaddr addr,
                                uint64_t data, unsigned size)
{
    /* dropped */
}

static const MemoryRegionOps vfio_user_pba_ops = {
    .read = vfio_user_pba_read,
    .write = vfio_user_pba_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

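/*
 * Overlay the PBA portion of the MSI-X BAR with an I/O region that
 * forwards guest reads to the server's copy of the PBA.
 */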
static void vfio_user_msix_setup(VFIOPCIDevice *vdev)
{
    MemoryRegion *vfio_reg, *msix_reg, *pba_reg;

    pba_reg = g_new0(MemoryRegion, 1);
    vdev->msix->pba_region = pba_reg;

    vfio_reg = vdev->bars[vdev->msix->pba_bar].mr;
    msix_reg = &vdev->pdev.msix_pba_mmio;
    memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev,
                          "VFIO MSIX PBA", int128_get64(msix_reg->size));
    memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset,
                                        pba_reg, 1);
}

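/* Remove and free the PBA overlay installed by vfio_user_msix_setup(). */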
static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
{
    MemoryRegion *mr, *sub;

    mr = vdev->bars[vdev->msix->pba_bar].mr;
    sub = vdev->msix->pba_region;
    memory_region_del_subregion(mr, sub);

    g_free(vdev->msix->pba_region);
    vdev->msix->pba_region = NULL;
}

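/*
 * Handle a VFIO_USER_DMA_READ request from the server: validate the
 * message, read the requested range of guest memory, and reply with
 * the data or an errno on failure.
 */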
static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
{
    PCIDevice *pdev = &vdev->pdev;
    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
    VFIOUserDMARW *res;
    MemTxResult r;
    size_t size;

    if (msg->hdr.size < sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
        return;
    }
    if (msg->count > proxy->max_xfer_size) {
        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
        return;
    }

    /* switch to our own message buffer */
    size = msg->count + sizeof(VFIOUserDMARW);
    res = g_malloc0(size);
    memcpy(res, msg, sizeof(*res));
    g_free(msg);

    r = pci_dma_read(pdev, res->offset, &res->data, res->count);

    switch (r) {
    case MEMTX_OK:
        if (res->hdr.flags & VFIO_USER_NO_REPLY) {
            g_free(res);
            return;
        }
        vfio_user_send_reply(proxy, &res->hdr, size);
        break;
    case MEMTX_ERROR:
        vfio_user_send_error(proxy, &res->hdr, EFAULT);
        break;
    case MEMTX_DECODE_ERROR:
        vfio_user_send_error(proxy, &res->hdr, ENODEV);
        break;
    case MEMTX_ACCESS_ERROR:
        vfio_user_send_error(proxy, &res->hdr, EPERM);
        break;
    default:
        error_printf("vfio_user_dma_read unknown error %d\n", r);
        vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL);
    }
}

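/*
 * Handle a VFIO_USER_DMA_WRITE request from the server: validate the
 * message, write the supplied data into guest memory, and acknowledge
 * with a reply or an errno on failure.
 */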
static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
{
    PCIDevice *pdev = &vdev->pdev;
    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
    MemTxResult r;

    if (msg->hdr.size < sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
        return;
    }
    /* make sure transfer count isn't larger than the message data */
    if (msg->count > msg->hdr.size - sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
        return;
    }

    r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count);

    switch (r) {
    case MEMTX_OK:
        if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) {
            vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr));
        } else {
            g_free(msg);
        }
        break;
    case MEMTX_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, EFAULT);
        break;
    case MEMTX_DECODE_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, ENODEV);
        break;
    case MEMTX_ACCESS_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, EPERM);
        break;
    default:
        error_printf("vfio_user_dma_write unknown error %d\n", r);
        vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL);
    }
}

/*
 * Incoming request message callback.
 *
 * Runs from the main loop, so the BQL is held.
 */
static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
{
    VFIOPCIDevice *vdev = opaque;
    VFIOUserHdr *hdr = msg->hdr;

    /* no incoming PCI requests pass FDs */
    if (msg->fds != NULL) {
        vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL);
        vfio_user_putfds(msg);
        return;
    }

    switch (hdr->command) {
    case VFIO_USER_DMA_READ:
        vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr);
        break;
    case VFIO_USER_DMA_WRITE:
        vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr);
        break;
    default:
        error_printf("vfio_user_pci_process_req unknown cmd %d\n",
                     hdr->command);
        vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS);
    }
}

/*
 * Emulated devices don't use host hot reset
 */
static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = false;
}

static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
{
    VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
                                           device.vbasedev);

    return OBJECT(vdev);
}

static VFIODeviceOps vfio_user_pci_ops = {
    .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
    .vfio_eoi = vfio_pci_intx_eoi,
    .vfio_get_object = vfio_user_pci_get_object,
    /* No live migration support yet. */
    .vfio_save_config = NULL,
    .vfio_load_config = NULL,
};

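/*
 * Connect to the vfio-user server over the configured UNIX socket,
 * negotiate the protocol version, then bring the device up through the
 * common VFIOPCIDevice setup paths (config space, BARs, capabilities,
 * MSI-X and interrupt setup).
 */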
static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
{
    ERRP_GUARD();
    VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
    VFIODevice *vbasedev = &vdev->vbasedev;
    const char *sock_name;
    AddressSpace *as;
    SocketAddress addr;
    VFIOUserProxy *proxy;

    if (!udev->socket) {
        error_setg(errp, "No socket specified");
        error_append_hint(errp, "e.g. -device '{"
            "\"driver\":\"vfio-user-pci\", "
            "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
            "\"type\": \"unix\"}"
            "}'\n");
        return;
    }

    sock_name = udev->socket->u.q_unix.path;

    vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);

    memset(&addr, 0, sizeof(addr));
    addr.type = SOCKET_ADDRESS_TYPE_UNIX;
    addr.u.q_unix.path = (char *)sock_name;
    proxy = vfio_user_connect_dev(&addr, errp);
    if (!proxy) {
        return;
    }
    vbasedev->proxy = proxy;
    vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);

    if (udev->send_queued) {
        proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
    }

    /* user specified or 5 sec default */
    proxy->wait_time = udev->wait_time;

    if (!vfio_user_validate_version(proxy, errp)) {
        goto error;
    }

    /*
     * Use socket-based device I/O instead of vfio kernel driver.
     */
    vbasedev->io_ops = &vfio_user_device_io_ops_sock;

    /*
     * vfio-user devices are effectively mdevs (don't use a host iommu).
     */
    vbasedev->mdev = true;

    /*
     * Enable per-region fds.
     */
    vbasedev->use_region_fds = true;

    as = pci_device_iommu_address_space(pdev);
    if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
                                          vbasedev->name, vbasedev,
                                          as, errp)) {
        goto error;
    }

    if (!vfio_pci_populate_device(vdev, errp)) {
        goto error;
    }

    if (!vfio_pci_config_setup(vdev, errp)) {
        goto error;
    }

    /*
     * vfio_pci_config_setup will have registered the device's BARs
     * and set up any MSIX BARs, so errors after it succeeds must
     * use out_teardown.
     */

    if (!vfio_pci_add_capabilities(vdev, errp)) {
        goto out_teardown;
    }

    if (vdev->msix != NULL) {
        vfio_user_msix_setup(vdev);
    }

    if (!vfio_pci_interrupt_setup(vdev, errp)) {
        goto out_teardown;
    }

    vfio_pci_register_err_notifier(vdev);
    vfio_pci_register_req_notifier(vdev);

    return;

out_teardown:
    vfio_pci_teardown_msi(vdev);
    vfio_pci_bars_exit(vdev);
error:
    error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
    vfio_pci_put_device(vdev);
}

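/*
 * Instance init: expose the bootindex property, mark the (nonexistent)
 * host PCI address as invalid, and initialize the common VFIO device
 * state.
 */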
static void vfio_user_instance_init(Object *obj)
{
    PCIDevice *pci_dev = PCI_DEVICE(obj);
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
    VFIODevice *vbasedev = &vdev->vbasedev;

    device_add_bootindex_property(obj, &vdev->bootindex,
                                  "bootindex", NULL,
                                  &pci_dev->qdev);
    vdev->host.domain = ~0U;
    vdev->host.bus = ~0U;
    vdev->host.slot = ~0U;
    vdev->host.function = ~0U;

    vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
                     DEVICE(vdev), false);

    vdev->nv_gpudirect_clique = 0xFF;

    /*
     * QEMU_PCI_CAP_EXPRESS initialization does not depend on the QEMU
     * command line, so there is no need to defer it to realize time as
     * other devices do.
     */
    pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}

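/*
 * Undo realize/instance_init: drop the MSI-X PBA overlay, release the
 * device, and disconnect from the server.
 */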
static void vfio_user_instance_finalize(Object *obj)
{
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
    VFIODevice *vbasedev = &vdev->vbasedev;

    if (vdev->msix != NULL) {
        vfio_user_msix_teardown(vdev);
    }

    vfio_pci_put_device(vdev);

    if (vbasedev->proxy != NULL) {
        vfio_user_disconnect(vbasedev->proxy);
    }
}

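/* Device reset: ask the server to reset the device if it supports it. */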
static void vfio_user_pci_reset(DeviceState *dev)
{
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
    VFIODevice *vbasedev = &vdev->vbasedev;

    vfio_pci_pre_reset(vdev);

    if (vbasedev->reset_works) {
        vfio_user_device_reset(vbasedev->proxy);
    }

    vfio_pci_post_reset(vdev);
}

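/* Properties with an "x-" prefix are experimental and not a stable interface. */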
static const Property vfio_user_pci_dev_properties[] = {
    DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
                       vendor_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
                       device_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
                       sub_vendor_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                       sub_device_id, PCI_ANY_ID),
    DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
    DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000),
};

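/*
 * Setter for the "socket" property; only UNIX-domain socket addresses
 * are accepted, and the address cannot be changed once the proxy is
 * connected.
 */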
static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
                                     void *opaque, Error **errp)
{
    VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
    bool success;

    if (udev->device.vbasedev.proxy) {
        error_setg(errp, "Proxy is connected");
        return;
    }

    qapi_free_SocketAddress(udev->socket);
    udev->socket = NULL;

    success = visit_type_SocketAddress(v, name, &udev->socket, errp);
    if (!success) {
        return;
    }

    if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "Unsupported socket type %s",
                   SocketAddressType_str(udev->socket->type));
        qapi_free_SocketAddress(udev->socket);
        udev->socket = NULL;
        return;
    }
}

static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);

    device_class_set_legacy_reset(dc, vfio_user_pci_reset);
    device_class_set_props(dc, vfio_user_pci_dev_properties);

    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfio_user_pci_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress (UNIX sockets only)");

    dc->desc = "VFIO over socket PCI device assignment";
    pdc->realize = vfio_user_pci_realize;
}

static const TypeInfo vfio_user_pci_dev_info = {
    .name = TYPE_VFIO_USER_PCI,
    .parent = TYPE_VFIO_PCI_BASE,
    .instance_size = sizeof(VFIOUserPCIDevice),
    .class_init = vfio_user_pci_dev_class_init,
    .instance_init = vfio_user_instance_init,
    .instance_finalize = vfio_user_instance_finalize,
};

static void register_vfio_user_dev_type(void)
{
    type_register_static(&vfio_user_pci_dev_info);
}

type_init(register_vfio_user_dev_type)