xref: /qemu/hw/vfio-user/pci.c (revision c6ac52a4d8f7a7c03452454d36b60ac309f0b9ce)
/*
 * vfio PCI device over a UNIX socket.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include "qapi-visit-sockets.h"
#include "qemu/error-report.h"

#include "hw/qdev-properties.h"
#include "hw/vfio/pci.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/proxy.h"

#define TYPE_VFIO_USER_PCI "vfio-user-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)

struct VFIOUserPCIDevice {
    VFIOPCIDevice device;
    SocketAddress *socket;
    bool send_queued;   /* all sends are queued */
};

/*
 * The server maintains the device's pending interrupts,
 * via its MSIX table and PBA, so we treat these accesses
 * like PCI config space and forward them.
 */
static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr,
                                   unsigned size)
{
    VFIOPCIDevice *vdev = opaque;
    VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region;
    uint64_t data;

    /* server copy is what matters */
    data = vfio_region_read(region, addr + vdev->msix->pba_offset, size);
    return data;
}

static void vfio_user_pba_write(void *opaque, hwaddr addr,
                                uint64_t data, unsigned size)
{
    /* dropped */
}

static const MemoryRegionOps vfio_user_pba_ops = {
    .read = vfio_user_pba_read,
    .write = vfio_user_pba_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

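/*
 * Overlay the PBA with a region that forwards reads to the server's copy;
 * the overlap priority of 1 puts it above the PBA mapping created by the
 * core MSI-X code, so guest reads see the server's pending bits.
 */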
static void vfio_user_msix_setup(VFIOPCIDevice *vdev)
{
    MemoryRegion *vfio_reg, *msix_reg, *pba_reg;

    pba_reg = g_new0(MemoryRegion, 1);
    vdev->msix->pba_region = pba_reg;

    vfio_reg = vdev->bars[vdev->msix->pba_bar].mr;
    msix_reg = &vdev->pdev.msix_pba_mmio;
    memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev,
                          "VFIO MSIX PBA", int128_get64(msix_reg->size));
    memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset,
                                        pba_reg, 1);
}

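/* Remove and free the PBA overlay installed by vfio_user_msix_setup(). */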
static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
{
    MemoryRegion *mr, *sub;

    mr = vdev->bars[vdev->msix->pba_bar].mr;
    sub = vdev->msix->pba_region;
    memory_region_del_subregion(mr, sub);

    g_free(vdev->msix->pba_region);
    vdev->msix->pba_region = NULL;
}

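/*
 * Handle a VFIO_USER_DMA_READ request from the server: validate the header
 * and transfer size, read guest memory with pci_dma_read(), and send the
 * data back in the reply (or an errno mapped from the MemTxResult).
 */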
static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
{
    PCIDevice *pdev = &vdev->pdev;
    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
    VFIOUserDMARW *res;
    MemTxResult r;
    size_t size;

    if (msg->hdr.size < sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
        return;
    }
    if (msg->count > proxy->max_xfer_size) {
        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
        return;
    }

    /* switch to our own message buffer */
    size = msg->count + sizeof(VFIOUserDMARW);
    res = g_malloc0(size);
    memcpy(res, msg, sizeof(*res));
    g_free(msg);

    r = pci_dma_read(pdev, res->offset, &res->data, res->count);

    switch (r) {
    case MEMTX_OK:
        if (res->hdr.flags & VFIO_USER_NO_REPLY) {
            g_free(res);
            return;
        }
        vfio_user_send_reply(proxy, &res->hdr, size);
        break;
    case MEMTX_ERROR:
        vfio_user_send_error(proxy, &res->hdr, EFAULT);
        break;
    case MEMTX_DECODE_ERROR:
        vfio_user_send_error(proxy, &res->hdr, ENODEV);
        break;
    case MEMTX_ACCESS_ERROR:
        vfio_user_send_error(proxy, &res->hdr, EPERM);
        break;
    default:
        error_printf("vfio_user_dma_read unknown error %d\n", r);
        vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL);
    }
}

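/*
 * Handle a VFIO_USER_DMA_WRITE request from the server: the data to write
 * arrives in the message itself, so only the count needs validating before
 * it is written to guest memory with pci_dma_write().
 */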
static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
{
    PCIDevice *pdev = &vdev->pdev;
    VFIOUserProxy *proxy = vdev->vbasedev.proxy;
    MemTxResult r;

    if (msg->hdr.size < sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, EINVAL);
        return;
    }
    /* make sure transfer count isn't larger than the message data */
    if (msg->count > msg->hdr.size - sizeof(*msg)) {
        vfio_user_send_error(proxy, &msg->hdr, E2BIG);
        return;
    }

    r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count);

    switch (r) {
    case MEMTX_OK:
        if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) {
            vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr));
        } else {
            g_free(msg);
        }
        break;
    case MEMTX_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, EFAULT);
        break;
    case MEMTX_DECODE_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, ENODEV);
        break;
    case MEMTX_ACCESS_ERROR:
        vfio_user_send_error(proxy, &msg->hdr, EPERM);
        break;
    default:
        error_printf("vfio_user_dma_write unknown error %d\n", r);
        vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL);
    }
}

/*
 * Incoming request message callback.
 *
 * Runs off main loop, so BQL held.
 */
static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
{
    VFIOPCIDevice *vdev = opaque;
    VFIOUserHdr *hdr = msg->hdr;

    /* no incoming PCI requests pass FDs */
    if (msg->fds != NULL) {
        vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL);
        vfio_user_putfds(msg);
        return;
    }

    switch (hdr->command) {
    case VFIO_USER_DMA_READ:
        vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr);
        break;
    case VFIO_USER_DMA_WRITE:
        vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr);
        break;
    default:
        error_printf("vfio_user_pci_process_req unknown cmd %d\n",
                     hdr->command);
        vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS);
    }
}

/*
 * Emulated devices don't use host hot reset
 */
static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = false;
}

static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
{
    VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
                                           device.vbasedev);

    return OBJECT(vdev);
}

static VFIODeviceOps vfio_user_pci_ops = {
    .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
    .vfio_eoi = vfio_pci_intx_eoi,
    .vfio_get_object = vfio_user_pci_get_object,
    /* No live migration support yet. */
    .vfio_save_config = NULL,
    .vfio_load_config = NULL,
};

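/*
 * Realize: connect to the server's socket and negotiate the protocol
 * version, then run the common VFIO PCI setup (config space, BARs,
 * capabilities, interrupts) with all device accesses going over the socket.
 */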
static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
{
    ERRP_GUARD();
    VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
    VFIODevice *vbasedev = &vdev->vbasedev;
    const char *sock_name;
    AddressSpace *as;
    SocketAddress addr;
    VFIOUserProxy *proxy;

    if (!udev->socket) {
        error_setg(errp, "No socket specified");
        error_append_hint(errp, "e.g. -device '{"
            "\"driver\":\"vfio-user-pci\", "
            "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
            "\"type\": \"unix\"}"
            "}'\n");
        return;
    }

    sock_name = udev->socket->u.q_unix.path;

    vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);

    memset(&addr, 0, sizeof(addr));
    addr.type = SOCKET_ADDRESS_TYPE_UNIX;
    addr.u.q_unix.path = (char *)sock_name;
    proxy = vfio_user_connect_dev(&addr, errp);
    if (!proxy) {
        return;
    }
    vbasedev->proxy = proxy;
    vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);

    if (udev->send_queued) {
        proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
    }

    if (!vfio_user_validate_version(proxy, errp)) {
        goto error;
    }

    /*
     * Use socket-based device I/O instead of vfio kernel driver.
     */
    vbasedev->io_ops = &vfio_user_device_io_ops_sock;

    /*
     * vfio-user devices are effectively mdevs (don't use a host iommu).
     */
    vbasedev->mdev = true;

    /*
     * Enable per-region fds.
     */
    vbasedev->use_region_fds = true;

    as = pci_device_iommu_address_space(pdev);
    if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
                                          vbasedev->name, vbasedev,
                                          as, errp)) {
        goto error;
    }

    if (!vfio_pci_populate_device(vdev, errp)) {
        goto error;
    }

    if (!vfio_pci_config_setup(vdev, errp)) {
        goto error;
    }

    /*
     * vfio_pci_config_setup will have registered the device's BARs
     * and set up any MSIX BARs, so errors after it succeeds must
     * use out_teardown.
     */

    if (!vfio_pci_add_capabilities(vdev, errp)) {
        goto out_teardown;
    }

    if (vdev->msix != NULL) {
        vfio_user_msix_setup(vdev);
    }

    if (!vfio_pci_interrupt_setup(vdev, errp)) {
        goto out_teardown;
    }

    vfio_pci_register_err_notifier(vdev);
    vfio_pci_register_req_notifier(vdev);

    return;

out_teardown:
    vfio_pci_teardown_msi(vdev);
    vfio_pci_bars_exit(vdev);
error:
    error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
    vfio_pci_put_device(vdev);
}

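/*
 * A vfio-user device has no host PCI address, so host.* is left at the
 * invalid ~0U sentinel; everything else is the usual bootindex and VFIO
 * device initialization.
 */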
static void vfio_user_instance_init(Object *obj)
{
    PCIDevice *pci_dev = PCI_DEVICE(obj);
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
    VFIODevice *vbasedev = &vdev->vbasedev;

    device_add_bootindex_property(obj, &vdev->bootindex,
                                  "bootindex", NULL,
                                  &pci_dev->qdev);
    vdev->host.domain = ~0U;
    vdev->host.bus = ~0U;
    vdev->host.slot = ~0U;
    vdev->host.function = ~0U;

    vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
                     DEVICE(vdev), false);

    vdev->nv_gpudirect_clique = 0xFF;

    /*
     * QEMU_PCI_CAP_EXPRESS initialization does not depend on the QEMU
     * command line, so there is no need to wait until realize as other
     * devices do.
     */
    pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}

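/* Remove the PBA overlay, release the device, then drop the proxy connection. */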
static void vfio_user_instance_finalize(Object *obj)
{
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
    VFIODevice *vbasedev = &vdev->vbasedev;

    if (vdev->msix != NULL) {
        vfio_user_msix_teardown(vdev);
    }

    vfio_pci_put_device(vdev);

    if (vbasedev->proxy != NULL) {
        vfio_user_disconnect(vbasedev->proxy);
    }
}

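/*
 * Legacy reset handler: ask the server to reset the device (when it
 * reports reset support) between the usual pre- and post-reset steps.
 */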
static void vfio_user_pci_reset(DeviceState *dev)
{
    VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
    VFIODevice *vbasedev = &vdev->vbasedev;

    vfio_pci_pre_reset(vdev);

    if (vbasedev->reset_works) {
        vfio_user_device_reset(vbasedev->proxy);
    }

    vfio_pci_post_reset(vdev);
}

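/*
 * Experimental ("x-") properties: the x-pci-* options override the IDs
 * presented to the guest, and x-send-queued forces all outgoing messages
 * to be queued (VFIO_PROXY_FORCE_QUEUED).
 */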
static const Property vfio_user_pci_dev_properties[] = {
    DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
                       vendor_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
                       device_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
                       sub_vendor_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                       sub_device_id, PCI_ANY_ID),
    DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
};

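/*
 * Setter for the "socket" property: the address cannot be changed once the
 * proxy is connected, and only UNIX sockets are accepted.
 */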
static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
                                     void *opaque, Error **errp)
{
    VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
    bool success;

    if (udev->device.vbasedev.proxy) {
        error_setg(errp, "Proxy is connected");
        return;
    }

    qapi_free_SocketAddress(udev->socket);
    udev->socket = NULL;

    success = visit_type_SocketAddress(v, name, &udev->socket, errp);
    if (!success) {
        return;
    }

    if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "Unsupported socket type %s",
                   SocketAddressType_str(udev->socket->type));
        qapi_free_SocketAddress(udev->socket);
        udev->socket = NULL;
        return;
    }
}

static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);

    device_class_set_legacy_reset(dc, vfio_user_pci_reset);
    device_class_set_props(dc, vfio_user_pci_dev_properties);

    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfio_user_pci_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress (UNIX sockets only)");

    dc->desc = "VFIO over socket PCI device assignment";
    pdc->realize = vfio_user_pci_realize;
}

static const TypeInfo vfio_user_pci_dev_info = {
    .name = TYPE_VFIO_USER_PCI,
    .parent = TYPE_VFIO_PCI_BASE,
    .instance_size = sizeof(VFIOUserPCIDevice),
    .class_init = vfio_user_pci_dev_class_init,
    .instance_init = vfio_user_instance_init,
    .instance_finalize = vfio_user_instance_finalize,
};

static void register_vfio_user_dev_type(void)
{
    type_register_static(&vfio_user_pci_dev_info);
}

type_init(register_vfio_user_dev_type)