19fca2b7dSJohn Levon /* 29fca2b7dSJohn Levon * vfio PCI device over a UNIX socket. 39fca2b7dSJohn Levon * 49fca2b7dSJohn Levon * Copyright © 2018, 2021 Oracle and/or its affiliates. 59fca2b7dSJohn Levon * 69fca2b7dSJohn Levon * SPDX-License-Identifier: GPL-2.0-or-later 79fca2b7dSJohn Levon */ 89fca2b7dSJohn Levon 99fca2b7dSJohn Levon #include <sys/ioctl.h> 109fca2b7dSJohn Levon #include "qemu/osdep.h" 119fca2b7dSJohn Levon #include "qapi-visit-sockets.h" 129fca2b7dSJohn Levon 139fca2b7dSJohn Levon #include "hw/qdev-properties.h" 149fca2b7dSJohn Levon #include "hw/vfio/pci.h" 15667866d6SJohn Levon #include "hw/vfio-user/device.h" 16438d863fSJohn Levon #include "hw/vfio-user/proxy.h" 179fca2b7dSJohn Levon 189fca2b7dSJohn Levon #define TYPE_VFIO_USER_PCI "vfio-user-pci" 199fca2b7dSJohn Levon OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) 209fca2b7dSJohn Levon 219fca2b7dSJohn Levon struct VFIOUserPCIDevice { 229fca2b7dSJohn Levon VFIOPCIDevice device; 239fca2b7dSJohn Levon SocketAddress *socket; 2436227628SJohn Levon bool send_queued; /* all sends are queued */ 259fca2b7dSJohn Levon }; 269fca2b7dSJohn Levon 279fca2b7dSJohn Levon /* 28*777e45c7SJohn Levon * The server maintains the device's pending interrupts, 29*777e45c7SJohn Levon * via its MSIX table and PBA, so we treat these accesses 30*777e45c7SJohn Levon * like PCI config space and forward them. 31*777e45c7SJohn Levon */ 32*777e45c7SJohn Levon static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr, 33*777e45c7SJohn Levon unsigned size) 34*777e45c7SJohn Levon { 35*777e45c7SJohn Levon VFIOPCIDevice *vdev = opaque; 36*777e45c7SJohn Levon VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region; 37*777e45c7SJohn Levon uint64_t data; 38*777e45c7SJohn Levon 39*777e45c7SJohn Levon /* server copy is what matters */ 40*777e45c7SJohn Levon data = vfio_region_read(region, addr + vdev->msix->pba_offset, size); 41*777e45c7SJohn Levon return data; 42*777e45c7SJohn Levon } 43*777e45c7SJohn Levon 44*777e45c7SJohn Levon static void vfio_user_pba_write(void *opaque, hwaddr addr, 45*777e45c7SJohn Levon uint64_t data, unsigned size) 46*777e45c7SJohn Levon { 47*777e45c7SJohn Levon /* dropped */ 48*777e45c7SJohn Levon } 49*777e45c7SJohn Levon 50*777e45c7SJohn Levon static const MemoryRegionOps vfio_user_pba_ops = { 51*777e45c7SJohn Levon .read = vfio_user_pba_read, 52*777e45c7SJohn Levon .write = vfio_user_pba_write, 53*777e45c7SJohn Levon .endianness = DEVICE_LITTLE_ENDIAN, 54*777e45c7SJohn Levon }; 55*777e45c7SJohn Levon 56*777e45c7SJohn Levon static void vfio_user_msix_setup(VFIOPCIDevice *vdev) 57*777e45c7SJohn Levon { 58*777e45c7SJohn Levon MemoryRegion *vfio_reg, *msix_reg, *pba_reg; 59*777e45c7SJohn Levon 60*777e45c7SJohn Levon pba_reg = g_new0(MemoryRegion, 1); 61*777e45c7SJohn Levon vdev->msix->pba_region = pba_reg; 62*777e45c7SJohn Levon 63*777e45c7SJohn Levon vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; 64*777e45c7SJohn Levon msix_reg = &vdev->pdev.msix_pba_mmio; 65*777e45c7SJohn Levon memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, 66*777e45c7SJohn Levon "VFIO MSIX PBA", int128_get64(msix_reg->size)); 67*777e45c7SJohn Levon memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, 68*777e45c7SJohn Levon pba_reg, 1); 69*777e45c7SJohn Levon } 70*777e45c7SJohn Levon 71*777e45c7SJohn Levon static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) 72*777e45c7SJohn Levon { 73*777e45c7SJohn Levon MemoryRegion *mr, *sub; 74*777e45c7SJohn Levon 75*777e45c7SJohn Levon mr = vdev->bars[vdev->msix->pba_bar].mr; 76*777e45c7SJohn Levon sub = vdev->msix->pba_region; 77*777e45c7SJohn Levon memory_region_del_subregion(mr, sub); 78*777e45c7SJohn Levon 79*777e45c7SJohn Levon g_free(vdev->msix->pba_region); 80*777e45c7SJohn Levon vdev->msix->pba_region = NULL; 81*777e45c7SJohn Levon } 82*777e45c7SJohn Levon 83*777e45c7SJohn Levon /* 840b3d881aSJohn Levon * Incoming request message callback. 850b3d881aSJohn Levon * 860b3d881aSJohn Levon * Runs off main loop, so BQL held. 870b3d881aSJohn Levon */ 880b3d881aSJohn Levon static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg) 890b3d881aSJohn Levon { 900b3d881aSJohn Levon 910b3d881aSJohn Levon } 920b3d881aSJohn Levon 930b3d881aSJohn Levon /* 949fca2b7dSJohn Levon * Emulated devices don't use host hot reset 959fca2b7dSJohn Levon */ 969fca2b7dSJohn Levon static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) 979fca2b7dSJohn Levon { 989fca2b7dSJohn Levon vbasedev->needs_reset = false; 999fca2b7dSJohn Levon } 1009fca2b7dSJohn Levon 1019fca2b7dSJohn Levon static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) 1029fca2b7dSJohn Levon { 1039fca2b7dSJohn Levon VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, 1049fca2b7dSJohn Levon device.vbasedev); 1059fca2b7dSJohn Levon 1069fca2b7dSJohn Levon return OBJECT(vdev); 1079fca2b7dSJohn Levon } 1089fca2b7dSJohn Levon 1099fca2b7dSJohn Levon static VFIODeviceOps vfio_user_pci_ops = { 1109fca2b7dSJohn Levon .vfio_compute_needs_reset = vfio_user_compute_needs_reset, 1119fca2b7dSJohn Levon .vfio_eoi = vfio_pci_intx_eoi, 1129fca2b7dSJohn Levon .vfio_get_object = vfio_user_pci_get_object, 1139fca2b7dSJohn Levon /* No live migration support yet. */ 1149fca2b7dSJohn Levon .vfio_save_config = NULL, 1159fca2b7dSJohn Levon .vfio_load_config = NULL, 1169fca2b7dSJohn Levon }; 1179fca2b7dSJohn Levon 1189fca2b7dSJohn Levon static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) 1199fca2b7dSJohn Levon { 1209fca2b7dSJohn Levon ERRP_GUARD(); 1219fca2b7dSJohn Levon VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); 1229fca2b7dSJohn Levon VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); 1239fca2b7dSJohn Levon VFIODevice *vbasedev = &vdev->vbasedev; 1249fca2b7dSJohn Levon const char *sock_name; 1259fca2b7dSJohn Levon AddressSpace *as; 126438d863fSJohn Levon SocketAddress addr; 127438d863fSJohn Levon VFIOUserProxy *proxy; 1289fca2b7dSJohn Levon 1299fca2b7dSJohn Levon if (!udev->socket) { 1309fca2b7dSJohn Levon error_setg(errp, "No socket specified"); 1319fca2b7dSJohn Levon error_append_hint(errp, "e.g. -device '{" 1329fca2b7dSJohn Levon "\"driver\":\"vfio-user-pci\", " 1339fca2b7dSJohn Levon "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", " 1349fca2b7dSJohn Levon "\"type\": \"unix\"}'" 1359fca2b7dSJohn Levon "}'\n"); 1369fca2b7dSJohn Levon return; 1379fca2b7dSJohn Levon } 1389fca2b7dSJohn Levon 1399fca2b7dSJohn Levon sock_name = udev->socket->u.q_unix.path; 1409fca2b7dSJohn Levon 1419fca2b7dSJohn Levon vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 1429fca2b7dSJohn Levon 143438d863fSJohn Levon memset(&addr, 0, sizeof(addr)); 144438d863fSJohn Levon addr.type = SOCKET_ADDRESS_TYPE_UNIX; 145438d863fSJohn Levon addr.u.q_unix.path = (char *)sock_name; 146438d863fSJohn Levon proxy = vfio_user_connect_dev(&addr, errp); 147438d863fSJohn Levon if (!proxy) { 148438d863fSJohn Levon return; 149438d863fSJohn Levon } 150438d863fSJohn Levon vbasedev->proxy = proxy; 1510b3d881aSJohn Levon vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); 152438d863fSJohn Levon 15336227628SJohn Levon vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 15436227628SJohn Levon 15536227628SJohn Levon if (udev->send_queued) { 15636227628SJohn Levon proxy->flags |= VFIO_PROXY_FORCE_QUEUED; 15736227628SJohn Levon } 15836227628SJohn Levon 15936227628SJohn Levon if (!vfio_user_validate_version(proxy, errp)) { 16036227628SJohn Levon goto error; 16136227628SJohn Levon } 16236227628SJohn Levon 1639fca2b7dSJohn Levon /* 164667866d6SJohn Levon * Use socket-based device I/O instead of vfio kernel driver. 165667866d6SJohn Levon */ 166667866d6SJohn Levon vbasedev->io_ops = &vfio_user_device_io_ops_sock; 167667866d6SJohn Levon 168667866d6SJohn Levon /* 1699fca2b7dSJohn Levon * vfio-user devices are effectively mdevs (don't use a host iommu). 1709fca2b7dSJohn Levon */ 1719fca2b7dSJohn Levon vbasedev->mdev = true; 1729fca2b7dSJohn Levon 173667866d6SJohn Levon /* 174667866d6SJohn Levon * Enable per-region fds. 175667866d6SJohn Levon */ 176667866d6SJohn Levon vbasedev->use_region_fds = true; 177667866d6SJohn Levon 1789fca2b7dSJohn Levon as = pci_device_iommu_address_space(pdev); 1799fca2b7dSJohn Levon if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER, 1809fca2b7dSJohn Levon vbasedev->name, vbasedev, 1819fca2b7dSJohn Levon as, errp)) { 18236227628SJohn Levon goto error; 1839fca2b7dSJohn Levon } 18436227628SJohn Levon 185692e0ec5SJohn Levon if (!vfio_pci_populate_device(vdev, errp)) { 186692e0ec5SJohn Levon goto error; 187692e0ec5SJohn Levon } 188692e0ec5SJohn Levon 189692e0ec5SJohn Levon if (!vfio_pci_config_setup(vdev, errp)) { 190692e0ec5SJohn Levon goto error; 191692e0ec5SJohn Levon } 192692e0ec5SJohn Levon 193692e0ec5SJohn Levon /* 194692e0ec5SJohn Levon * vfio_pci_config_setup will have registered the device's BARs 195692e0ec5SJohn Levon * and setup any MSIX BARs, so errors after it succeeds must 196692e0ec5SJohn Levon * use out_teardown 197692e0ec5SJohn Levon */ 198692e0ec5SJohn Levon 199692e0ec5SJohn Levon if (!vfio_pci_add_capabilities(vdev, errp)) { 200692e0ec5SJohn Levon goto out_teardown; 201692e0ec5SJohn Levon } 202692e0ec5SJohn Levon 203*777e45c7SJohn Levon if (vdev->msix != NULL) { 204*777e45c7SJohn Levon vfio_user_msix_setup(vdev); 205*777e45c7SJohn Levon } 206*777e45c7SJohn Levon 207692e0ec5SJohn Levon if (!vfio_pci_interrupt_setup(vdev, errp)) { 208692e0ec5SJohn Levon goto out_teardown; 209692e0ec5SJohn Levon } 210692e0ec5SJohn Levon 211692e0ec5SJohn Levon vfio_pci_register_err_notifier(vdev); 212692e0ec5SJohn Levon vfio_pci_register_req_notifier(vdev); 213692e0ec5SJohn Levon 21436227628SJohn Levon return; 21536227628SJohn Levon 216692e0ec5SJohn Levon out_teardown: 217692e0ec5SJohn Levon vfio_pci_teardown_msi(vdev); 218692e0ec5SJohn Levon vfio_pci_bars_exit(vdev); 21936227628SJohn Levon error: 22036227628SJohn Levon error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); 221692e0ec5SJohn Levon vfio_pci_put_device(vdev); 2229fca2b7dSJohn Levon } 2239fca2b7dSJohn Levon 2249fca2b7dSJohn Levon static void vfio_user_instance_init(Object *obj) 2259fca2b7dSJohn Levon { 2269fca2b7dSJohn Levon PCIDevice *pci_dev = PCI_DEVICE(obj); 2279fca2b7dSJohn Levon VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 2289fca2b7dSJohn Levon VFIODevice *vbasedev = &vdev->vbasedev; 2299fca2b7dSJohn Levon 2309fca2b7dSJohn Levon device_add_bootindex_property(obj, &vdev->bootindex, 2319fca2b7dSJohn Levon "bootindex", NULL, 2329fca2b7dSJohn Levon &pci_dev->qdev); 2339fca2b7dSJohn Levon vdev->host.domain = ~0U; 2349fca2b7dSJohn Levon vdev->host.bus = ~0U; 2359fca2b7dSJohn Levon vdev->host.slot = ~0U; 2369fca2b7dSJohn Levon vdev->host.function = ~0U; 2379fca2b7dSJohn Levon 2389fca2b7dSJohn Levon vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops, 2399fca2b7dSJohn Levon DEVICE(vdev), false); 2409fca2b7dSJohn Levon 2419fca2b7dSJohn Levon vdev->nv_gpudirect_clique = 0xFF; 2429fca2b7dSJohn Levon 2439fca2b7dSJohn Levon /* 2449fca2b7dSJohn Levon * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command 2459fca2b7dSJohn Levon * line, therefore, no need to wait to realize like other devices. 2469fca2b7dSJohn Levon */ 2479fca2b7dSJohn Levon pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 2489fca2b7dSJohn Levon } 2499fca2b7dSJohn Levon 2509fca2b7dSJohn Levon static void vfio_user_instance_finalize(Object *obj) 2519fca2b7dSJohn Levon { 2529fca2b7dSJohn Levon VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 253438d863fSJohn Levon VFIODevice *vbasedev = &vdev->vbasedev; 2549fca2b7dSJohn Levon 255*777e45c7SJohn Levon if (vdev->msix != NULL) { 256*777e45c7SJohn Levon vfio_user_msix_teardown(vdev); 257*777e45c7SJohn Levon } 258*777e45c7SJohn Levon 2599fca2b7dSJohn Levon vfio_pci_put_device(vdev); 260438d863fSJohn Levon 261438d863fSJohn Levon if (vbasedev->proxy != NULL) { 262438d863fSJohn Levon vfio_user_disconnect(vbasedev->proxy); 263438d863fSJohn Levon } 2649fca2b7dSJohn Levon } 2659fca2b7dSJohn Levon 2669fca2b7dSJohn Levon static const Property vfio_user_pci_dev_properties[] = { 2679fca2b7dSJohn Levon DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, 2689fca2b7dSJohn Levon vendor_id, PCI_ANY_ID), 2699fca2b7dSJohn Levon DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, 2709fca2b7dSJohn Levon device_id, PCI_ANY_ID), 2719fca2b7dSJohn Levon DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, 2729fca2b7dSJohn Levon sub_vendor_id, PCI_ANY_ID), 2739fca2b7dSJohn Levon DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, 2749fca2b7dSJohn Levon sub_device_id, PCI_ANY_ID), 27536227628SJohn Levon DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), 2769fca2b7dSJohn Levon }; 2779fca2b7dSJohn Levon 2789fca2b7dSJohn Levon static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, 2799fca2b7dSJohn Levon void *opaque, Error **errp) 2809fca2b7dSJohn Levon { 2819fca2b7dSJohn Levon VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); 2829fca2b7dSJohn Levon bool success; 2839fca2b7dSJohn Levon 284438d863fSJohn Levon if (udev->device.vbasedev.proxy) { 285438d863fSJohn Levon error_setg(errp, "Proxy is connected"); 286438d863fSJohn Levon return; 287438d863fSJohn Levon } 288438d863fSJohn Levon 2899fca2b7dSJohn Levon qapi_free_SocketAddress(udev->socket); 2909fca2b7dSJohn Levon 2919fca2b7dSJohn Levon udev->socket = NULL; 2929fca2b7dSJohn Levon 2939fca2b7dSJohn Levon success = visit_type_SocketAddress(v, name, &udev->socket, errp); 2949fca2b7dSJohn Levon 2959fca2b7dSJohn Levon if (!success) { 2969fca2b7dSJohn Levon return; 2979fca2b7dSJohn Levon } 2989fca2b7dSJohn Levon 2999fca2b7dSJohn Levon if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 3009fca2b7dSJohn Levon error_setg(errp, "Unsupported socket type %s", 3019fca2b7dSJohn Levon SocketAddressType_str(udev->socket->type)); 3029fca2b7dSJohn Levon qapi_free_SocketAddress(udev->socket); 3039fca2b7dSJohn Levon udev->socket = NULL; 3049fca2b7dSJohn Levon return; 3059fca2b7dSJohn Levon } 3069fca2b7dSJohn Levon } 3079fca2b7dSJohn Levon 3089fca2b7dSJohn Levon static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) 3099fca2b7dSJohn Levon { 3109fca2b7dSJohn Levon DeviceClass *dc = DEVICE_CLASS(klass); 3119fca2b7dSJohn Levon PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); 3129fca2b7dSJohn Levon 3139fca2b7dSJohn Levon device_class_set_props(dc, vfio_user_pci_dev_properties); 3149fca2b7dSJohn Levon 3159fca2b7dSJohn Levon object_class_property_add(klass, "socket", "SocketAddress", NULL, 3169fca2b7dSJohn Levon vfio_user_pci_set_socket, NULL, NULL); 3179fca2b7dSJohn Levon object_class_property_set_description(klass, "socket", 3189fca2b7dSJohn Levon "SocketAddress (UNIX sockets only)"); 3199fca2b7dSJohn Levon 3209fca2b7dSJohn Levon dc->desc = "VFIO over socket PCI device assignment"; 3219fca2b7dSJohn Levon pdc->realize = vfio_user_pci_realize; 3229fca2b7dSJohn Levon } 3239fca2b7dSJohn Levon 3249fca2b7dSJohn Levon static const TypeInfo vfio_user_pci_dev_info = { 3259fca2b7dSJohn Levon .name = TYPE_VFIO_USER_PCI, 3269fca2b7dSJohn Levon .parent = TYPE_VFIO_PCI_BASE, 3279fca2b7dSJohn Levon .instance_size = sizeof(VFIOUserPCIDevice), 3289fca2b7dSJohn Levon .class_init = vfio_user_pci_dev_class_init, 3299fca2b7dSJohn Levon .instance_init = vfio_user_instance_init, 3309fca2b7dSJohn Levon .instance_finalize = vfio_user_instance_finalize, 3319fca2b7dSJohn Levon }; 3329fca2b7dSJohn Levon 3339fca2b7dSJohn Levon static void register_vfio_user_dev_type(void) 3349fca2b7dSJohn Levon { 3359fca2b7dSJohn Levon type_register_static(&vfio_user_pci_dev_info); 3369fca2b7dSJohn Levon } 3379fca2b7dSJohn Levon 3389fca2b7dSJohn Levon type_init(register_vfio_user_dev_type) 339