1 /* 2 * vfio PCI device over a UNIX socket. 3 * 4 * Copyright © 2018, 2021 Oracle and/or its affiliates. 5 * 6 * SPDX-License-Identifier: GPL-2.0-or-later 7 */ 8 9 #include <sys/ioctl.h> 10 #include "qemu/osdep.h" 11 #include "qapi-visit-sockets.h" 12 #include "qemu/error-report.h" 13 14 #include "hw/qdev-properties.h" 15 #include "hw/vfio/pci.h" 16 #include "hw/vfio-user/device.h" 17 #include "hw/vfio-user/proxy.h" 18 19 #define TYPE_VFIO_USER_PCI "vfio-user-pci" 20 OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) 21 22 struct VFIOUserPCIDevice { 23 VFIOPCIDevice device; 24 SocketAddress *socket; 25 bool send_queued; /* all sends are queued */ 26 }; 27 28 /* 29 * The server maintains the device's pending interrupts, 30 * via its MSIX table and PBA, so we treat these accesses 31 * like PCI config space and forward them. 32 */ 33 static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr, 34 unsigned size) 35 { 36 VFIOPCIDevice *vdev = opaque; 37 VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region; 38 uint64_t data; 39 40 /* server copy is what matters */ 41 data = vfio_region_read(region, addr + vdev->msix->pba_offset, size); 42 return data; 43 } 44 45 static void vfio_user_pba_write(void *opaque, hwaddr addr, 46 uint64_t data, unsigned size) 47 { 48 /* dropped */ 49 } 50 51 static const MemoryRegionOps vfio_user_pba_ops = { 52 .read = vfio_user_pba_read, 53 .write = vfio_user_pba_write, 54 .endianness = DEVICE_LITTLE_ENDIAN, 55 }; 56 57 static void vfio_user_msix_setup(VFIOPCIDevice *vdev) 58 { 59 MemoryRegion *vfio_reg, *msix_reg, *pba_reg; 60 61 pba_reg = g_new0(MemoryRegion, 1); 62 vdev->msix->pba_region = pba_reg; 63 64 vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; 65 msix_reg = &vdev->pdev.msix_pba_mmio; 66 memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, 67 "VFIO MSIX PBA", int128_get64(msix_reg->size)); 68 memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, 69 pba_reg, 1); 70 } 71 72 static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) 73 { 74 MemoryRegion *mr, *sub; 75 76 mr = vdev->bars[vdev->msix->pba_bar].mr; 77 sub = vdev->msix->pba_region; 78 memory_region_del_subregion(mr, sub); 79 80 g_free(vdev->msix->pba_region); 81 vdev->msix->pba_region = NULL; 82 } 83 84 static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 85 { 86 PCIDevice *pdev = &vdev->pdev; 87 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 88 VFIOUserDMARW *res; 89 MemTxResult r; 90 size_t size; 91 92 if (msg->hdr.size < sizeof(*msg)) { 93 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 94 return; 95 } 96 if (msg->count > proxy->max_xfer_size) { 97 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 98 return; 99 } 100 101 /* switch to our own message buffer */ 102 size = msg->count + sizeof(VFIOUserDMARW); 103 res = g_malloc0(size); 104 memcpy(res, msg, sizeof(*res)); 105 g_free(msg); 106 107 r = pci_dma_read(pdev, res->offset, &res->data, res->count); 108 109 switch (r) { 110 case MEMTX_OK: 111 if (res->hdr.flags & VFIO_USER_NO_REPLY) { 112 g_free(res); 113 return; 114 } 115 vfio_user_send_reply(proxy, &res->hdr, size); 116 break; 117 case MEMTX_ERROR: 118 vfio_user_send_error(proxy, &res->hdr, EFAULT); 119 break; 120 case MEMTX_DECODE_ERROR: 121 vfio_user_send_error(proxy, &res->hdr, ENODEV); 122 break; 123 case MEMTX_ACCESS_ERROR: 124 vfio_user_send_error(proxy, &res->hdr, EPERM); 125 break; 126 default: 127 error_printf("vfio_user_dma_read unknown error %d\n", r); 128 vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL); 129 } 130 } 131 132 static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 133 { 134 PCIDevice *pdev = &vdev->pdev; 135 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 136 MemTxResult r; 137 138 if (msg->hdr.size < sizeof(*msg)) { 139 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 140 return; 141 } 142 /* make sure transfer count isn't larger than the message data */ 143 if (msg->count > msg->hdr.size - sizeof(*msg)) { 144 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 145 return; 146 } 147 148 r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count); 149 150 switch (r) { 151 case MEMTX_OK: 152 if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) { 153 vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr)); 154 } else { 155 g_free(msg); 156 } 157 break; 158 case MEMTX_ERROR: 159 vfio_user_send_error(proxy, &msg->hdr, EFAULT); 160 break; 161 case MEMTX_DECODE_ERROR: 162 vfio_user_send_error(proxy, &msg->hdr, ENODEV); 163 break; 164 case MEMTX_ACCESS_ERROR: 165 vfio_user_send_error(proxy, &msg->hdr, EPERM); 166 break; 167 default: 168 error_printf("vfio_user_dma_write unknown error %d\n", r); 169 vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL); 170 } 171 } 172 173 /* 174 * Incoming request message callback. 175 * 176 * Runs off main loop, so BQL held. 177 */ 178 static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg) 179 { 180 VFIOPCIDevice *vdev = opaque; 181 VFIOUserHdr *hdr = msg->hdr; 182 183 /* no incoming PCI requests pass FDs */ 184 if (msg->fds != NULL) { 185 vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL); 186 vfio_user_putfds(msg); 187 return; 188 } 189 190 switch (hdr->command) { 191 case VFIO_USER_DMA_READ: 192 vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr); 193 break; 194 case VFIO_USER_DMA_WRITE: 195 vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr); 196 break; 197 default: 198 error_printf("vfio_user_pci_process_req unknown cmd %d\n", 199 hdr->command); 200 vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS); 201 } 202 } 203 204 /* 205 * Emulated devices don't use host hot reset 206 */ 207 static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) 208 { 209 vbasedev->needs_reset = false; 210 } 211 212 static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) 213 { 214 VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, 215 device.vbasedev); 216 217 return OBJECT(vdev); 218 } 219 220 static VFIODeviceOps vfio_user_pci_ops = { 221 .vfio_compute_needs_reset = vfio_user_compute_needs_reset, 222 .vfio_eoi = vfio_pci_intx_eoi, 223 .vfio_get_object = vfio_user_pci_get_object, 224 /* No live migration support yet. */ 225 .vfio_save_config = NULL, 226 .vfio_load_config = NULL, 227 }; 228 229 static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) 230 { 231 ERRP_GUARD(); 232 VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); 233 VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); 234 VFIODevice *vbasedev = &vdev->vbasedev; 235 const char *sock_name; 236 AddressSpace *as; 237 SocketAddress addr; 238 VFIOUserProxy *proxy; 239 240 if (!udev->socket) { 241 error_setg(errp, "No socket specified"); 242 error_append_hint(errp, "e.g. -device '{" 243 "\"driver\":\"vfio-user-pci\", " 244 "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", " 245 "\"type\": \"unix\"}'" 246 "}'\n"); 247 return; 248 } 249 250 sock_name = udev->socket->u.q_unix.path; 251 252 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 253 254 memset(&addr, 0, sizeof(addr)); 255 addr.type = SOCKET_ADDRESS_TYPE_UNIX; 256 addr.u.q_unix.path = (char *)sock_name; 257 proxy = vfio_user_connect_dev(&addr, errp); 258 if (!proxy) { 259 return; 260 } 261 vbasedev->proxy = proxy; 262 vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); 263 264 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 265 266 if (udev->send_queued) { 267 proxy->flags |= VFIO_PROXY_FORCE_QUEUED; 268 } 269 270 if (!vfio_user_validate_version(proxy, errp)) { 271 goto error; 272 } 273 274 /* 275 * Use socket-based device I/O instead of vfio kernel driver. 276 */ 277 vbasedev->io_ops = &vfio_user_device_io_ops_sock; 278 279 /* 280 * vfio-user devices are effectively mdevs (don't use a host iommu). 281 */ 282 vbasedev->mdev = true; 283 284 /* 285 * Enable per-region fds. 286 */ 287 vbasedev->use_region_fds = true; 288 289 as = pci_device_iommu_address_space(pdev); 290 if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER, 291 vbasedev->name, vbasedev, 292 as, errp)) { 293 goto error; 294 } 295 296 if (!vfio_pci_populate_device(vdev, errp)) { 297 goto error; 298 } 299 300 if (!vfio_pci_config_setup(vdev, errp)) { 301 goto error; 302 } 303 304 /* 305 * vfio_pci_config_setup will have registered the device's BARs 306 * and setup any MSIX BARs, so errors after it succeeds must 307 * use out_teardown 308 */ 309 310 if (!vfio_pci_add_capabilities(vdev, errp)) { 311 goto out_teardown; 312 } 313 314 if (vdev->msix != NULL) { 315 vfio_user_msix_setup(vdev); 316 } 317 318 if (!vfio_pci_interrupt_setup(vdev, errp)) { 319 goto out_teardown; 320 } 321 322 vfio_pci_register_err_notifier(vdev); 323 vfio_pci_register_req_notifier(vdev); 324 325 return; 326 327 out_teardown: 328 vfio_pci_teardown_msi(vdev); 329 vfio_pci_bars_exit(vdev); 330 error: 331 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); 332 vfio_pci_put_device(vdev); 333 } 334 335 static void vfio_user_instance_init(Object *obj) 336 { 337 PCIDevice *pci_dev = PCI_DEVICE(obj); 338 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 339 VFIODevice *vbasedev = &vdev->vbasedev; 340 341 device_add_bootindex_property(obj, &vdev->bootindex, 342 "bootindex", NULL, 343 &pci_dev->qdev); 344 vdev->host.domain = ~0U; 345 vdev->host.bus = ~0U; 346 vdev->host.slot = ~0U; 347 vdev->host.function = ~0U; 348 349 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops, 350 DEVICE(vdev), false); 351 352 vdev->nv_gpudirect_clique = 0xFF; 353 354 /* 355 * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command 356 * line, therefore, no need to wait to realize like other devices. 357 */ 358 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 359 } 360 361 static void vfio_user_instance_finalize(Object *obj) 362 { 363 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 364 VFIODevice *vbasedev = &vdev->vbasedev; 365 366 if (vdev->msix != NULL) { 367 vfio_user_msix_teardown(vdev); 368 } 369 370 vfio_pci_put_device(vdev); 371 372 if (vbasedev->proxy != NULL) { 373 vfio_user_disconnect(vbasedev->proxy); 374 } 375 } 376 377 static void vfio_user_pci_reset(DeviceState *dev) 378 { 379 VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); 380 VFIODevice *vbasedev = &vdev->vbasedev; 381 382 vfio_pci_pre_reset(vdev); 383 384 if (vbasedev->reset_works) { 385 vfio_user_device_reset(vbasedev->proxy); 386 } 387 388 vfio_pci_post_reset(vdev); 389 } 390 391 static const Property vfio_user_pci_dev_properties[] = { 392 DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, 393 vendor_id, PCI_ANY_ID), 394 DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, 395 device_id, PCI_ANY_ID), 396 DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, 397 sub_vendor_id, PCI_ANY_ID), 398 DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, 399 sub_device_id, PCI_ANY_ID), 400 DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), 401 }; 402 403 static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, 404 void *opaque, Error **errp) 405 { 406 VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); 407 bool success; 408 409 if (udev->device.vbasedev.proxy) { 410 error_setg(errp, "Proxy is connected"); 411 return; 412 } 413 414 qapi_free_SocketAddress(udev->socket); 415 416 udev->socket = NULL; 417 418 success = visit_type_SocketAddress(v, name, &udev->socket, errp); 419 420 if (!success) { 421 return; 422 } 423 424 if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 425 error_setg(errp, "Unsupported socket type %s", 426 SocketAddressType_str(udev->socket->type)); 427 qapi_free_SocketAddress(udev->socket); 428 udev->socket = NULL; 429 return; 430 } 431 } 432 433 static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) 434 { 435 DeviceClass *dc = DEVICE_CLASS(klass); 436 PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); 437 438 device_class_set_legacy_reset(dc, vfio_user_pci_reset); 439 device_class_set_props(dc, vfio_user_pci_dev_properties); 440 441 object_class_property_add(klass, "socket", "SocketAddress", NULL, 442 vfio_user_pci_set_socket, NULL, NULL); 443 object_class_property_set_description(klass, "socket", 444 "SocketAddress (UNIX sockets only)"); 445 446 dc->desc = "VFIO over socket PCI device assignment"; 447 pdc->realize = vfio_user_pci_realize; 448 } 449 450 static const TypeInfo vfio_user_pci_dev_info = { 451 .name = TYPE_VFIO_USER_PCI, 452 .parent = TYPE_VFIO_PCI_BASE, 453 .instance_size = sizeof(VFIOUserPCIDevice), 454 .class_init = vfio_user_pci_dev_class_init, 455 .instance_init = vfio_user_instance_init, 456 .instance_finalize = vfio_user_instance_finalize, 457 }; 458 459 static void register_vfio_user_dev_type(void) 460 { 461 type_register_static(&vfio_user_pci_dev_info); 462 } 463 464 type_init(register_vfio_user_dev_type) 465