1 /* 2 * vfio PCI device over a UNIX socket. 3 * 4 * Copyright © 2018, 2021 Oracle and/or its affiliates. 5 * 6 * SPDX-License-Identifier: GPL-2.0-or-later 7 */ 8 9 #include <sys/ioctl.h> 10 #include "qemu/osdep.h" 11 #include "qapi-visit-sockets.h" 12 #include "qemu/error-report.h" 13 14 #include "hw/qdev-properties.h" 15 #include "hw/vfio/pci.h" 16 #include "hw/vfio-user/device.h" 17 #include "hw/vfio-user/proxy.h" 18 19 #define TYPE_VFIO_USER_PCI "vfio-user-pci" 20 OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) 21 22 struct VFIOUserPCIDevice { 23 VFIOPCIDevice device; 24 SocketAddress *socket; 25 bool send_queued; /* all sends are queued */ 26 uint32_t wait_time; /* timeout for message replies */ 27 bool no_post; /* all region writes are sync */ 28 }; 29 30 /* 31 * The server maintains the device's pending interrupts, 32 * via its MSIX table and PBA, so we treat these accesses 33 * like PCI config space and forward them. 34 */ 35 static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr, 36 unsigned size) 37 { 38 VFIOPCIDevice *vdev = opaque; 39 VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region; 40 uint64_t data; 41 42 /* server copy is what matters */ 43 data = vfio_region_read(region, addr + vdev->msix->pba_offset, size); 44 return data; 45 } 46 47 static void vfio_user_pba_write(void *opaque, hwaddr addr, 48 uint64_t data, unsigned size) 49 { 50 /* dropped */ 51 } 52 53 static const MemoryRegionOps vfio_user_pba_ops = { 54 .read = vfio_user_pba_read, 55 .write = vfio_user_pba_write, 56 .endianness = DEVICE_LITTLE_ENDIAN, 57 }; 58 59 static void vfio_user_msix_setup(VFIOPCIDevice *vdev) 60 { 61 MemoryRegion *vfio_reg, *msix_reg, *pba_reg; 62 63 pba_reg = g_new0(MemoryRegion, 1); 64 vdev->msix->pba_region = pba_reg; 65 66 vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; 67 msix_reg = &vdev->pdev.msix_pba_mmio; 68 memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, 69 "VFIO MSIX PBA", int128_get64(msix_reg->size)); 70 memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, 71 pba_reg, 1); 72 } 73 74 static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) 75 { 76 MemoryRegion *mr, *sub; 77 78 mr = vdev->bars[vdev->msix->pba_bar].mr; 79 sub = vdev->msix->pba_region; 80 memory_region_del_subregion(mr, sub); 81 82 g_free(vdev->msix->pba_region); 83 vdev->msix->pba_region = NULL; 84 } 85 86 static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 87 { 88 PCIDevice *pdev = &vdev->pdev; 89 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 90 VFIOUserDMARW *res; 91 MemTxResult r; 92 size_t size; 93 94 if (msg->hdr.size < sizeof(*msg)) { 95 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 96 return; 97 } 98 if (msg->count > proxy->max_xfer_size) { 99 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 100 return; 101 } 102 103 /* switch to our own message buffer */ 104 size = msg->count + sizeof(VFIOUserDMARW); 105 res = g_malloc0(size); 106 memcpy(res, msg, sizeof(*res)); 107 g_free(msg); 108 109 r = pci_dma_read(pdev, res->offset, &res->data, res->count); 110 111 switch (r) { 112 case MEMTX_OK: 113 if (res->hdr.flags & VFIO_USER_NO_REPLY) { 114 g_free(res); 115 return; 116 } 117 vfio_user_send_reply(proxy, &res->hdr, size); 118 break; 119 case MEMTX_ERROR: 120 vfio_user_send_error(proxy, &res->hdr, EFAULT); 121 break; 122 case MEMTX_DECODE_ERROR: 123 vfio_user_send_error(proxy, &res->hdr, ENODEV); 124 break; 125 case MEMTX_ACCESS_ERROR: 126 vfio_user_send_error(proxy, &res->hdr, EPERM); 127 break; 128 default: 129 error_printf("vfio_user_dma_read unknown error %d\n", r); 130 vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL); 131 } 132 } 133 134 static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 135 { 136 PCIDevice *pdev = &vdev->pdev; 137 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 138 MemTxResult r; 139 140 if (msg->hdr.size < sizeof(*msg)) { 141 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 142 return; 143 } 144 /* make sure transfer count isn't larger than the message data */ 145 if (msg->count > msg->hdr.size - sizeof(*msg)) { 146 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 147 return; 148 } 149 150 r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count); 151 152 switch (r) { 153 case MEMTX_OK: 154 if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) { 155 vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr)); 156 } else { 157 g_free(msg); 158 } 159 break; 160 case MEMTX_ERROR: 161 vfio_user_send_error(proxy, &msg->hdr, EFAULT); 162 break; 163 case MEMTX_DECODE_ERROR: 164 vfio_user_send_error(proxy, &msg->hdr, ENODEV); 165 break; 166 case MEMTX_ACCESS_ERROR: 167 vfio_user_send_error(proxy, &msg->hdr, EPERM); 168 break; 169 default: 170 error_printf("vfio_user_dma_write unknown error %d\n", r); 171 vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL); 172 } 173 } 174 175 /* 176 * Incoming request message callback. 177 * 178 * Runs off main loop, so BQL held. 179 */ 180 static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg) 181 { 182 VFIOPCIDevice *vdev = opaque; 183 VFIOUserHdr *hdr = msg->hdr; 184 185 /* no incoming PCI requests pass FDs */ 186 if (msg->fds != NULL) { 187 vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL); 188 vfio_user_putfds(msg); 189 return; 190 } 191 192 switch (hdr->command) { 193 case VFIO_USER_DMA_READ: 194 vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr); 195 break; 196 case VFIO_USER_DMA_WRITE: 197 vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr); 198 break; 199 default: 200 error_printf("vfio_user_pci_process_req unknown cmd %d\n", 201 hdr->command); 202 vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS); 203 } 204 } 205 206 /* 207 * Emulated devices don't use host hot reset 208 */ 209 static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) 210 { 211 vbasedev->needs_reset = false; 212 } 213 214 static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) 215 { 216 VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, 217 device.vbasedev); 218 219 return OBJECT(vdev); 220 } 221 222 static VFIODeviceOps vfio_user_pci_ops = { 223 .vfio_compute_needs_reset = vfio_user_compute_needs_reset, 224 .vfio_eoi = vfio_pci_intx_eoi, 225 .vfio_get_object = vfio_user_pci_get_object, 226 /* No live migration support yet. */ 227 .vfio_save_config = NULL, 228 .vfio_load_config = NULL, 229 }; 230 231 static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) 232 { 233 ERRP_GUARD(); 234 VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); 235 VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); 236 VFIODevice *vbasedev = &vdev->vbasedev; 237 const char *sock_name; 238 AddressSpace *as; 239 SocketAddress addr; 240 VFIOUserProxy *proxy; 241 242 if (!udev->socket) { 243 error_setg(errp, "No socket specified"); 244 error_append_hint(errp, "e.g. -device '{" 245 "\"driver\":\"vfio-user-pci\", " 246 "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", " 247 "\"type\": \"unix\"}'" 248 "}'\n"); 249 return; 250 } 251 252 sock_name = udev->socket->u.q_unix.path; 253 254 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 255 256 memset(&addr, 0, sizeof(addr)); 257 addr.type = SOCKET_ADDRESS_TYPE_UNIX; 258 addr.u.q_unix.path = (char *)sock_name; 259 proxy = vfio_user_connect_dev(&addr, errp); 260 if (!proxy) { 261 return; 262 } 263 vbasedev->proxy = proxy; 264 vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); 265 266 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 267 268 if (udev->send_queued) { 269 proxy->flags |= VFIO_PROXY_FORCE_QUEUED; 270 } 271 272 if (udev->no_post) { 273 proxy->flags |= VFIO_PROXY_NO_POST; 274 } 275 276 /* user specified or 5 sec default */ 277 proxy->wait_time = udev->wait_time; 278 279 if (!vfio_user_validate_version(proxy, errp)) { 280 goto error; 281 } 282 283 /* 284 * Use socket-based device I/O instead of vfio kernel driver. 285 */ 286 vbasedev->io_ops = &vfio_user_device_io_ops_sock; 287 288 /* 289 * vfio-user devices are effectively mdevs (don't use a host iommu). 290 */ 291 vbasedev->mdev = true; 292 293 /* 294 * Enable per-region fds. 295 */ 296 vbasedev->use_region_fds = true; 297 298 as = pci_device_iommu_address_space(pdev); 299 if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER, 300 vbasedev->name, vbasedev, 301 as, errp)) { 302 goto error; 303 } 304 305 if (!vfio_pci_populate_device(vdev, errp)) { 306 goto error; 307 } 308 309 if (!vfio_pci_config_setup(vdev, errp)) { 310 goto error; 311 } 312 313 /* 314 * vfio_pci_config_setup will have registered the device's BARs 315 * and setup any MSIX BARs, so errors after it succeeds must 316 * use out_teardown 317 */ 318 319 if (!vfio_pci_add_capabilities(vdev, errp)) { 320 goto out_teardown; 321 } 322 323 if (vdev->msix != NULL) { 324 vfio_user_msix_setup(vdev); 325 } 326 327 if (!vfio_pci_interrupt_setup(vdev, errp)) { 328 goto out_teardown; 329 } 330 331 vfio_pci_register_err_notifier(vdev); 332 vfio_pci_register_req_notifier(vdev); 333 334 return; 335 336 out_teardown: 337 vfio_pci_teardown_msi(vdev); 338 vfio_pci_bars_exit(vdev); 339 error: 340 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); 341 vfio_pci_put_device(vdev); 342 } 343 344 static void vfio_user_instance_init(Object *obj) 345 { 346 PCIDevice *pci_dev = PCI_DEVICE(obj); 347 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 348 VFIODevice *vbasedev = &vdev->vbasedev; 349 350 device_add_bootindex_property(obj, &vdev->bootindex, 351 "bootindex", NULL, 352 &pci_dev->qdev); 353 vdev->host.domain = ~0U; 354 vdev->host.bus = ~0U; 355 vdev->host.slot = ~0U; 356 vdev->host.function = ~0U; 357 358 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops, 359 DEVICE(vdev), false); 360 361 vdev->nv_gpudirect_clique = 0xFF; 362 363 /* 364 * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command 365 * line, therefore, no need to wait to realize like other devices. 366 */ 367 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 368 } 369 370 static void vfio_user_instance_finalize(Object *obj) 371 { 372 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 373 VFIODevice *vbasedev = &vdev->vbasedev; 374 375 if (vdev->msix != NULL) { 376 vfio_user_msix_teardown(vdev); 377 } 378 379 vfio_pci_put_device(vdev); 380 381 if (vbasedev->proxy != NULL) { 382 vfio_user_disconnect(vbasedev->proxy); 383 } 384 } 385 386 static void vfio_user_pci_reset(DeviceState *dev) 387 { 388 VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); 389 VFIODevice *vbasedev = &vdev->vbasedev; 390 391 vfio_pci_pre_reset(vdev); 392 393 if (vbasedev->reset_works) { 394 vfio_user_device_reset(vbasedev->proxy); 395 } 396 397 vfio_pci_post_reset(vdev); 398 } 399 400 static const Property vfio_user_pci_dev_properties[] = { 401 DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, 402 vendor_id, PCI_ANY_ID), 403 DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, 404 device_id, PCI_ANY_ID), 405 DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, 406 sub_vendor_id, PCI_ANY_ID), 407 DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, 408 sub_device_id, PCI_ANY_ID), 409 DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), 410 DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000), 411 DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), 412 }; 413 414 static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, 415 void *opaque, Error **errp) 416 { 417 VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); 418 bool success; 419 420 if (udev->device.vbasedev.proxy) { 421 error_setg(errp, "Proxy is connected"); 422 return; 423 } 424 425 qapi_free_SocketAddress(udev->socket); 426 427 udev->socket = NULL; 428 429 success = visit_type_SocketAddress(v, name, &udev->socket, errp); 430 431 if (!success) { 432 return; 433 } 434 435 if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 436 error_setg(errp, "Unsupported socket type %s", 437 SocketAddressType_str(udev->socket->type)); 438 qapi_free_SocketAddress(udev->socket); 439 udev->socket = NULL; 440 return; 441 } 442 } 443 444 static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) 445 { 446 DeviceClass *dc = DEVICE_CLASS(klass); 447 PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); 448 449 device_class_set_legacy_reset(dc, vfio_user_pci_reset); 450 device_class_set_props(dc, vfio_user_pci_dev_properties); 451 452 object_class_property_add(klass, "socket", "SocketAddress", NULL, 453 vfio_user_pci_set_socket, NULL, NULL); 454 object_class_property_set_description(klass, "socket", 455 "SocketAddress (UNIX sockets only)"); 456 457 dc->desc = "VFIO over socket PCI device assignment"; 458 pdc->realize = vfio_user_pci_realize; 459 } 460 461 static const TypeInfo vfio_user_pci_dev_info = { 462 .name = TYPE_VFIO_USER_PCI, 463 .parent = TYPE_VFIO_PCI_BASE, 464 .instance_size = sizeof(VFIOUserPCIDevice), 465 .class_init = vfio_user_pci_dev_class_init, 466 .instance_init = vfio_user_instance_init, 467 .instance_finalize = vfio_user_instance_finalize, 468 }; 469 470 static void register_vfio_user_dev_type(void) 471 { 472 type_register_static(&vfio_user_pci_dev_info); 473 } 474 475 type_init(register_vfio_user_dev_type) 476