1 /* 2 * vfio PCI device over a UNIX socket. 3 * 4 * Copyright © 2018, 2021 Oracle and/or its affiliates. 5 * 6 * SPDX-License-Identifier: GPL-2.0-or-later 7 */ 8 9 #include <sys/ioctl.h> 10 #include "qemu/osdep.h" 11 #include "qapi-visit-sockets.h" 12 #include "qemu/error-report.h" 13 14 #include "hw/qdev-properties.h" 15 #include "hw/vfio/pci.h" 16 #include "hw/vfio-user/device.h" 17 #include "hw/vfio-user/proxy.h" 18 19 #define TYPE_VFIO_USER_PCI "vfio-user-pci" 20 OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) 21 22 struct VFIOUserPCIDevice { 23 VFIOPCIDevice device; 24 SocketAddress *socket; 25 bool send_queued; /* all sends are queued */ 26 uint32_t wait_time; /* timeout for message replies */ 27 }; 28 29 /* 30 * The server maintains the device's pending interrupts, 31 * via its MSIX table and PBA, so we treat these accesses 32 * like PCI config space and forward them. 33 */ 34 static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr, 35 unsigned size) 36 { 37 VFIOPCIDevice *vdev = opaque; 38 VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region; 39 uint64_t data; 40 41 /* server copy is what matters */ 42 data = vfio_region_read(region, addr + vdev->msix->pba_offset, size); 43 return data; 44 } 45 46 static void vfio_user_pba_write(void *opaque, hwaddr addr, 47 uint64_t data, unsigned size) 48 { 49 /* dropped */ 50 } 51 52 static const MemoryRegionOps vfio_user_pba_ops = { 53 .read = vfio_user_pba_read, 54 .write = vfio_user_pba_write, 55 .endianness = DEVICE_LITTLE_ENDIAN, 56 }; 57 58 static void vfio_user_msix_setup(VFIOPCIDevice *vdev) 59 { 60 MemoryRegion *vfio_reg, *msix_reg, *pba_reg; 61 62 pba_reg = g_new0(MemoryRegion, 1); 63 vdev->msix->pba_region = pba_reg; 64 65 vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; 66 msix_reg = &vdev->pdev.msix_pba_mmio; 67 memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, 68 "VFIO MSIX PBA", int128_get64(msix_reg->size)); 69 memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, 70 pba_reg, 1); 71 } 72 73 static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) 74 { 75 MemoryRegion *mr, *sub; 76 77 mr = vdev->bars[vdev->msix->pba_bar].mr; 78 sub = vdev->msix->pba_region; 79 memory_region_del_subregion(mr, sub); 80 81 g_free(vdev->msix->pba_region); 82 vdev->msix->pba_region = NULL; 83 } 84 85 static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 86 { 87 PCIDevice *pdev = &vdev->pdev; 88 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 89 VFIOUserDMARW *res; 90 MemTxResult r; 91 size_t size; 92 93 if (msg->hdr.size < sizeof(*msg)) { 94 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 95 return; 96 } 97 if (msg->count > proxy->max_xfer_size) { 98 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 99 return; 100 } 101 102 /* switch to our own message buffer */ 103 size = msg->count + sizeof(VFIOUserDMARW); 104 res = g_malloc0(size); 105 memcpy(res, msg, sizeof(*res)); 106 g_free(msg); 107 108 r = pci_dma_read(pdev, res->offset, &res->data, res->count); 109 110 switch (r) { 111 case MEMTX_OK: 112 if (res->hdr.flags & VFIO_USER_NO_REPLY) { 113 g_free(res); 114 return; 115 } 116 vfio_user_send_reply(proxy, &res->hdr, size); 117 break; 118 case MEMTX_ERROR: 119 vfio_user_send_error(proxy, &res->hdr, EFAULT); 120 break; 121 case MEMTX_DECODE_ERROR: 122 vfio_user_send_error(proxy, &res->hdr, ENODEV); 123 break; 124 case MEMTX_ACCESS_ERROR: 125 vfio_user_send_error(proxy, &res->hdr, EPERM); 126 break; 127 default: 128 error_printf("vfio_user_dma_read unknown error %d\n", r); 129 vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL); 130 } 131 } 132 133 static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) 134 { 135 PCIDevice *pdev = &vdev->pdev; 136 VFIOUserProxy *proxy = vdev->vbasedev.proxy; 137 MemTxResult r; 138 139 if (msg->hdr.size < sizeof(*msg)) { 140 vfio_user_send_error(proxy, &msg->hdr, EINVAL); 141 return; 142 } 143 /* make sure transfer count isn't larger than the message data */ 144 if (msg->count > msg->hdr.size - sizeof(*msg)) { 145 vfio_user_send_error(proxy, &msg->hdr, E2BIG); 146 return; 147 } 148 149 r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count); 150 151 switch (r) { 152 case MEMTX_OK: 153 if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) { 154 vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr)); 155 } else { 156 g_free(msg); 157 } 158 break; 159 case MEMTX_ERROR: 160 vfio_user_send_error(proxy, &msg->hdr, EFAULT); 161 break; 162 case MEMTX_DECODE_ERROR: 163 vfio_user_send_error(proxy, &msg->hdr, ENODEV); 164 break; 165 case MEMTX_ACCESS_ERROR: 166 vfio_user_send_error(proxy, &msg->hdr, EPERM); 167 break; 168 default: 169 error_printf("vfio_user_dma_write unknown error %d\n", r); 170 vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL); 171 } 172 } 173 174 /* 175 * Incoming request message callback. 176 * 177 * Runs off main loop, so BQL held. 178 */ 179 static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg) 180 { 181 VFIOPCIDevice *vdev = opaque; 182 VFIOUserHdr *hdr = msg->hdr; 183 184 /* no incoming PCI requests pass FDs */ 185 if (msg->fds != NULL) { 186 vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL); 187 vfio_user_putfds(msg); 188 return; 189 } 190 191 switch (hdr->command) { 192 case VFIO_USER_DMA_READ: 193 vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr); 194 break; 195 case VFIO_USER_DMA_WRITE: 196 vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr); 197 break; 198 default: 199 error_printf("vfio_user_pci_process_req unknown cmd %d\n", 200 hdr->command); 201 vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS); 202 } 203 } 204 205 /* 206 * Emulated devices don't use host hot reset 207 */ 208 static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) 209 { 210 vbasedev->needs_reset = false; 211 } 212 213 static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) 214 { 215 VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, 216 device.vbasedev); 217 218 return OBJECT(vdev); 219 } 220 221 static VFIODeviceOps vfio_user_pci_ops = { 222 .vfio_compute_needs_reset = vfio_user_compute_needs_reset, 223 .vfio_eoi = vfio_pci_intx_eoi, 224 .vfio_get_object = vfio_user_pci_get_object, 225 /* No live migration support yet. */ 226 .vfio_save_config = NULL, 227 .vfio_load_config = NULL, 228 }; 229 230 static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) 231 { 232 ERRP_GUARD(); 233 VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); 234 VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); 235 VFIODevice *vbasedev = &vdev->vbasedev; 236 const char *sock_name; 237 AddressSpace *as; 238 SocketAddress addr; 239 VFIOUserProxy *proxy; 240 241 if (!udev->socket) { 242 error_setg(errp, "No socket specified"); 243 error_append_hint(errp, "e.g. -device '{" 244 "\"driver\":\"vfio-user-pci\", " 245 "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", " 246 "\"type\": \"unix\"}'" 247 "}'\n"); 248 return; 249 } 250 251 sock_name = udev->socket->u.q_unix.path; 252 253 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 254 255 memset(&addr, 0, sizeof(addr)); 256 addr.type = SOCKET_ADDRESS_TYPE_UNIX; 257 addr.u.q_unix.path = (char *)sock_name; 258 proxy = vfio_user_connect_dev(&addr, errp); 259 if (!proxy) { 260 return; 261 } 262 vbasedev->proxy = proxy; 263 vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); 264 265 vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); 266 267 if (udev->send_queued) { 268 proxy->flags |= VFIO_PROXY_FORCE_QUEUED; 269 } 270 271 /* user specified or 5 sec default */ 272 proxy->wait_time = udev->wait_time; 273 274 if (!vfio_user_validate_version(proxy, errp)) { 275 goto error; 276 } 277 278 /* 279 * Use socket-based device I/O instead of vfio kernel driver. 280 */ 281 vbasedev->io_ops = &vfio_user_device_io_ops_sock; 282 283 /* 284 * vfio-user devices are effectively mdevs (don't use a host iommu). 285 */ 286 vbasedev->mdev = true; 287 288 /* 289 * Enable per-region fds. 290 */ 291 vbasedev->use_region_fds = true; 292 293 as = pci_device_iommu_address_space(pdev); 294 if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER, 295 vbasedev->name, vbasedev, 296 as, errp)) { 297 goto error; 298 } 299 300 if (!vfio_pci_populate_device(vdev, errp)) { 301 goto error; 302 } 303 304 if (!vfio_pci_config_setup(vdev, errp)) { 305 goto error; 306 } 307 308 /* 309 * vfio_pci_config_setup will have registered the device's BARs 310 * and setup any MSIX BARs, so errors after it succeeds must 311 * use out_teardown 312 */ 313 314 if (!vfio_pci_add_capabilities(vdev, errp)) { 315 goto out_teardown; 316 } 317 318 if (vdev->msix != NULL) { 319 vfio_user_msix_setup(vdev); 320 } 321 322 if (!vfio_pci_interrupt_setup(vdev, errp)) { 323 goto out_teardown; 324 } 325 326 vfio_pci_register_err_notifier(vdev); 327 vfio_pci_register_req_notifier(vdev); 328 329 return; 330 331 out_teardown: 332 vfio_pci_teardown_msi(vdev); 333 vfio_pci_bars_exit(vdev); 334 error: 335 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); 336 vfio_pci_put_device(vdev); 337 } 338 339 static void vfio_user_instance_init(Object *obj) 340 { 341 PCIDevice *pci_dev = PCI_DEVICE(obj); 342 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 343 VFIODevice *vbasedev = &vdev->vbasedev; 344 345 device_add_bootindex_property(obj, &vdev->bootindex, 346 "bootindex", NULL, 347 &pci_dev->qdev); 348 vdev->host.domain = ~0U; 349 vdev->host.bus = ~0U; 350 vdev->host.slot = ~0U; 351 vdev->host.function = ~0U; 352 353 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops, 354 DEVICE(vdev), false); 355 356 vdev->nv_gpudirect_clique = 0xFF; 357 358 /* 359 * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command 360 * line, therefore, no need to wait to realize like other devices. 361 */ 362 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; 363 } 364 365 static void vfio_user_instance_finalize(Object *obj) 366 { 367 VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); 368 VFIODevice *vbasedev = &vdev->vbasedev; 369 370 if (vdev->msix != NULL) { 371 vfio_user_msix_teardown(vdev); 372 } 373 374 vfio_pci_put_device(vdev); 375 376 if (vbasedev->proxy != NULL) { 377 vfio_user_disconnect(vbasedev->proxy); 378 } 379 } 380 381 static void vfio_user_pci_reset(DeviceState *dev) 382 { 383 VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); 384 VFIODevice *vbasedev = &vdev->vbasedev; 385 386 vfio_pci_pre_reset(vdev); 387 388 if (vbasedev->reset_works) { 389 vfio_user_device_reset(vbasedev->proxy); 390 } 391 392 vfio_pci_post_reset(vdev); 393 } 394 395 static const Property vfio_user_pci_dev_properties[] = { 396 DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, 397 vendor_id, PCI_ANY_ID), 398 DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, 399 device_id, PCI_ANY_ID), 400 DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, 401 sub_vendor_id, PCI_ANY_ID), 402 DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, 403 sub_device_id, PCI_ANY_ID), 404 DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), 405 DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000), 406 }; 407 408 static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, 409 void *opaque, Error **errp) 410 { 411 VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); 412 bool success; 413 414 if (udev->device.vbasedev.proxy) { 415 error_setg(errp, "Proxy is connected"); 416 return; 417 } 418 419 qapi_free_SocketAddress(udev->socket); 420 421 udev->socket = NULL; 422 423 success = visit_type_SocketAddress(v, name, &udev->socket, errp); 424 425 if (!success) { 426 return; 427 } 428 429 if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { 430 error_setg(errp, "Unsupported socket type %s", 431 SocketAddressType_str(udev->socket->type)); 432 qapi_free_SocketAddress(udev->socket); 433 udev->socket = NULL; 434 return; 435 } 436 } 437 438 static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) 439 { 440 DeviceClass *dc = DEVICE_CLASS(klass); 441 PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); 442 443 device_class_set_legacy_reset(dc, vfio_user_pci_reset); 444 device_class_set_props(dc, vfio_user_pci_dev_properties); 445 446 object_class_property_add(klass, "socket", "SocketAddress", NULL, 447 vfio_user_pci_set_socket, NULL, NULL); 448 object_class_property_set_description(klass, "socket", 449 "SocketAddress (UNIX sockets only)"); 450 451 dc->desc = "VFIO over socket PCI device assignment"; 452 pdc->realize = vfio_user_pci_realize; 453 } 454 455 static const TypeInfo vfio_user_pci_dev_info = { 456 .name = TYPE_VFIO_USER_PCI, 457 .parent = TYPE_VFIO_PCI_BASE, 458 .instance_size = sizeof(VFIOUserPCIDevice), 459 .class_init = vfio_user_pci_dev_class_init, 460 .instance_init = vfio_user_instance_init, 461 .instance_finalize = vfio_user_instance_finalize, 462 }; 463 464 static void register_vfio_user_dev_type(void) 465 { 466 type_register_static(&vfio_user_pci_dev_info); 467 } 468 469 type_init(register_vfio_user_dev_type) 470