/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include <linux/iommufd.h>

#include "hw/vfio/vfio-device.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
#include "vfio-cpr.h"
#include "vfio-listener.h"

#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
            TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"

static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_dma(container->be,
                                   container->ioas_id,
                                   iova, size, vaddr, readonly);
}

static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
                              hwaddr iova, ram_addr_t size,
                              IOMMUTLBEntry *iotlb, bool unmap_all)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    /*
     * Unmap in halves: a single call cannot cover the whole 64-bit
     * address space, as its size would wrap around to zero.
     */
    if (unmap_all) {
        Int128 llsize = int128_rshift(int128_2_64(), 1);
        int ret;

        ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
                                        0, int128_get64(llsize));

        if (ret == 0) {
            ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
                                            int128_get64(llsize),
                                            int128_get64(llsize));
        }

        return ret;
    }

    /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
    return iommufd_backend_unmap_dma(container->be,
                                     container->ioas_id, iova, size);
}

static bool iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
{
    return !vfio_kvm_device_add_fd(vbasedev->fd, errp);
}

static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
{
    Error *err = NULL;

    if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
        error_report_err(err);
    }
}
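
/*
 * Open a connection to the iommufd backend and bind the device fd to
 * it. On success the kernel reports back the iommufd devid, which is
 * recorded in @vbasedev for later attach and hot-reset handling.
 */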
static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
{
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    struct vfio_device_bind_iommufd bind = {
        .argsz = sizeof(bind),
        .flags = 0,
    };

    if (!iommufd_backend_connect(iommufd, errp)) {
        return false;
    }

    /*
     * Add the device to kvm-vfio so it is prepared for tracking in KVM.
     * Some emulated devices in particular need the kvm information to
     * be available at device open time.
     */
    if (!iommufd_cdev_kvm_device_add(vbasedev, errp)) {
        goto err_kvm_device_add;
    }

    /* Bind device to iommufd */
    bind.iommufd = iommufd->fd;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
        error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
                         vbasedev->fd, bind.iommufd);
        goto err_bind;
    }

    vbasedev->devid = bind.out_devid;
    trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
                                        vbasedev->fd, vbasedev->devid);
    return true;
err_bind:
    iommufd_cdev_kvm_device_del(vbasedev);
err_kvm_device_add:
    iommufd_backend_disconnect(iommufd);
    return false;
}

static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
{
    /* Unbind is automatically conducted when device fd is closed */
    iommufd_cdev_kvm_device_del(vbasedev);
    iommufd_backend_disconnect(vbasedev->iommufd);
}

static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
{
    return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}

static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_set_dirty_tracking(container->be,
                                                hwpt->hwpt_id, start, errp)) {
            goto err;
        }
    }

    return 0;

err:
    /* Roll back the hwpts that were already updated */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }
        iommufd_backend_set_dirty_tracking(container->be,
                                           hwpt->hwpt_id, !start, NULL);
    }
    return -EINVAL;
}

static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      VFIOBitmap *vbmap, hwaddr iova,
                                      hwaddr size, Error **errp)
{
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    unsigned long page_size = qemu_real_host_page_size();
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
                                              iova, size, page_size,
                                              (uint64_t *)vbmap->bitmap,
                                              errp)) {
            return -EINVAL;
        }
    }

    return 0;
}
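
/*
 * Resolve the VFIO character device backing @sysfs_path: read the
 * major:minor number from <sysfs_path>/vfio-dev/vfioX/dev and open the
 * matching /dev/vfio/devices/vfioX node. Returns the open fd on
 * success, a negative value otherwise.
 */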
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
    ERRP_GUARD();
    long int ret = -ENOTTY;
    g_autofree char *path = NULL;
    g_autofree char *vfio_dev_path = NULL;
    g_autofree char *vfio_path = NULL;
    DIR *dir = NULL;
    struct dirent *dent;
    g_autofree gchar *contents = NULL;
    gsize length;
    int major, minor;
    dev_t vfio_devt;

    path = g_strdup_printf("%s/vfio-dev", sysfs_path);
    dir = opendir(path);
    if (!dir) {
        error_setg_errno(errp, errno, "couldn't open directory %s", path);
        goto out;
    }

    while ((dent = readdir(dir))) {
        if (!strncmp(dent->d_name, "vfio", 4)) {
            vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
            break;
        }
    }

    if (!vfio_dev_path) {
        error_setg(errp, "failed to find vfio-dev/vfioX/dev");
        goto out_close_dir;
    }

    if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
        error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }

    if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
        error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }
    vfio_devt = makedev(major, minor);

    vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
    ret = open_cdev(vfio_path, vfio_devt);
    if (ret < 0) {
        error_setg(errp, "Failed to open %s", vfio_path);
    }

    trace_iommufd_cdev_getfd(vfio_path, ret);

out_close_dir:
    closedir(dir);
out:
    if (*errp) {
        error_prepend(errp, VFIO_MSG_PREFIX, path);
    }

    return ret;
}

static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
                                         Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_attach_iommufd_pt attach_data = {
        .argsz = sizeof(attach_data),
        .flags = 0,
        .pt_id = id,
    };

    /* Attach device to an IOAS or hwpt within iommufd */
    if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
        error_setg_errno(errp, errno,
                         "[iommufd=%d] error attach %s (%d) to id=%d",
                         iommufd, vbasedev->name, vbasedev->fd, id);
        return -errno;
    }

    trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
                                        vbasedev->fd, id);
    return 0;
}

static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_detach_iommufd_pt detach_data = {
        .argsz = sizeof(detach_data),
        .flags = 0,
    };

    if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
        error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
        return false;
    }

    trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
    return true;
}
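
/*
 * Attach @vbasedev to a hardware page table (hwpt) in @container: first
 * try every hwpt already allocated for this container, and if none is
 * compatible, allocate a fresh one (with dirty tracking enabled when
 * the IOMMU advertises it) and attach to that instead.
 */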
static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container,
                                         Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    uint32_t type, flags = 0;
    uint64_t hw_caps;
    VFIOIOASHwpt *hwpt;
    uint32_t hwpt_id;
    int ret;

    /* Try to find a domain */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
        if (ret) {
            /* -EINVAL means the domain is incompatible with the device. */
            if (ret == -EINVAL) {
                /*
                 * It is an expected failure and it just means we will try
                 * another domain, or create one if no existing compatible
                 * domain is found. Hence why the error is discarded below.
                 */
                error_free(*errp);
                *errp = NULL;
                continue;
            }

            return false;
        } else {
            vbasedev->hwpt = hwpt;
            QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
            vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
            return true;
        }
    }

    /*
     * This is quite early and VFIO Migration state isn't yet fully
     * initialized, thus rely only on IOMMU hardware capabilities as to
     * whether IOMMU dirty tracking is going to be requested. Later
     * vfio_migration_realize() may decide to use VF dirty tracking
     * instead.
     */
    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
                                         &type, NULL, 0, &hw_caps, errp)) {
        return false;
    }

    if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
    }

    if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
                                    container->ioas_id, flags,
                                    IOMMU_HWPT_DATA_NONE, 0, NULL,
                                    &hwpt_id, errp)) {
        return false;
    }

    hwpt = g_malloc0(sizeof(*hwpt));
    hwpt->hwpt_id = hwpt_id;
    hwpt->hwpt_flags = flags;
    QLIST_INIT(&hwpt->device_list);

    ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
    if (ret) {
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
        return false;
    }

    vbasedev->hwpt = hwpt;
    vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
    QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
    QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
    container->bcontainer.dirty_pages_supported |=
                                vbasedev->iommu_dirty_tracking;
    if (container->bcontainer.dirty_pages_supported &&
        !vbasedev->iommu_dirty_tracking) {
        warn_report("IOMMU instance for device %s doesn't support dirty tracking",
                    vbasedev->name);
    }
    return true;
}

static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container)
{
    VFIOIOASHwpt *hwpt = vbasedev->hwpt;

    QLIST_REMOVE(vbasedev, hwpt_next);
    vbasedev->hwpt = NULL;

    if (QLIST_EMPTY(&hwpt->device_list)) {
        QLIST_REMOVE(hwpt, next);
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
    }
}

static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container,
                                          Error **errp)
{
    /* mdevs aren't physical devices and will fail with auto domains */
    if (!vbasedev->mdev) {
        return iommufd_cdev_autodomains_get(vbasedev, container, errp);
    }

    return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}

static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container)
{
    Error *err = NULL;

    if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
        error_report_err(err);
    }

    if (vbasedev->hwpt) {
        iommufd_cdev_autodomains_put(vbasedev, container);
    }
}

static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (!QLIST_EMPTY(&bcontainer->device_list)) {
        return;
    }
    vfio_cpr_unregister_container(bcontainer);
    vfio_listener_unregister(bcontainer);
    iommufd_backend_free_id(container->be, container->ioas_id);
    object_unref(container);
}

static int iommufd_cdev_ram_block_discard_disable(bool state)
{
    /*
     * We support coordinated discarding of RAM via the RamDiscardManager.
     */
    return ram_block_uncoordinated_discard_disable(state);
}
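
/*
 * Query the usable IOVA ranges of the IOAS with IOMMU_IOAS_IOVA_RANGES.
 * The first call is made with no room for ranges, so the kernel fails
 * it with EMSGSIZE while filling in num_iovas; the buffer is then
 * resized accordingly and the call repeated to fetch the actual ranges.
 */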
static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
                                             uint32_t ioas_id, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    g_autofree struct iommu_ioas_iova_ranges *info = NULL;
    struct iommu_iova_range *iova_ranges;
    int sz, fd = container->be->fd;

    info = g_malloc0(sizeof(*info));
    info->size = sizeof(*info);
    info->ioas_id = ioas_id;

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info) && errno != EMSGSIZE) {
        goto error;
    }

    sz = info->num_iovas * sizeof(struct iommu_iova_range);
    info = g_realloc(info, sizeof(*info) + sz);
    info->allowed_iovas = (uintptr_t)(info + 1);

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info)) {
        goto error;
    }

    iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;

    for (int i = 0; i < info->num_iovas; i++) {
        Range *range = g_new(Range, 1);

        range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
        bcontainer->iova_ranges =
            range_list_insert(bcontainer->iova_ranges, range);
    }
    bcontainer->pgsizes = info->out_iova_alignment;

    return true;

error:
    error_setg_errno(errp, errno, "Cannot get IOVA ranges");
    return false;
}

static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
                                AddressSpace *as, Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOIOMMUFDContainer *container;
    VFIOAddressSpace *space;
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, devfd;
    uint32_t ioas_id;
    Error *err = NULL;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    if (vbasedev->fd < 0) {
        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
        if (devfd < 0) {
            return false;
        }
        vbasedev->fd = devfd;
    } else {
        devfd = vbasedev->fd;
    }

    if (!iommufd_cdev_connect_and_bind(vbasedev, errp)) {
        goto err_connect_bind;
    }

    space = vfio_address_space_get(as);

    /* try to attach to an existing container in this space */
    QLIST_FOREACH(bcontainer, &space->containers, next) {
        container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
        if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
            vbasedev->iommufd != container->be) {
            continue;
        }
        if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
            const char *msg = error_get_pretty(err);

            trace_iommufd_cdev_fail_attach_existing_container(msg);
            error_free(err);
            err = NULL;
        } else {
            ret = iommufd_cdev_ram_block_discard_disable(true);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "Cannot set discarding of RAM broken");
                goto err_discard_disable;
            }
            goto found_container;
        }
    }

    /* Need to allocate a new dedicated container */
    if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
        goto err_alloc_ioas;
    }

    trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

    container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
    container->be = vbasedev->iommufd;
    container->ioas_id = ioas_id;
    QLIST_INIT(&container->hwpt_list);

    bcontainer = &container->bcontainer;
    vfio_address_space_insert(space, bcontainer);

    if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
        goto err_attach_container;
    }

    ret = iommufd_cdev_ram_block_discard_disable(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto err_discard_disable;
    }

    if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
        error_append_hint(&err,
                   "Fallback to default 64bit IOVA range and 4K page size\n");
        warn_report_err(err);
        err = NULL;
        bcontainer->pgsizes = qemu_real_host_page_size();
    }

    if (!vfio_listener_register(bcontainer, errp)) {
        goto err_listener_register;
    }

    if (!vfio_cpr_register_container(bcontainer, errp)) {
        goto err_listener_register;
    }

    bcontainer->initialized = true;

found_container:
    ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_setg_errno(errp, errno, "error getting device info");
        goto err_listener_register;
    }

    if (!vfio_device_hiod_create_and_realize(vbasedev,
                       TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
        goto err_listener_register;
    }

    /*
     * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level
     * for discarding incompatibility check as well?
     */
    if (vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    vfio_device_prepare(vbasedev, bcontainer, &dev_info);

    trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
                                   vbasedev->num_regions, vbasedev->flags);
    return true;

err_listener_register:
    iommufd_cdev_ram_block_discard_disable(false);
err_discard_disable:
    iommufd_cdev_detach_container(vbasedev, container);
err_attach_container:
    iommufd_cdev_container_destroy(container);
err_alloc_ioas:
    vfio_address_space_put(space);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
err_connect_bind:
    close(vbasedev->fd);
    return false;
}
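
/*
 * Tear down a device attachment in the reverse order of
 * iommufd_cdev_attach(): detach from the container, release the
 * container and address space references, then unbind from iommufd.
 */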
static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
    VFIOContainerBase *bcontainer = vbasedev->bcontainer;
    VFIOAddressSpace *space = bcontainer->space;
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    vfio_device_unprepare(vbasedev);

    if (!vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    object_unref(vbasedev->hiod);
    iommufd_cdev_detach_container(vbasedev, container);
    iommufd_cdev_container_destroy(container);
    vfio_address_space_put(space);

    iommufd_cdev_unbind_and_disconnect(vbasedev);
    close(vbasedev->fd);
}

static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
{
    VFIODevice *vbasedev_iter;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
        if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) {
            continue;
        }
        if (devid == vbasedev_iter->devid) {
            return vbasedev_iter;
        }
    }
    return NULL;
}

static VFIOPCIDevice *
iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
                                    VFIODevice *reset_dev)
{
    VFIODevice *vbasedev_tmp;

    if (dep_dev->devid == reset_dev->devid ||
        dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
        return NULL;
    }

    vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
    if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
        vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
}
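
/*
 * Perform a PCI hot reset on @vbasedev, pre-/post-resetting every
 * realized dependent device as well. The devid-based ownership check
 * replaces the group fd array used by the legacy container backend, so
 * the reset ioctl is issued with a zero-length fd array.
 */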
static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
    struct vfio_pci_hot_reset_info *info = NULL;
    struct vfio_pci_dependent_device *devices;
    struct vfio_pci_hot_reset *reset;
    int ret, i;
    bool multi = false;

    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");

    if (!single) {
        vfio_pci_pre_reset(vdev);
    }
    vdev->vbasedev.needs_reset = false;

    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
    if (ret) {
        goto out_single;
    }

    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);

    devices = &info->devices[0];

    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
        if (!vdev->has_pm_reset) {
            for (i = 0; i < info->count; i++) {
                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
                    error_report("vfio: Cannot reset device %s, "
                                 "depends on device %04x:%02x:%02x.%x "
                                 "which is not owned.",
                                 vdev->vbasedev.name, devices[i].segment,
                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
                                 PCI_FUNC(devices[i].devfn));
                }
            }
        }
        ret = -EPERM;
        goto out_single;
    }

    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);

    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
                                                     devices[i].bus,
                                                     PCI_SLOT(devices[i].devfn),
                                                     PCI_FUNC(devices[i].devfn),
                                                     devices[i].devid);

        /*
         * If a VFIO cdev device is resettable, all the dependent devices
         * are either bound to the same iommufd or within the same
         * iommu_group as one of the iommufd-bound devices.
         */
        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }

        if (single) {
            ret = -EINVAL;
            goto out_single;
        }
        vfio_pci_pre_reset(tmp);
        tmp->vbasedev.needs_reset = false;
        multi = true;
    }

    if (!single && !multi) {
        ret = -EINVAL;
        goto out_single;
    }

    /* Use zero length array for hot reset with iommufd backend */
    reset = g_malloc0(sizeof(*reset));
    reset->argsz = sizeof(*reset);

    /* Bus reset! */
    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
    g_free(reset);
    if (ret) {
        ret = -errno;
    }

    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                    ret ? strerror(errno) : "Success");

    /* Re-enable INTx on affected devices */
    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }
        vfio_pci_post_reset(tmp);
    }
out_single:
    if (!single) {
        vfio_pci_post_reset(vdev);
    }
    g_free(info);

    return ret;
}

static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->dma_map = iommufd_cdev_map;
    vioc->dma_unmap = iommufd_cdev_unmap;
    vioc->attach_device = iommufd_cdev_attach;
    vioc->detach_device = iommufd_cdev_detach;
    vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
    vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
}
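
/*
 * Initialize the host IOMMU device capabilities (type and hw_caps) by
 * querying hardware info from the iommufd backend for this VFIO device.
 */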
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
                                      Error **errp)
{
    VFIODevice *vdev = opaque;
    HostIOMMUDeviceCaps *caps = &hiod->caps;
    enum iommu_hw_info_type type;
    union {
        struct iommu_hw_info_vtd vtd;
    } data;
    uint64_t hw_caps;

    hiod->agent = opaque;

    if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
                                         &type, &data, sizeof(data),
                                         &hw_caps, errp)) {
        return false;
    }

    hiod->name = g_strdup(vdev->name);
    caps->type = type;
    caps->hw_caps = hw_caps;

    return true;
}

static GList *
hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_iova_ranges(vdev->bcontainer);
}

static uint64_t
hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_page_size_mask(vdev->bcontainer);
}

static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);

    hiodc->realize = hiod_iommufd_vfio_realize;
    hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
    hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU_IOMMUFD,
        .parent = TYPE_VFIO_IOMMU,
        .instance_size = sizeof(VFIOIOMMUFDContainer),
        .class_init = vfio_iommu_iommufd_class_init,
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
        .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .class_init = hiod_iommufd_vfio_class_init,
    }
};

DEFINE_TYPES(types)