16078a454SJean-Philippe Brucker #include "kvm/kvm.h" 26078a454SJean-Philippe Brucker #include "kvm/vfio.h" 382caa882SJean-Philippe Brucker #include "kvm/ioport.h" 46078a454SJean-Philippe Brucker 56078a454SJean-Philippe Brucker #include <linux/list.h> 66078a454SJean-Philippe Brucker 76078a454SJean-Philippe Brucker #define VFIO_DEV_DIR "/dev/vfio" 86078a454SJean-Philippe Brucker #define VFIO_DEV_NODE VFIO_DEV_DIR "/vfio" 96078a454SJean-Philippe Brucker #define IOMMU_GROUP_DIR "/sys/kernel/iommu_groups" 106078a454SJean-Philippe Brucker 116078a454SJean-Philippe Brucker static int vfio_container; 126078a454SJean-Philippe Brucker static LIST_HEAD(vfio_groups); 136078a454SJean-Philippe Brucker static struct vfio_device *vfio_devices; 146078a454SJean-Philippe Brucker 156078a454SJean-Philippe Brucker static int vfio_device_pci_parser(const struct option *opt, char *arg, 166078a454SJean-Philippe Brucker struct vfio_device_params *dev) 176078a454SJean-Philippe Brucker { 186078a454SJean-Philippe Brucker unsigned int domain, bus, devnr, fn; 196078a454SJean-Philippe Brucker 206078a454SJean-Philippe Brucker int nr = sscanf(arg, "%4x:%2x:%2x.%1x", &domain, &bus, &devnr, &fn); 216078a454SJean-Philippe Brucker if (nr < 4) { 226078a454SJean-Philippe Brucker domain = 0; 236078a454SJean-Philippe Brucker nr = sscanf(arg, "%2x:%2x.%1x", &bus, &devnr, &fn); 246078a454SJean-Philippe Brucker if (nr < 3) { 256078a454SJean-Philippe Brucker pr_err("Invalid device identifier %s", arg); 266078a454SJean-Philippe Brucker return -EINVAL; 276078a454SJean-Philippe Brucker } 286078a454SJean-Philippe Brucker } 296078a454SJean-Philippe Brucker 306078a454SJean-Philippe Brucker dev->type = VFIO_DEVICE_PCI; 316078a454SJean-Philippe Brucker dev->bus = "pci"; 326078a454SJean-Philippe Brucker dev->name = malloc(13); 336078a454SJean-Philippe Brucker if (!dev->name) 346078a454SJean-Philippe Brucker return -ENOMEM; 356078a454SJean-Philippe Brucker 366078a454SJean-Philippe Brucker snprintf(dev->name, 13, 
"%04x:%02x:%02x.%x", domain, bus, devnr, fn); 376078a454SJean-Philippe Brucker 386078a454SJean-Philippe Brucker return 0; 396078a454SJean-Philippe Brucker } 406078a454SJean-Philippe Brucker 416078a454SJean-Philippe Brucker int vfio_device_parser(const struct option *opt, const char *arg, int unset) 426078a454SJean-Philippe Brucker { 436078a454SJean-Philippe Brucker int ret = -EINVAL; 446078a454SJean-Philippe Brucker static int idx = 0; 456078a454SJean-Philippe Brucker struct kvm *kvm = opt->ptr; 466078a454SJean-Philippe Brucker struct vfio_device_params *dev, *devs; 476078a454SJean-Philippe Brucker char *cur, *buf = strdup(arg); 486078a454SJean-Philippe Brucker 496078a454SJean-Philippe Brucker if (!buf) 506078a454SJean-Philippe Brucker return -ENOMEM; 516078a454SJean-Philippe Brucker 526078a454SJean-Philippe Brucker if (idx >= MAX_VFIO_DEVICES) { 536078a454SJean-Philippe Brucker pr_warning("Too many VFIO devices"); 546078a454SJean-Philippe Brucker goto out_free_buf; 556078a454SJean-Philippe Brucker } 566078a454SJean-Philippe Brucker 576078a454SJean-Philippe Brucker devs = realloc(kvm->cfg.vfio_devices, sizeof(*dev) * (idx + 1)); 586078a454SJean-Philippe Brucker if (!devs) { 596078a454SJean-Philippe Brucker ret = -ENOMEM; 606078a454SJean-Philippe Brucker goto out_free_buf; 616078a454SJean-Philippe Brucker } 626078a454SJean-Philippe Brucker 636078a454SJean-Philippe Brucker kvm->cfg.vfio_devices = devs; 646078a454SJean-Philippe Brucker dev = &devs[idx]; 656078a454SJean-Philippe Brucker 666078a454SJean-Philippe Brucker cur = strtok(buf, ","); 676078a454SJean-Philippe Brucker if (!cur) 686078a454SJean-Philippe Brucker goto out_free_buf; 696078a454SJean-Philippe Brucker 706078a454SJean-Philippe Brucker if (!strcmp(opt->long_name, "vfio-pci")) 716078a454SJean-Philippe Brucker ret = vfio_device_pci_parser(opt, cur, dev); 726078a454SJean-Philippe Brucker else 736078a454SJean-Philippe Brucker ret = -EINVAL; 746078a454SJean-Philippe Brucker 756078a454SJean-Philippe Brucker if 
(!ret) 766078a454SJean-Philippe Brucker kvm->cfg.num_vfio_devices = ++idx; 776078a454SJean-Philippe Brucker 786078a454SJean-Philippe Brucker out_free_buf: 796078a454SJean-Philippe Brucker free(buf); 806078a454SJean-Philippe Brucker 816078a454SJean-Philippe Brucker return ret; 826078a454SJean-Philippe Brucker } 836078a454SJean-Philippe Brucker 8482caa882SJean-Philippe Brucker static bool vfio_ioport_in(struct ioport *ioport, struct kvm_cpu *vcpu, 8582caa882SJean-Philippe Brucker u16 port, void *data, int len) 8682caa882SJean-Philippe Brucker { 8782caa882SJean-Philippe Brucker u32 val; 8882caa882SJean-Philippe Brucker ssize_t nr; 8982caa882SJean-Philippe Brucker struct vfio_region *region = ioport->priv; 9082caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 9182caa882SJean-Philippe Brucker 9282caa882SJean-Philippe Brucker u32 offset = port - region->port_base; 9382caa882SJean-Philippe Brucker 9482caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 9582caa882SJean-Philippe Brucker return false; 9682caa882SJean-Philippe Brucker 9782caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 9882caa882SJean-Philippe Brucker if (nr != len) { 9982caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n", 10082caa882SJean-Philippe Brucker len, port); 10182caa882SJean-Philippe Brucker return false; 10282caa882SJean-Philippe Brucker } 10382caa882SJean-Philippe Brucker 10482caa882SJean-Philippe Brucker switch (len) { 10582caa882SJean-Philippe Brucker case 1: 10682caa882SJean-Philippe Brucker ioport__write8(data, val); 10782caa882SJean-Philippe Brucker break; 10882caa882SJean-Philippe Brucker case 2: 10982caa882SJean-Philippe Brucker ioport__write16(data, val); 11082caa882SJean-Philippe Brucker break; 11182caa882SJean-Philippe Brucker case 4: 11282caa882SJean-Philippe Brucker ioport__write32(data, val); 11382caa882SJean-Philippe Brucker break; 
11482caa882SJean-Philippe Brucker default: 11582caa882SJean-Philippe Brucker return false; 11682caa882SJean-Philippe Brucker } 11782caa882SJean-Philippe Brucker 11882caa882SJean-Philippe Brucker return true; 11982caa882SJean-Philippe Brucker } 12082caa882SJean-Philippe Brucker 12182caa882SJean-Philippe Brucker static bool vfio_ioport_out(struct ioport *ioport, struct kvm_cpu *vcpu, 12282caa882SJean-Philippe Brucker u16 port, void *data, int len) 12382caa882SJean-Philippe Brucker { 12482caa882SJean-Philippe Brucker u32 val; 12582caa882SJean-Philippe Brucker ssize_t nr; 12682caa882SJean-Philippe Brucker struct vfio_region *region = ioport->priv; 12782caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 12882caa882SJean-Philippe Brucker 12982caa882SJean-Philippe Brucker u32 offset = port - region->port_base; 13082caa882SJean-Philippe Brucker 13182caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 13282caa882SJean-Philippe Brucker return false; 13382caa882SJean-Philippe Brucker 13482caa882SJean-Philippe Brucker switch (len) { 13582caa882SJean-Philippe Brucker case 1: 13682caa882SJean-Philippe Brucker val = ioport__read8(data); 13782caa882SJean-Philippe Brucker break; 13882caa882SJean-Philippe Brucker case 2: 13982caa882SJean-Philippe Brucker val = ioport__read16(data); 14082caa882SJean-Philippe Brucker break; 14182caa882SJean-Philippe Brucker case 4: 14282caa882SJean-Philippe Brucker val = ioport__read32(data); 14382caa882SJean-Philippe Brucker break; 14482caa882SJean-Philippe Brucker default: 14582caa882SJean-Philippe Brucker return false; 14682caa882SJean-Philippe Brucker } 14782caa882SJean-Philippe Brucker 14882caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 14982caa882SJean-Philippe Brucker if (nr != len) 15082caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x", 15182caa882SJean-Philippe Brucker len, port); 
15282caa882SJean-Philippe Brucker 15382caa882SJean-Philippe Brucker return nr == len; 15482caa882SJean-Philippe Brucker } 15582caa882SJean-Philippe Brucker 15682caa882SJean-Philippe Brucker static struct ioport_operations vfio_ioport_ops = { 15782caa882SJean-Philippe Brucker .io_in = vfio_ioport_in, 15882caa882SJean-Philippe Brucker .io_out = vfio_ioport_out, 15982caa882SJean-Philippe Brucker }; 16082caa882SJean-Philippe Brucker 16182caa882SJean-Philippe Brucker static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len, 16282caa882SJean-Philippe Brucker u8 is_write, void *ptr) 16382caa882SJean-Philippe Brucker { 16482caa882SJean-Philippe Brucker u64 val; 16582caa882SJean-Philippe Brucker ssize_t nr; 16682caa882SJean-Philippe Brucker struct vfio_region *region = ptr; 16782caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 16882caa882SJean-Philippe Brucker 16982caa882SJean-Philippe Brucker u32 offset = addr - region->guest_phys_addr; 17082caa882SJean-Philippe Brucker 17182caa882SJean-Philippe Brucker if (len < 1 || len > 8) 17282caa882SJean-Philippe Brucker goto err_report; 17382caa882SJean-Philippe Brucker 17482caa882SJean-Philippe Brucker if (is_write) { 17582caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 17682caa882SJean-Philippe Brucker goto err_report; 17782caa882SJean-Philippe Brucker 17882caa882SJean-Philippe Brucker memcpy(&val, data, len); 17982caa882SJean-Philippe Brucker 18082caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 18182caa882SJean-Philippe Brucker if ((u32)nr != len) 18282caa882SJean-Philippe Brucker goto err_report; 18382caa882SJean-Philippe Brucker } else { 18482caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 18582caa882SJean-Philippe Brucker goto err_report; 18682caa882SJean-Philippe Brucker 18782caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 
18882caa882SJean-Philippe Brucker if ((u32)nr != len) 18982caa882SJean-Philippe Brucker goto err_report; 19082caa882SJean-Philippe Brucker 19182caa882SJean-Philippe Brucker memcpy(data, &val, len); 19282caa882SJean-Philippe Brucker } 19382caa882SJean-Philippe Brucker 19482caa882SJean-Philippe Brucker return; 19582caa882SJean-Philippe Brucker 19682caa882SJean-Philippe Brucker err_report: 19782caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ? 19882caa882SJean-Philippe Brucker "write" : "read", len, offset, addr); 19982caa882SJean-Philippe Brucker } 20082caa882SJean-Philippe Brucker 20182caa882SJean-Philippe Brucker static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev, 20282caa882SJean-Philippe Brucker struct vfio_region *region) 20382caa882SJean-Philippe Brucker { 20482caa882SJean-Philippe Brucker if (region->is_ioport) { 20582caa882SJean-Philippe Brucker int port = ioport__register(kvm, IOPORT_EMPTY, &vfio_ioport_ops, 20682caa882SJean-Philippe Brucker region->info.size, region); 20782caa882SJean-Philippe Brucker if (port < 0) 20882caa882SJean-Philippe Brucker return port; 20982caa882SJean-Philippe Brucker 21082caa882SJean-Philippe Brucker region->port_base = port; 21182caa882SJean-Philippe Brucker return 0; 21282caa882SJean-Philippe Brucker } 21382caa882SJean-Philippe Brucker 21482caa882SJean-Philippe Brucker return kvm__register_mmio(kvm, region->guest_phys_addr, 21582caa882SJean-Philippe Brucker region->info.size, false, vfio_mmio_access, 21682caa882SJean-Philippe Brucker region); 21782caa882SJean-Philippe Brucker } 21882caa882SJean-Philippe Brucker 2196078a454SJean-Philippe Brucker int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, 2206078a454SJean-Philippe Brucker struct vfio_region *region) 2216078a454SJean-Philippe Brucker { 2226078a454SJean-Philippe Brucker void *base; 2236078a454SJean-Philippe Brucker int ret, prot = 0; 2246078a454SJean-Philippe Brucker /* KVM needs 
page-aligned regions */ 2256078a454SJean-Philippe Brucker u64 map_size = ALIGN(region->info.size, PAGE_SIZE); 2266078a454SJean-Philippe Brucker 22782caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) 22882caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2296078a454SJean-Philippe Brucker 2306078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_READ) 2316078a454SJean-Philippe Brucker prot |= PROT_READ; 2326078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 2336078a454SJean-Philippe Brucker prot |= PROT_WRITE; 2346078a454SJean-Philippe Brucker 2356078a454SJean-Philippe Brucker base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd, 2366078a454SJean-Philippe Brucker region->info.offset); 2376078a454SJean-Philippe Brucker if (base == MAP_FAILED) { 23882caa882SJean-Philippe Brucker /* TODO: support sparse mmap */ 23982caa882SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping", 2406078a454SJean-Philippe Brucker region->info.index, region->info.size); 24182caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2426078a454SJean-Philippe Brucker } 2436078a454SJean-Philippe Brucker region->host_addr = base; 2446078a454SJean-Philippe Brucker 2456078a454SJean-Philippe Brucker ret = kvm__register_dev_mem(kvm, region->guest_phys_addr, map_size, 2466078a454SJean-Philippe Brucker region->host_addr); 2476078a454SJean-Philippe Brucker if (ret) { 2486078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to register region with KVM"); 2496078a454SJean-Philippe Brucker return ret; 2506078a454SJean-Philippe Brucker } 2516078a454SJean-Philippe Brucker 2526078a454SJean-Philippe Brucker return 0; 2536078a454SJean-Philippe Brucker } 2546078a454SJean-Philippe Brucker 2556078a454SJean-Philippe Brucker void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region) 
2566078a454SJean-Philippe Brucker { 25782caa882SJean-Philippe Brucker if (region->host_addr) { 2586078a454SJean-Philippe Brucker munmap(region->host_addr, region->info.size); 25982caa882SJean-Philippe Brucker } else if (region->is_ioport) { 26082caa882SJean-Philippe Brucker ioport__unregister(kvm, region->port_base); 26182caa882SJean-Philippe Brucker } else { 26282caa882SJean-Philippe Brucker kvm__deregister_mmio(kvm, region->guest_phys_addr); 26382caa882SJean-Philippe Brucker } 2646078a454SJean-Philippe Brucker } 2656078a454SJean-Philippe Brucker 2666078a454SJean-Philippe Brucker static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev) 2676078a454SJean-Philippe Brucker { 2686078a454SJean-Philippe Brucker int ret; 2696078a454SJean-Philippe Brucker struct vfio_group *group = vdev->group; 2706078a454SJean-Philippe Brucker 2716078a454SJean-Philippe Brucker vdev->fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, 2726078a454SJean-Philippe Brucker vdev->params->name); 2736078a454SJean-Philippe Brucker if (vdev->fd < 0) { 2746078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to get fd"); 2756078a454SJean-Philippe Brucker 2766078a454SJean-Philippe Brucker /* The device might be a bridge without an fd */ 2776078a454SJean-Philippe Brucker return 0; 2786078a454SJean-Philippe Brucker } 2796078a454SJean-Philippe Brucker 2806078a454SJean-Philippe Brucker vdev->info.argsz = sizeof(vdev->info); 2816078a454SJean-Philippe Brucker if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) { 2826078a454SJean-Philippe Brucker ret = -errno; 2836078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to get info"); 2846078a454SJean-Philippe Brucker goto err_close_device; 2856078a454SJean-Philippe Brucker } 2866078a454SJean-Philippe Brucker 2876078a454SJean-Philippe Brucker if (vdev->info.flags & VFIO_DEVICE_FLAGS_RESET && 2886078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0) 2896078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to reset 
device"); 2906078a454SJean-Philippe Brucker 2916078a454SJean-Philippe Brucker vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions)); 2926078a454SJean-Philippe Brucker if (!vdev->regions) { 2936078a454SJean-Philippe Brucker ret = -ENOMEM; 2946078a454SJean-Philippe Brucker goto err_close_device; 2956078a454SJean-Philippe Brucker } 2966078a454SJean-Philippe Brucker 2976078a454SJean-Philippe Brucker /* Now for the bus-specific initialization... */ 2986078a454SJean-Philippe Brucker switch (vdev->params->type) { 2996078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI: 3006078a454SJean-Philippe Brucker BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI)); 3016078a454SJean-Philippe Brucker ret = vfio_pci_setup_device(kvm, vdev); 3026078a454SJean-Philippe Brucker break; 3036078a454SJean-Philippe Brucker default: 3046078a454SJean-Philippe Brucker BUG_ON(1); 3056078a454SJean-Philippe Brucker ret = -EINVAL; 3066078a454SJean-Philippe Brucker } 3076078a454SJean-Philippe Brucker 3086078a454SJean-Philippe Brucker if (ret) 3096078a454SJean-Philippe Brucker goto err_free_regions; 3106078a454SJean-Philippe Brucker 3116078a454SJean-Philippe Brucker vfio_dev_info(vdev, "assigned to device number 0x%x in group %lu", 3126078a454SJean-Philippe Brucker vdev->dev_hdr.dev_num, group->id); 3136078a454SJean-Philippe Brucker 3146078a454SJean-Philippe Brucker return 0; 3156078a454SJean-Philippe Brucker 3166078a454SJean-Philippe Brucker err_free_regions: 3176078a454SJean-Philippe Brucker free(vdev->regions); 3186078a454SJean-Philippe Brucker err_close_device: 3196078a454SJean-Philippe Brucker close(vdev->fd); 3206078a454SJean-Philippe Brucker 3216078a454SJean-Philippe Brucker return ret; 3226078a454SJean-Philippe Brucker } 3236078a454SJean-Philippe Brucker 3246078a454SJean-Philippe Brucker static int vfio_configure_devices(struct kvm *kvm) 3256078a454SJean-Philippe Brucker { 3266078a454SJean-Philippe Brucker int i, ret; 3276078a454SJean-Philippe Brucker 3286078a454SJean-Philippe 
Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) { 3296078a454SJean-Philippe Brucker ret = vfio_configure_device(kvm, &vfio_devices[i]); 3306078a454SJean-Philippe Brucker if (ret) 3316078a454SJean-Philippe Brucker return ret; 3326078a454SJean-Philippe Brucker } 3336078a454SJean-Philippe Brucker 3346078a454SJean-Philippe Brucker return 0; 3356078a454SJean-Philippe Brucker } 3366078a454SJean-Philippe Brucker 3376078a454SJean-Philippe Brucker static int vfio_get_iommu_type(void) 3386078a454SJean-Philippe Brucker { 3396078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) 3406078a454SJean-Philippe Brucker return VFIO_TYPE1v2_IOMMU; 3416078a454SJean-Philippe Brucker 3426078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) 3436078a454SJean-Philippe Brucker return VFIO_TYPE1_IOMMU; 3446078a454SJean-Philippe Brucker 3456078a454SJean-Philippe Brucker return -ENODEV; 3466078a454SJean-Philippe Brucker } 3476078a454SJean-Philippe Brucker 3486078a454SJean-Philippe Brucker static int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 3496078a454SJean-Philippe Brucker { 3506078a454SJean-Philippe Brucker int ret = 0; 3516078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_map dma_map = { 3526078a454SJean-Philippe Brucker .argsz = sizeof(dma_map), 3536078a454SJean-Philippe Brucker .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, 3546078a454SJean-Philippe Brucker .vaddr = (unsigned long)bank->host_addr, 3556078a454SJean-Philippe Brucker .iova = (u64)bank->guest_phys_addr, 3566078a454SJean-Philippe Brucker .size = bank->size, 3576078a454SJean-Philippe Brucker }; 3586078a454SJean-Philippe Brucker 3596078a454SJean-Philippe Brucker /* Map the guest memory for DMA (i.e. 
provide isolation) */ 3606078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_IOMMU_MAP_DMA, &dma_map)) { 3616078a454SJean-Philippe Brucker ret = -errno; 3626078a454SJean-Philippe Brucker pr_err("Failed to map 0x%llx -> 0x%llx (%llu) for DMA", 3636078a454SJean-Philippe Brucker dma_map.iova, dma_map.vaddr, dma_map.size); 3646078a454SJean-Philippe Brucker } 3656078a454SJean-Philippe Brucker 3666078a454SJean-Philippe Brucker return ret; 3676078a454SJean-Philippe Brucker } 3686078a454SJean-Philippe Brucker 3696078a454SJean-Philippe Brucker static int vfio_unmap_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 3706078a454SJean-Philippe Brucker { 3716078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_unmap dma_unmap = { 3726078a454SJean-Philippe Brucker .argsz = sizeof(dma_unmap), 3736078a454SJean-Philippe Brucker .size = bank->size, 3746078a454SJean-Philippe Brucker .iova = bank->guest_phys_addr, 3756078a454SJean-Philippe Brucker }; 3766078a454SJean-Philippe Brucker 3776078a454SJean-Philippe Brucker ioctl(vfio_container, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); 3786078a454SJean-Philippe Brucker 3796078a454SJean-Philippe Brucker return 0; 3806078a454SJean-Philippe Brucker } 3816078a454SJean-Philippe Brucker 382*41d773e2SJean-Philippe Brucker static int vfio_configure_reserved_regions(struct kvm *kvm, 383*41d773e2SJean-Philippe Brucker struct vfio_group *group) 384*41d773e2SJean-Philippe Brucker { 385*41d773e2SJean-Philippe Brucker FILE *file; 386*41d773e2SJean-Philippe Brucker int ret = 0; 387*41d773e2SJean-Philippe Brucker char type[9]; 388*41d773e2SJean-Philippe Brucker char filename[PATH_MAX]; 389*41d773e2SJean-Philippe Brucker unsigned long long start, end; 390*41d773e2SJean-Philippe Brucker 391*41d773e2SJean-Philippe Brucker snprintf(filename, PATH_MAX, IOMMU_GROUP_DIR "/%lu/reserved_regions", 392*41d773e2SJean-Philippe Brucker group->id); 393*41d773e2SJean-Philippe Brucker 394*41d773e2SJean-Philippe Brucker /* reserved_regions might not be 
present on older systems */ 395*41d773e2SJean-Philippe Brucker if (access(filename, F_OK)) 396*41d773e2SJean-Philippe Brucker return 0; 397*41d773e2SJean-Philippe Brucker 398*41d773e2SJean-Philippe Brucker file = fopen(filename, "r"); 399*41d773e2SJean-Philippe Brucker if (!file) 400*41d773e2SJean-Philippe Brucker return -errno; 401*41d773e2SJean-Philippe Brucker 402*41d773e2SJean-Philippe Brucker while (fscanf(file, "0x%llx 0x%llx %8s\n", &start, &end, type) == 3) { 403*41d773e2SJean-Philippe Brucker ret = kvm__reserve_mem(kvm, start, end - start + 1); 404*41d773e2SJean-Philippe Brucker if (ret) 405*41d773e2SJean-Philippe Brucker break; 406*41d773e2SJean-Philippe Brucker } 407*41d773e2SJean-Philippe Brucker 408*41d773e2SJean-Philippe Brucker fclose(file); 409*41d773e2SJean-Philippe Brucker 410*41d773e2SJean-Philippe Brucker return ret; 411*41d773e2SJean-Philippe Brucker } 412*41d773e2SJean-Philippe Brucker 413*41d773e2SJean-Philippe Brucker static int vfio_configure_groups(struct kvm *kvm) 414*41d773e2SJean-Philippe Brucker { 415*41d773e2SJean-Philippe Brucker int ret; 416*41d773e2SJean-Philippe Brucker struct vfio_group *group; 417*41d773e2SJean-Philippe Brucker 418*41d773e2SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) { 419*41d773e2SJean-Philippe Brucker ret = vfio_configure_reserved_regions(kvm, group); 420*41d773e2SJean-Philippe Brucker if (ret) 421*41d773e2SJean-Philippe Brucker return ret; 422*41d773e2SJean-Philippe Brucker } 423*41d773e2SJean-Philippe Brucker 424*41d773e2SJean-Philippe Brucker return 0; 425*41d773e2SJean-Philippe Brucker } 426*41d773e2SJean-Philippe Brucker 4276078a454SJean-Philippe Brucker static struct vfio_group *vfio_group_create(struct kvm *kvm, unsigned long id) 4286078a454SJean-Philippe Brucker { 4296078a454SJean-Philippe Brucker int ret; 4306078a454SJean-Philippe Brucker struct vfio_group *group; 4316078a454SJean-Philippe Brucker char group_node[PATH_MAX]; 4326078a454SJean-Philippe Brucker struct 
vfio_group_status group_status = { 4336078a454SJean-Philippe Brucker .argsz = sizeof(group_status), 4346078a454SJean-Philippe Brucker }; 4356078a454SJean-Philippe Brucker 4366078a454SJean-Philippe Brucker group = calloc(1, sizeof(*group)); 4376078a454SJean-Philippe Brucker if (!group) 4386078a454SJean-Philippe Brucker return NULL; 4396078a454SJean-Philippe Brucker 4406078a454SJean-Philippe Brucker group->id = id; 4416078a454SJean-Philippe Brucker group->refs = 1; 4426078a454SJean-Philippe Brucker 4436078a454SJean-Philippe Brucker ret = snprintf(group_node, PATH_MAX, VFIO_DEV_DIR "/%lu", id); 4446078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 4456078a454SJean-Philippe Brucker return NULL; 4466078a454SJean-Philippe Brucker 4476078a454SJean-Philippe Brucker group->fd = open(group_node, O_RDWR); 4486078a454SJean-Philippe Brucker if (group->fd < 0) { 4496078a454SJean-Philippe Brucker pr_err("Failed to open IOMMU group %s", group_node); 4506078a454SJean-Philippe Brucker goto err_free_group; 4516078a454SJean-Philippe Brucker } 4526078a454SJean-Philippe Brucker 4536078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &group_status)) { 4546078a454SJean-Philippe Brucker pr_err("Failed to determine status of IOMMU group %lu", id); 4556078a454SJean-Philippe Brucker goto err_close_group; 4566078a454SJean-Philippe Brucker } 4576078a454SJean-Philippe Brucker 4586078a454SJean-Philippe Brucker if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { 4596078a454SJean-Philippe Brucker pr_err("IOMMU group %lu is not viable", id); 4606078a454SJean-Philippe Brucker goto err_close_group; 4616078a454SJean-Philippe Brucker } 4626078a454SJean-Philippe Brucker 4636078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &vfio_container)) { 4646078a454SJean-Philippe Brucker pr_err("Failed to add IOMMU group %lu to VFIO container", id); 4656078a454SJean-Philippe Brucker goto err_close_group; 4666078a454SJean-Philippe Brucker } 
4676078a454SJean-Philippe Brucker 4686078a454SJean-Philippe Brucker list_add(&group->list, &vfio_groups); 4696078a454SJean-Philippe Brucker 4706078a454SJean-Philippe Brucker return group; 4716078a454SJean-Philippe Brucker 4726078a454SJean-Philippe Brucker err_close_group: 4736078a454SJean-Philippe Brucker close(group->fd); 4746078a454SJean-Philippe Brucker err_free_group: 4756078a454SJean-Philippe Brucker free(group); 4766078a454SJean-Philippe Brucker 4776078a454SJean-Philippe Brucker return NULL; 4786078a454SJean-Philippe Brucker } 4796078a454SJean-Philippe Brucker 4806078a454SJean-Philippe Brucker static void vfio_group_exit(struct kvm *kvm, struct vfio_group *group) 4816078a454SJean-Philippe Brucker { 4826078a454SJean-Philippe Brucker if (--group->refs != 0) 4836078a454SJean-Philippe Brucker return; 4846078a454SJean-Philippe Brucker 4856078a454SJean-Philippe Brucker ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER); 4866078a454SJean-Philippe Brucker 4876078a454SJean-Philippe Brucker list_del(&group->list); 4886078a454SJean-Philippe Brucker close(group->fd); 4896078a454SJean-Philippe Brucker free(group); 4906078a454SJean-Philippe Brucker } 4916078a454SJean-Philippe Brucker 4926078a454SJean-Philippe Brucker static struct vfio_group * 4936078a454SJean-Philippe Brucker vfio_group_get_for_dev(struct kvm *kvm, struct vfio_device *vdev) 4946078a454SJean-Philippe Brucker { 4956078a454SJean-Philippe Brucker int dirfd; 4966078a454SJean-Philippe Brucker ssize_t ret; 4976078a454SJean-Philippe Brucker char *group_name; 4986078a454SJean-Philippe Brucker unsigned long group_id; 4996078a454SJean-Philippe Brucker char group_path[PATH_MAX]; 5006078a454SJean-Philippe Brucker struct vfio_group *group = NULL; 5016078a454SJean-Philippe Brucker 5026078a454SJean-Philippe Brucker /* Find IOMMU group for this device */ 5036078a454SJean-Philippe Brucker dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY); 5046078a454SJean-Philippe Brucker if (dirfd < 0) { 5056078a454SJean-Philippe 
Brucker vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path); 5066078a454SJean-Philippe Brucker return NULL; 5076078a454SJean-Philippe Brucker } 5086078a454SJean-Philippe Brucker 5096078a454SJean-Philippe Brucker ret = readlinkat(dirfd, "iommu_group", group_path, PATH_MAX); 5106078a454SJean-Philippe Brucker if (ret < 0) { 5116078a454SJean-Philippe Brucker vfio_dev_err(vdev, "no iommu_group"); 5126078a454SJean-Philippe Brucker goto out_close; 5136078a454SJean-Philippe Brucker } 5146078a454SJean-Philippe Brucker if (ret == PATH_MAX) 5156078a454SJean-Philippe Brucker goto out_close; 5166078a454SJean-Philippe Brucker 5176078a454SJean-Philippe Brucker group_path[ret] = '\0'; 5186078a454SJean-Philippe Brucker 5196078a454SJean-Philippe Brucker group_name = basename(group_path); 5206078a454SJean-Philippe Brucker errno = 0; 5216078a454SJean-Philippe Brucker group_id = strtoul(group_name, NULL, 10); 5226078a454SJean-Philippe Brucker if (errno) 5236078a454SJean-Philippe Brucker goto out_close; 5246078a454SJean-Philippe Brucker 5256078a454SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) { 5266078a454SJean-Philippe Brucker if (group->id == group_id) { 5276078a454SJean-Philippe Brucker group->refs++; 5286078a454SJean-Philippe Brucker return group; 5296078a454SJean-Philippe Brucker } 5306078a454SJean-Philippe Brucker } 5316078a454SJean-Philippe Brucker 5326078a454SJean-Philippe Brucker group = vfio_group_create(kvm, group_id); 5336078a454SJean-Philippe Brucker 5346078a454SJean-Philippe Brucker out_close: 5356078a454SJean-Philippe Brucker close(dirfd); 5366078a454SJean-Philippe Brucker return group; 5376078a454SJean-Philippe Brucker } 5386078a454SJean-Philippe Brucker 5396078a454SJean-Philippe Brucker static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev) 5406078a454SJean-Philippe Brucker { 5416078a454SJean-Philippe Brucker int ret; 5426078a454SJean-Philippe Brucker char dev_path[PATH_MAX]; 5436078a454SJean-Philippe Brucker struct 
vfio_group *group; 5446078a454SJean-Philippe Brucker 5456078a454SJean-Philippe Brucker ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s", 5466078a454SJean-Philippe Brucker vdev->params->bus, vdev->params->name); 5476078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 5486078a454SJean-Philippe Brucker return -EINVAL; 5496078a454SJean-Philippe Brucker 5506078a454SJean-Philippe Brucker vdev->sysfs_path = strndup(dev_path, PATH_MAX); 5516078a454SJean-Philippe Brucker if (!vdev->sysfs_path) 5526078a454SJean-Philippe Brucker return -errno; 5536078a454SJean-Philippe Brucker 5546078a454SJean-Philippe Brucker group = vfio_group_get_for_dev(kvm, vdev); 5556078a454SJean-Philippe Brucker if (!group) { 5566078a454SJean-Philippe Brucker free(vdev->sysfs_path); 5576078a454SJean-Philippe Brucker return -EINVAL; 5586078a454SJean-Philippe Brucker } 5596078a454SJean-Philippe Brucker 5606078a454SJean-Philippe Brucker vdev->group = group; 5616078a454SJean-Philippe Brucker 5626078a454SJean-Philippe Brucker return 0; 5636078a454SJean-Philippe Brucker }

/*
 * Tear down one VFIO device.
 *
 * Calls vfio_group_exit() on the device's group, runs the type-specific
 * teardown (only PCI has one; other types just log a warning), closes the
 * device file descriptor and frees the region array and sysfs path string.
 */
static void vfio_device_exit(struct kvm *kvm, struct vfio_device *vdev)
{
	vfio_group_exit(kvm, vdev->group);

	switch (vdev->params->type) {
	case VFIO_DEVICE_PCI:
		vfio_pci_teardown_device(kvm, vdev);
		break;
	default:
		vfio_dev_warn(vdev, "no teardown function for device");
		break;
	}

	close(vdev->fd);

	free(vdev->regions);
	free(vdev->sysfs_path);
}

/*
 * Open and configure the global VFIO container.
 *
 * Steps, in order:
 *   1. open VFIO_DEV_NODE and store the descriptor in vfio_container;
 *   2. check that the kernel reports VFIO_API_VERSION;
 *   3. pick an IOMMU type with vfio_get_iommu_type();
 *   4. bind each vfio_devices[i] to its command-line parameters and call
 *      vfio_device_init() on it;
 *   5. select the IOMMU type on the container with VFIO_SET_IOMMU;
 *   6. call vfio_map_mem_bank() on every RAM bank of the guest.
 *
 * Returns 0 on success. On failure returns a negative errno-style code, or
 * whatever vfio_get_iommu_type(), vfio_device_init() or
 * kvm__for_each_mem_bank() returned. The container descriptor is not closed
 * here on failure.
 */
static int vfio_container_init(struct kvm *kvm)
{
	int api, i, ret, iommu_type;

	/* Create a container for our IOMMU groups */
	vfio_container = open(VFIO_DEV_NODE, O_RDWR);
	if (vfio_container == -1) {
		/*
		 * Capture errno before logging, and negate it so this path
		 * reports failure the same way as every other path below.
		 */
		ret = -errno;
		pr_err("Failed to open %s", VFIO_DEV_NODE);
		return ret;
	}

	api = ioctl(vfio_container, VFIO_GET_API_VERSION);
	if (api != VFIO_API_VERSION) {
		pr_err("Unknown VFIO API version %d", api);
		return -ENODEV;
	}

	iommu_type = vfio_get_iommu_type();
	if (iommu_type < 0) {
		pr_err("VFIO type-1 IOMMU not supported on this platform");
		return iommu_type;
	}

	/* Create groups for our devices and add them to the container */
	for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
		vfio_devices[i].params = &kvm->cfg.vfio_devices[i];

		ret = vfio_device_init(kvm, &vfio_devices[i]);
		if (ret)
			return ret;
	}

	/* Finalise the container */
	if (ioctl(vfio_container, VFIO_SET_IOMMU, iommu_type)) {
		ret = -errno;
		pr_err("Failed to set IOMMU type %d for VFIO container",
		       iommu_type);
		return ret;
	}

	pr_info("Using IOMMU type %d for VFIO container", iommu_type);

	return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_map_mem_bank,
				      NULL);
}

/*
 * Initialisation hook registered through dev_base_init().
 *
 * Does nothing when no VFIO device was configured. Otherwise allocates the
 * zero-filled vfio_devices array (one entry per configured device), sets up
 * the container, then configures groups and finally devices.
 *
 * Returns 0 on success, -ENOMEM if the array cannot be allocated, or the
 * first non-zero value returned by one of the setup steps.
 */
static int vfio__init(struct kvm *kvm)
{
	int ret;

	if (!kvm->cfg.num_vfio_devices)
		return 0;

	vfio_devices = calloc(kvm->cfg.num_vfio_devices, sizeof(*vfio_devices));
	if (!vfio_devices)
		return -ENOMEM;

	ret = vfio_container_init(kvm);
	if (ret)
		return ret;

	ret = vfio_configure_groups(kvm);
	if (ret)
		return ret;

	return vfio_configure_devices(kvm);
}
dev_base_init(vfio__init);

/*
 * Exit hook registered through dev_base_exit().
 *
 * Does nothing when no VFIO device was configured. Otherwise tears down every
 * device, frees the vfio_devices array, calls vfio_unmap_mem_bank() on every
 * RAM bank, closes the container and frees the parsed device parameters.
 *
 * Always returns 0.
 */
static int vfio__exit(struct kvm *kvm)
{
	int i;

	if (!kvm->cfg.num_vfio_devices)
		return 0;

	for (i = 0; i < kvm->cfg.num_vfio_devices; i++)
		vfio_device_exit(kvm, &vfio_devices[i]);

	free(vfio_devices);
	/* Do not leave a dangling pointer behind in the file-scope variable. */
	vfio_devices = NULL;

	kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL);
	close(vfio_container);

	free(kvm->cfg.vfio_devices);
	kvm->cfg.vfio_devices = NULL;

	return 0;
}
dev_base_exit(vfio__exit);