16078a454SJean-Philippe Brucker #include "kvm/kvm.h" 26078a454SJean-Philippe Brucker #include "kvm/vfio.h" 3*82caa882SJean-Philippe Brucker #include "kvm/ioport.h" 46078a454SJean-Philippe Brucker 56078a454SJean-Philippe Brucker #include <linux/list.h> 66078a454SJean-Philippe Brucker 76078a454SJean-Philippe Brucker #define VFIO_DEV_DIR "/dev/vfio" 86078a454SJean-Philippe Brucker #define VFIO_DEV_NODE VFIO_DEV_DIR "/vfio" 96078a454SJean-Philippe Brucker #define IOMMU_GROUP_DIR "/sys/kernel/iommu_groups" 106078a454SJean-Philippe Brucker 116078a454SJean-Philippe Brucker static int vfio_container; 126078a454SJean-Philippe Brucker static LIST_HEAD(vfio_groups); 136078a454SJean-Philippe Brucker static struct vfio_device *vfio_devices; 146078a454SJean-Philippe Brucker 156078a454SJean-Philippe Brucker static int vfio_device_pci_parser(const struct option *opt, char *arg, 166078a454SJean-Philippe Brucker struct vfio_device_params *dev) 176078a454SJean-Philippe Brucker { 186078a454SJean-Philippe Brucker unsigned int domain, bus, devnr, fn; 196078a454SJean-Philippe Brucker 206078a454SJean-Philippe Brucker int nr = sscanf(arg, "%4x:%2x:%2x.%1x", &domain, &bus, &devnr, &fn); 216078a454SJean-Philippe Brucker if (nr < 4) { 226078a454SJean-Philippe Brucker domain = 0; 236078a454SJean-Philippe Brucker nr = sscanf(arg, "%2x:%2x.%1x", &bus, &devnr, &fn); 246078a454SJean-Philippe Brucker if (nr < 3) { 256078a454SJean-Philippe Brucker pr_err("Invalid device identifier %s", arg); 266078a454SJean-Philippe Brucker return -EINVAL; 276078a454SJean-Philippe Brucker } 286078a454SJean-Philippe Brucker } 296078a454SJean-Philippe Brucker 306078a454SJean-Philippe Brucker dev->type = VFIO_DEVICE_PCI; 316078a454SJean-Philippe Brucker dev->bus = "pci"; 326078a454SJean-Philippe Brucker dev->name = malloc(13); 336078a454SJean-Philippe Brucker if (!dev->name) 346078a454SJean-Philippe Brucker return -ENOMEM; 356078a454SJean-Philippe Brucker 366078a454SJean-Philippe Brucker snprintf(dev->name, 13, "%04x:%02x:%02x.%x", domain, bus, devnr, fn); 376078a454SJean-Philippe Brucker 386078a454SJean-Philippe Brucker return 0; 396078a454SJean-Philippe Brucker } 406078a454SJean-Philippe Brucker 416078a454SJean-Philippe Brucker int vfio_device_parser(const struct option *opt, const char *arg, int unset) 426078a454SJean-Philippe Brucker { 436078a454SJean-Philippe Brucker int ret = -EINVAL; 446078a454SJean-Philippe Brucker static int idx = 0; 456078a454SJean-Philippe Brucker struct kvm *kvm = opt->ptr; 466078a454SJean-Philippe Brucker struct vfio_device_params *dev, *devs; 476078a454SJean-Philippe Brucker char *cur, *buf = strdup(arg); 486078a454SJean-Philippe Brucker 496078a454SJean-Philippe Brucker if (!buf) 506078a454SJean-Philippe Brucker return -ENOMEM; 516078a454SJean-Philippe Brucker 526078a454SJean-Philippe Brucker if (idx >= MAX_VFIO_DEVICES) { 536078a454SJean-Philippe Brucker pr_warning("Too many VFIO devices"); 546078a454SJean-Philippe Brucker goto out_free_buf; 556078a454SJean-Philippe Brucker } 566078a454SJean-Philippe Brucker 576078a454SJean-Philippe Brucker devs = realloc(kvm->cfg.vfio_devices, sizeof(*dev) * (idx + 1)); 586078a454SJean-Philippe Brucker if (!devs) { 596078a454SJean-Philippe Brucker ret = -ENOMEM; 606078a454SJean-Philippe Brucker goto out_free_buf; 616078a454SJean-Philippe Brucker } 626078a454SJean-Philippe Brucker 636078a454SJean-Philippe Brucker kvm->cfg.vfio_devices = devs; 646078a454SJean-Philippe Brucker dev = &devs[idx]; 656078a454SJean-Philippe Brucker 666078a454SJean-Philippe Brucker cur = strtok(buf, ","); 676078a454SJean-Philippe Brucker if (!cur) 686078a454SJean-Philippe Brucker goto out_free_buf; 696078a454SJean-Philippe Brucker 706078a454SJean-Philippe Brucker if (!strcmp(opt->long_name, "vfio-pci")) 716078a454SJean-Philippe Brucker ret = vfio_device_pci_parser(opt, cur, dev); 726078a454SJean-Philippe Brucker else 736078a454SJean-Philippe Brucker ret = -EINVAL; 746078a454SJean-Philippe Brucker 756078a454SJean-Philippe Brucker if (!ret) 766078a454SJean-Philippe Brucker kvm->cfg.num_vfio_devices = ++idx; 776078a454SJean-Philippe Brucker 786078a454SJean-Philippe Brucker out_free_buf: 796078a454SJean-Philippe Brucker free(buf); 806078a454SJean-Philippe Brucker 816078a454SJean-Philippe Brucker return ret; 826078a454SJean-Philippe Brucker } 836078a454SJean-Philippe Brucker 84*82caa882SJean-Philippe Brucker static bool vfio_ioport_in(struct ioport *ioport, struct kvm_cpu *vcpu, 85*82caa882SJean-Philippe Brucker u16 port, void *data, int len) 86*82caa882SJean-Philippe Brucker { 87*82caa882SJean-Philippe Brucker u32 val; 88*82caa882SJean-Philippe Brucker ssize_t nr; 89*82caa882SJean-Philippe Brucker struct vfio_region *region = ioport->priv; 90*82caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 91*82caa882SJean-Philippe Brucker 92*82caa882SJean-Philippe Brucker u32 offset = port - region->port_base; 93*82caa882SJean-Philippe Brucker 94*82caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 95*82caa882SJean-Philippe Brucker return false; 96*82caa882SJean-Philippe Brucker 97*82caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 98*82caa882SJean-Philippe Brucker if (nr != len) { 99*82caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n", 100*82caa882SJean-Philippe Brucker len, port); 101*82caa882SJean-Philippe Brucker return false; 102*82caa882SJean-Philippe Brucker } 103*82caa882SJean-Philippe Brucker 104*82caa882SJean-Philippe Brucker switch (len) { 105*82caa882SJean-Philippe Brucker case 1: 106*82caa882SJean-Philippe Brucker ioport__write8(data, val); 107*82caa882SJean-Philippe Brucker break; 108*82caa882SJean-Philippe Brucker case 2: 109*82caa882SJean-Philippe Brucker ioport__write16(data, val); 110*82caa882SJean-Philippe Brucker break; 111*82caa882SJean-Philippe Brucker case 4: 112*82caa882SJean-Philippe Brucker ioport__write32(data, val); 113*82caa882SJean-Philippe Brucker break; 114*82caa882SJean-Philippe Brucker default: 115*82caa882SJean-Philippe Brucker return false; 116*82caa882SJean-Philippe Brucker } 117*82caa882SJean-Philippe Brucker 118*82caa882SJean-Philippe Brucker return true; 119*82caa882SJean-Philippe Brucker } 120*82caa882SJean-Philippe Brucker 121*82caa882SJean-Philippe Brucker static bool vfio_ioport_out(struct ioport *ioport, struct kvm_cpu *vcpu, 122*82caa882SJean-Philippe Brucker u16 port, void *data, int len) 123*82caa882SJean-Philippe Brucker { 124*82caa882SJean-Philippe Brucker u32 val; 125*82caa882SJean-Philippe Brucker ssize_t nr; 126*82caa882SJean-Philippe Brucker struct vfio_region *region = ioport->priv; 127*82caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 128*82caa882SJean-Philippe Brucker 129*82caa882SJean-Philippe Brucker u32 offset = port - region->port_base; 130*82caa882SJean-Philippe Brucker 131*82caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 132*82caa882SJean-Philippe Brucker return false; 133*82caa882SJean-Philippe Brucker 134*82caa882SJean-Philippe Brucker switch (len) { 135*82caa882SJean-Philippe Brucker case 1: 136*82caa882SJean-Philippe Brucker val = ioport__read8(data); 137*82caa882SJean-Philippe Brucker break; 138*82caa882SJean-Philippe Brucker case 2: 139*82caa882SJean-Philippe Brucker val = ioport__read16(data); 140*82caa882SJean-Philippe Brucker break; 141*82caa882SJean-Philippe Brucker case 4: 142*82caa882SJean-Philippe Brucker val = ioport__read32(data); 143*82caa882SJean-Philippe Brucker break; 144*82caa882SJean-Philippe Brucker default: 145*82caa882SJean-Philippe Brucker return false; 146*82caa882SJean-Philippe Brucker } 147*82caa882SJean-Philippe Brucker 148*82caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 149*82caa882SJean-Philippe Brucker if (nr != len) 150*82caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x", 151*82caa882SJean-Philippe Brucker len, port); 152*82caa882SJean-Philippe Brucker 153*82caa882SJean-Philippe Brucker return nr == len; 154*82caa882SJean-Philippe Brucker } 155*82caa882SJean-Philippe Brucker 156*82caa882SJean-Philippe Brucker static struct ioport_operations vfio_ioport_ops = { 157*82caa882SJean-Philippe Brucker .io_in = vfio_ioport_in, 158*82caa882SJean-Philippe Brucker .io_out = vfio_ioport_out, 159*82caa882SJean-Philippe Brucker }; 160*82caa882SJean-Philippe Brucker 161*82caa882SJean-Philippe Brucker static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len, 162*82caa882SJean-Philippe Brucker u8 is_write, void *ptr) 163*82caa882SJean-Philippe Brucker { 164*82caa882SJean-Philippe Brucker u64 val; 165*82caa882SJean-Philippe Brucker ssize_t nr; 166*82caa882SJean-Philippe Brucker struct vfio_region *region = ptr; 167*82caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 168*82caa882SJean-Philippe Brucker 169*82caa882SJean-Philippe Brucker u32 offset = addr - region->guest_phys_addr; 170*82caa882SJean-Philippe Brucker 171*82caa882SJean-Philippe Brucker if (len < 1 || len > 8) 172*82caa882SJean-Philippe Brucker goto err_report; 173*82caa882SJean-Philippe Brucker 174*82caa882SJean-Philippe Brucker if (is_write) { 175*82caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 176*82caa882SJean-Philippe Brucker goto err_report; 177*82caa882SJean-Philippe Brucker 178*82caa882SJean-Philippe Brucker memcpy(&val, data, len); 179*82caa882SJean-Philippe Brucker 180*82caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 181*82caa882SJean-Philippe Brucker if ((u32)nr != len) 182*82caa882SJean-Philippe Brucker goto err_report; 183*82caa882SJean-Philippe Brucker } else { 184*82caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 185*82caa882SJean-Philippe Brucker goto err_report; 186*82caa882SJean-Philippe Brucker 187*82caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 188*82caa882SJean-Philippe Brucker if ((u32)nr != len) 189*82caa882SJean-Philippe Brucker goto err_report; 190*82caa882SJean-Philippe Brucker 191*82caa882SJean-Philippe Brucker memcpy(data, &val, len); 192*82caa882SJean-Philippe Brucker } 193*82caa882SJean-Philippe Brucker 194*82caa882SJean-Philippe Brucker return; 195*82caa882SJean-Philippe Brucker 196*82caa882SJean-Philippe Brucker err_report: 197*82caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ? 198*82caa882SJean-Philippe Brucker "write" : "read", len, offset, addr); 199*82caa882SJean-Philippe Brucker } 200*82caa882SJean-Philippe Brucker 201*82caa882SJean-Philippe Brucker static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev, 202*82caa882SJean-Philippe Brucker struct vfio_region *region) 203*82caa882SJean-Philippe Brucker { 204*82caa882SJean-Philippe Brucker if (region->is_ioport) { 205*82caa882SJean-Philippe Brucker int port = ioport__register(kvm, IOPORT_EMPTY, &vfio_ioport_ops, 206*82caa882SJean-Philippe Brucker region->info.size, region); 207*82caa882SJean-Philippe Brucker if (port < 0) 208*82caa882SJean-Philippe Brucker return port; 209*82caa882SJean-Philippe Brucker 210*82caa882SJean-Philippe Brucker region->port_base = port; 211*82caa882SJean-Philippe Brucker return 0; 212*82caa882SJean-Philippe Brucker } 213*82caa882SJean-Philippe Brucker 214*82caa882SJean-Philippe Brucker return kvm__register_mmio(kvm, region->guest_phys_addr, 215*82caa882SJean-Philippe Brucker region->info.size, false, vfio_mmio_access, 216*82caa882SJean-Philippe Brucker region); 217*82caa882SJean-Philippe Brucker } 218*82caa882SJean-Philippe Brucker 2196078a454SJean-Philippe Brucker int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, 2206078a454SJean-Philippe Brucker struct vfio_region *region) 2216078a454SJean-Philippe Brucker { 2226078a454SJean-Philippe Brucker void *base; 2236078a454SJean-Philippe Brucker int ret, prot = 0; 2246078a454SJean-Philippe Brucker /* KVM needs page-aligned regions */ 2256078a454SJean-Philippe Brucker u64 map_size = ALIGN(region->info.size, PAGE_SIZE); 2266078a454SJean-Philippe Brucker 227*82caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) 228*82caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2296078a454SJean-Philippe Brucker 2306078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_READ) 2316078a454SJean-Philippe Brucker prot |= PROT_READ; 2326078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 2336078a454SJean-Philippe Brucker prot |= PROT_WRITE; 2346078a454SJean-Philippe Brucker 2356078a454SJean-Philippe Brucker base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd, 2366078a454SJean-Philippe Brucker region->info.offset); 2376078a454SJean-Philippe Brucker if (base == MAP_FAILED) { 238*82caa882SJean-Philippe Brucker /* TODO: support sparse mmap */ 239*82caa882SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping", 2406078a454SJean-Philippe Brucker region->info.index, region->info.size); 241*82caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2426078a454SJean-Philippe Brucker } 2436078a454SJean-Philippe Brucker region->host_addr = base; 2446078a454SJean-Philippe Brucker 2456078a454SJean-Philippe Brucker ret = kvm__register_dev_mem(kvm, region->guest_phys_addr, map_size, 2466078a454SJean-Philippe Brucker region->host_addr); 2476078a454SJean-Philippe Brucker if (ret) { 2486078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to register region with KVM"); 2496078a454SJean-Philippe Brucker return ret; 2506078a454SJean-Philippe Brucker } 2516078a454SJean-Philippe Brucker 2526078a454SJean-Philippe Brucker return 0; 2536078a454SJean-Philippe Brucker } 2546078a454SJean-Philippe Brucker 2556078a454SJean-Philippe Brucker void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region) 2566078a454SJean-Philippe Brucker { 257*82caa882SJean-Philippe Brucker if (region->host_addr) { 2586078a454SJean-Philippe Brucker munmap(region->host_addr, region->info.size); 259*82caa882SJean-Philippe Brucker } else if (region->is_ioport) { 260*82caa882SJean-Philippe Brucker ioport__unregister(kvm, region->port_base); 261*82caa882SJean-Philippe Brucker } else { 262*82caa882SJean-Philippe Brucker kvm__deregister_mmio(kvm, region->guest_phys_addr); 263*82caa882SJean-Philippe Brucker } 2646078a454SJean-Philippe Brucker } 2656078a454SJean-Philippe Brucker 2666078a454SJean-Philippe Brucker static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev) 2676078a454SJean-Philippe Brucker { 2686078a454SJean-Philippe Brucker int ret; 2696078a454SJean-Philippe Brucker struct vfio_group *group = vdev->group; 2706078a454SJean-Philippe Brucker 2716078a454SJean-Philippe Brucker vdev->fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, 2726078a454SJean-Philippe Brucker vdev->params->name); 2736078a454SJean-Philippe Brucker if (vdev->fd < 0) { 2746078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to get fd"); 2756078a454SJean-Philippe Brucker 2766078a454SJean-Philippe Brucker /* The device might be a bridge without an fd */ 2776078a454SJean-Philippe Brucker return 0; 2786078a454SJean-Philippe Brucker } 2796078a454SJean-Philippe Brucker 2806078a454SJean-Philippe Brucker vdev->info.argsz = sizeof(vdev->info); 2816078a454SJean-Philippe Brucker if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) { 2826078a454SJean-Philippe Brucker ret = -errno; 2836078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to get info"); 2846078a454SJean-Philippe Brucker goto err_close_device; 2856078a454SJean-Philippe Brucker } 2866078a454SJean-Philippe Brucker 2876078a454SJean-Philippe Brucker if (vdev->info.flags & VFIO_DEVICE_FLAGS_RESET && 2886078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0) 2896078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to reset device"); 2906078a454SJean-Philippe Brucker 2916078a454SJean-Philippe Brucker vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions)); 2926078a454SJean-Philippe Brucker if (!vdev->regions) { 2936078a454SJean-Philippe Brucker ret = -ENOMEM; 2946078a454SJean-Philippe Brucker goto err_close_device; 2956078a454SJean-Philippe Brucker } 2966078a454SJean-Philippe Brucker 2976078a454SJean-Philippe Brucker /* Now for the bus-specific initialization... */ 2986078a454SJean-Philippe Brucker switch (vdev->params->type) { 2996078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI: 3006078a454SJean-Philippe Brucker BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI)); 3016078a454SJean-Philippe Brucker ret = vfio_pci_setup_device(kvm, vdev); 3026078a454SJean-Philippe Brucker break; 3036078a454SJean-Philippe Brucker default: 3046078a454SJean-Philippe Brucker BUG_ON(1); 3056078a454SJean-Philippe Brucker ret = -EINVAL; 3066078a454SJean-Philippe Brucker } 3076078a454SJean-Philippe Brucker 3086078a454SJean-Philippe Brucker if (ret) 3096078a454SJean-Philippe Brucker goto err_free_regions; 3106078a454SJean-Philippe Brucker 3116078a454SJean-Philippe Brucker vfio_dev_info(vdev, "assigned to device number 0x%x in group %lu", 3126078a454SJean-Philippe Brucker vdev->dev_hdr.dev_num, group->id); 3136078a454SJean-Philippe Brucker 3146078a454SJean-Philippe Brucker return 0; 3156078a454SJean-Philippe Brucker 3166078a454SJean-Philippe Brucker err_free_regions: 3176078a454SJean-Philippe Brucker free(vdev->regions); 3186078a454SJean-Philippe Brucker err_close_device: 3196078a454SJean-Philippe Brucker close(vdev->fd); 3206078a454SJean-Philippe Brucker 3216078a454SJean-Philippe Brucker return ret; 3226078a454SJean-Philippe Brucker } 3236078a454SJean-Philippe Brucker 3246078a454SJean-Philippe Brucker static int vfio_configure_devices(struct kvm *kvm) 3256078a454SJean-Philippe Brucker { 3266078a454SJean-Philippe Brucker int i, ret; 3276078a454SJean-Philippe Brucker 3286078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) { 3296078a454SJean-Philippe Brucker ret = vfio_configure_device(kvm, &vfio_devices[i]); 3306078a454SJean-Philippe Brucker if (ret) 3316078a454SJean-Philippe Brucker return ret; 3326078a454SJean-Philippe Brucker } 3336078a454SJean-Philippe Brucker 3346078a454SJean-Philippe Brucker return 0; 3356078a454SJean-Philippe Brucker } 3366078a454SJean-Philippe Brucker 3376078a454SJean-Philippe Brucker static int vfio_get_iommu_type(void) 3386078a454SJean-Philippe Brucker { 3396078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) 3406078a454SJean-Philippe Brucker return VFIO_TYPE1v2_IOMMU; 3416078a454SJean-Philippe Brucker 3426078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) 3436078a454SJean-Philippe Brucker return VFIO_TYPE1_IOMMU; 3446078a454SJean-Philippe Brucker 3456078a454SJean-Philippe Brucker return -ENODEV; 3466078a454SJean-Philippe Brucker } 3476078a454SJean-Philippe Brucker 3486078a454SJean-Philippe Brucker static int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 3496078a454SJean-Philippe Brucker { 3506078a454SJean-Philippe Brucker int ret = 0; 3516078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_map dma_map = { 3526078a454SJean-Philippe Brucker .argsz = sizeof(dma_map), 3536078a454SJean-Philippe Brucker .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, 3546078a454SJean-Philippe Brucker .vaddr = (unsigned long)bank->host_addr, 3556078a454SJean-Philippe Brucker .iova = (u64)bank->guest_phys_addr, 3566078a454SJean-Philippe Brucker .size = bank->size, 3576078a454SJean-Philippe Brucker }; 3586078a454SJean-Philippe Brucker 3596078a454SJean-Philippe Brucker /* Map the guest memory for DMA (i.e. provide isolation) */ 3606078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_IOMMU_MAP_DMA, &dma_map)) { 3616078a454SJean-Philippe Brucker ret = -errno; 3626078a454SJean-Philippe Brucker pr_err("Failed to map 0x%llx -> 0x%llx (%llu) for DMA", 3636078a454SJean-Philippe Brucker dma_map.iova, dma_map.vaddr, dma_map.size); 3646078a454SJean-Philippe Brucker } 3656078a454SJean-Philippe Brucker 3666078a454SJean-Philippe Brucker return ret; 3676078a454SJean-Philippe Brucker } 3686078a454SJean-Philippe Brucker 3696078a454SJean-Philippe Brucker static int vfio_unmap_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 3706078a454SJean-Philippe Brucker { 3716078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_unmap dma_unmap = { 3726078a454SJean-Philippe Brucker .argsz = sizeof(dma_unmap), 3736078a454SJean-Philippe Brucker .size = bank->size, 3746078a454SJean-Philippe Brucker .iova = bank->guest_phys_addr, 3756078a454SJean-Philippe Brucker }; 3766078a454SJean-Philippe Brucker 3776078a454SJean-Philippe Brucker ioctl(vfio_container, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); 3786078a454SJean-Philippe Brucker 3796078a454SJean-Philippe Brucker return 0; 3806078a454SJean-Philippe Brucker } 3816078a454SJean-Philippe Brucker 3826078a454SJean-Philippe Brucker static struct vfio_group *vfio_group_create(struct kvm *kvm, unsigned long id) 3836078a454SJean-Philippe Brucker { 3846078a454SJean-Philippe Brucker int ret; 3856078a454SJean-Philippe Brucker struct vfio_group *group; 3866078a454SJean-Philippe Brucker char group_node[PATH_MAX]; 3876078a454SJean-Philippe Brucker struct vfio_group_status group_status = { 3886078a454SJean-Philippe Brucker .argsz = sizeof(group_status), 3896078a454SJean-Philippe Brucker }; 3906078a454SJean-Philippe Brucker 3916078a454SJean-Philippe Brucker group = calloc(1, sizeof(*group)); 3926078a454SJean-Philippe Brucker if (!group) 3936078a454SJean-Philippe Brucker return NULL; 3946078a454SJean-Philippe Brucker 3956078a454SJean-Philippe Brucker group->id = id; 3966078a454SJean-Philippe Brucker group->refs = 1; 3976078a454SJean-Philippe Brucker 3986078a454SJean-Philippe Brucker ret = snprintf(group_node, PATH_MAX, VFIO_DEV_DIR "/%lu", id); 3996078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 4006078a454SJean-Philippe Brucker return NULL; 4016078a454SJean-Philippe Brucker 4026078a454SJean-Philippe Brucker group->fd = open(group_node, O_RDWR); 4036078a454SJean-Philippe Brucker if (group->fd < 0) { 4046078a454SJean-Philippe Brucker pr_err("Failed to open IOMMU group %s", group_node); 4056078a454SJean-Philippe Brucker goto err_free_group; 4066078a454SJean-Philippe Brucker } 4076078a454SJean-Philippe Brucker 4086078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &group_status)) { 4096078a454SJean-Philippe Brucker pr_err("Failed to determine status of IOMMU group %lu", id); 4106078a454SJean-Philippe Brucker goto err_close_group; 4116078a454SJean-Philippe Brucker } 4126078a454SJean-Philippe Brucker 4136078a454SJean-Philippe Brucker if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { 4146078a454SJean-Philippe Brucker pr_err("IOMMU group %lu is not viable", id); 4156078a454SJean-Philippe Brucker goto err_close_group; 4166078a454SJean-Philippe Brucker } 4176078a454SJean-Philippe Brucker 4186078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &vfio_container)) { 4196078a454SJean-Philippe Brucker pr_err("Failed to add IOMMU group %lu to VFIO container", id); 4206078a454SJean-Philippe Brucker goto err_close_group; 4216078a454SJean-Philippe Brucker } 4226078a454SJean-Philippe Brucker 4236078a454SJean-Philippe Brucker list_add(&group->list, &vfio_groups); 4246078a454SJean-Philippe Brucker 4256078a454SJean-Philippe Brucker return group; 4266078a454SJean-Philippe Brucker 4276078a454SJean-Philippe Brucker err_close_group: 4286078a454SJean-Philippe Brucker close(group->fd); 4296078a454SJean-Philippe Brucker err_free_group: 4306078a454SJean-Philippe Brucker free(group); 4316078a454SJean-Philippe Brucker 4326078a454SJean-Philippe Brucker return NULL; 4336078a454SJean-Philippe Brucker } 4346078a454SJean-Philippe Brucker 4356078a454SJean-Philippe Brucker static void vfio_group_exit(struct kvm *kvm, struct vfio_group *group) 4366078a454SJean-Philippe Brucker { 4376078a454SJean-Philippe Brucker if (--group->refs != 0) 4386078a454SJean-Philippe Brucker return; 4396078a454SJean-Philippe Brucker 4406078a454SJean-Philippe Brucker ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER); 4416078a454SJean-Philippe Brucker 4426078a454SJean-Philippe Brucker list_del(&group->list); 4436078a454SJean-Philippe Brucker close(group->fd); 4446078a454SJean-Philippe Brucker free(group); 4456078a454SJean-Philippe Brucker } 4466078a454SJean-Philippe Brucker 4476078a454SJean-Philippe Brucker static struct vfio_group * 4486078a454SJean-Philippe Brucker vfio_group_get_for_dev(struct kvm *kvm, struct vfio_device *vdev) 4496078a454SJean-Philippe Brucker { 4506078a454SJean-Philippe Brucker int dirfd; 4516078a454SJean-Philippe Brucker ssize_t ret; 4526078a454SJean-Philippe Brucker char *group_name; 4536078a454SJean-Philippe Brucker unsigned long group_id; 4546078a454SJean-Philippe Brucker char group_path[PATH_MAX]; 4556078a454SJean-Philippe Brucker struct vfio_group *group = NULL; 4566078a454SJean-Philippe Brucker 4576078a454SJean-Philippe Brucker /* Find IOMMU group for this device */ 4586078a454SJean-Philippe Brucker dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY); 4596078a454SJean-Philippe Brucker if (dirfd < 0) { 4606078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path); 4616078a454SJean-Philippe Brucker return NULL; 4626078a454SJean-Philippe Brucker } 4636078a454SJean-Philippe Brucker 4646078a454SJean-Philippe Brucker ret = readlinkat(dirfd, "iommu_group", group_path, PATH_MAX); 4656078a454SJean-Philippe Brucker if (ret < 0) { 4666078a454SJean-Philippe Brucker vfio_dev_err(vdev, "no iommu_group"); 4676078a454SJean-Philippe Brucker goto out_close; 4686078a454SJean-Philippe Brucker } 4696078a454SJean-Philippe Brucker if (ret == PATH_MAX) 4706078a454SJean-Philippe Brucker goto out_close; 4716078a454SJean-Philippe Brucker 4726078a454SJean-Philippe Brucker group_path[ret] = '\0'; 4736078a454SJean-Philippe Brucker 4746078a454SJean-Philippe Brucker group_name = basename(group_path); 4756078a454SJean-Philippe Brucker errno = 0; 4766078a454SJean-Philippe Brucker group_id = strtoul(group_name, NULL, 10); 4776078a454SJean-Philippe Brucker if (errno) 4786078a454SJean-Philippe Brucker goto out_close; 4796078a454SJean-Philippe Brucker 4806078a454SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) { 4816078a454SJean-Philippe Brucker if (group->id == group_id) { 4826078a454SJean-Philippe Brucker group->refs++; 4836078a454SJean-Philippe Brucker return group; 4846078a454SJean-Philippe Brucker } 4856078a454SJean-Philippe Brucker } 4866078a454SJean-Philippe Brucker 4876078a454SJean-Philippe Brucker group = vfio_group_create(kvm, group_id); 4886078a454SJean-Philippe Brucker 4896078a454SJean-Philippe Brucker out_close: 4906078a454SJean-Philippe Brucker close(dirfd); 4916078a454SJean-Philippe Brucker return group; 4926078a454SJean-Philippe Brucker } 4936078a454SJean-Philippe Brucker 4946078a454SJean-Philippe Brucker static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev) 4956078a454SJean-Philippe Brucker { 4966078a454SJean-Philippe Brucker int ret; 4976078a454SJean-Philippe Brucker char dev_path[PATH_MAX]; 4986078a454SJean-Philippe Brucker struct vfio_group *group; 4996078a454SJean-Philippe Brucker 5006078a454SJean-Philippe Brucker ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s", 5016078a454SJean-Philippe Brucker vdev->params->bus, vdev->params->name); 5026078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 5036078a454SJean-Philippe Brucker return -EINVAL; 5046078a454SJean-Philippe Brucker 5056078a454SJean-Philippe Brucker vdev->sysfs_path = strndup(dev_path, PATH_MAX); 5066078a454SJean-Philippe Brucker if (!vdev->sysfs_path) 5076078a454SJean-Philippe Brucker return -errno; 5086078a454SJean-Philippe Brucker 5096078a454SJean-Philippe Brucker group = vfio_group_get_for_dev(kvm, vdev); 5106078a454SJean-Philippe Brucker if (!group) { 5116078a454SJean-Philippe Brucker free(vdev->sysfs_path); 5126078a454SJean-Philippe Brucker return -EINVAL; 5136078a454SJean-Philippe Brucker } 5146078a454SJean-Philippe Brucker 5156078a454SJean-Philippe Brucker vdev->group = group; 5166078a454SJean-Philippe Brucker 5176078a454SJean-Philippe Brucker return 0; 5186078a454SJean-Philippe Brucker } 5196078a454SJean-Philippe Brucker 5206078a454SJean-Philippe Brucker static void vfio_device_exit(struct kvm *kvm, struct vfio_device *vdev) 5216078a454SJean-Philippe Brucker { 5226078a454SJean-Philippe Brucker vfio_group_exit(kvm, vdev->group); 5236078a454SJean-Philippe Brucker 5246078a454SJean-Philippe Brucker switch (vdev->params->type) { 5256078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI: 5266078a454SJean-Philippe Brucker vfio_pci_teardown_device(kvm, vdev); 5276078a454SJean-Philippe Brucker break; 5286078a454SJean-Philippe Brucker default: 5296078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "no teardown function for device"); 5306078a454SJean-Philippe Brucker } 5316078a454SJean-Philippe Brucker 5326078a454SJean-Philippe Brucker close(vdev->fd); 5336078a454SJean-Philippe Brucker 5346078a454SJean-Philippe Brucker free(vdev->regions); 5356078a454SJean-Philippe Brucker free(vdev->sysfs_path); 5366078a454SJean-Philippe Brucker } 5376078a454SJean-Philippe Brucker 5386078a454SJean-Philippe Brucker static int vfio_container_init(struct kvm *kvm) 5396078a454SJean-Philippe Brucker { 5406078a454SJean-Philippe Brucker int api, i, ret, iommu_type;; 5416078a454SJean-Philippe Brucker 5426078a454SJean-Philippe Brucker /* Create a container for our IOMMU groups */ 5436078a454SJean-Philippe Brucker vfio_container = open(VFIO_DEV_NODE, O_RDWR); 5446078a454SJean-Philippe Brucker if (vfio_container == -1) { 5456078a454SJean-Philippe Brucker ret = errno; 5466078a454SJean-Philippe Brucker pr_err("Failed to open %s", VFIO_DEV_NODE); 5476078a454SJean-Philippe Brucker return ret; 5486078a454SJean-Philippe Brucker } 5496078a454SJean-Philippe Brucker 5506078a454SJean-Philippe Brucker api = ioctl(vfio_container, VFIO_GET_API_VERSION); 5516078a454SJean-Philippe Brucker if (api != VFIO_API_VERSION) { 5526078a454SJean-Philippe Brucker pr_err("Unknown VFIO API version %d", api); 5536078a454SJean-Philippe Brucker return -ENODEV; 5546078a454SJean-Philippe Brucker } 5556078a454SJean-Philippe Brucker 5566078a454SJean-Philippe Brucker iommu_type = vfio_get_iommu_type(); 5576078a454SJean-Philippe Brucker if (iommu_type < 0) { 5586078a454SJean-Philippe Brucker pr_err("VFIO type-1 IOMMU not supported on this platform"); 5596078a454SJean-Philippe Brucker return iommu_type; 5606078a454SJean-Philippe Brucker } 5616078a454SJean-Philippe Brucker 5626078a454SJean-Philippe Brucker /* Create groups for our devices and add them to the container */ 5636078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) { 5646078a454SJean-Philippe Brucker vfio_devices[i].params = &kvm->cfg.vfio_devices[i]; 5656078a454SJean-Philippe Brucker 5666078a454SJean-Philippe Brucker ret = vfio_device_init(kvm, &vfio_devices[i]); 5676078a454SJean-Philippe Brucker if (ret) 5686078a454SJean-Philippe Brucker return ret; 5696078a454SJean-Philippe Brucker } 5706078a454SJean-Philippe Brucker 5716078a454SJean-Philippe Brucker /* Finalise the container */ 5726078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_SET_IOMMU, iommu_type)) { 5736078a454SJean-Philippe Brucker ret = -errno; 5746078a454SJean-Philippe Brucker pr_err("Failed to set IOMMU type %d for VFIO container", 5756078a454SJean-Philippe Brucker iommu_type); 5766078a454SJean-Philippe Brucker return ret; 5776078a454SJean-Philippe Brucker } else { 5786078a454SJean-Philippe Brucker pr_info("Using IOMMU type %d for VFIO container", iommu_type); 5796078a454SJean-Philippe Brucker } 5806078a454SJean-Philippe Brucker 5816078a454SJean-Philippe Brucker return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_map_mem_bank, 5826078a454SJean-Philippe Brucker NULL); 5836078a454SJean-Philippe Brucker } 5846078a454SJean-Philippe Brucker 5856078a454SJean-Philippe Brucker static int vfio__init(struct kvm *kvm) 5866078a454SJean-Philippe Brucker { 5876078a454SJean-Philippe Brucker int ret; 5886078a454SJean-Philippe Brucker 5896078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices) 5906078a454SJean-Philippe Brucker return 0; 5916078a454SJean-Philippe Brucker 5926078a454SJean-Philippe Brucker vfio_devices = calloc(kvm->cfg.num_vfio_devices, sizeof(*vfio_devices)); 5936078a454SJean-Philippe Brucker if (!vfio_devices) 5946078a454SJean-Philippe Brucker return -ENOMEM; 5956078a454SJean-Philippe Brucker 5966078a454SJean-Philippe Brucker ret = vfio_container_init(kvm); 5976078a454SJean-Philippe Brucker if (ret) 5986078a454SJean-Philippe Brucker return ret; 5996078a454SJean-Philippe Brucker 6006078a454SJean-Philippe Brucker ret = vfio_configure_devices(kvm); 6016078a454SJean-Philippe Brucker if (ret) 6026078a454SJean-Philippe Brucker return ret; 6036078a454SJean-Philippe Brucker 6046078a454SJean-Philippe Brucker return 0; 6056078a454SJean-Philippe Brucker } 6066078a454SJean-Philippe Brucker dev_base_init(vfio__init); 6076078a454SJean-Philippe Brucker 6086078a454SJean-Philippe Brucker static int vfio__exit(struct kvm *kvm) 6096078a454SJean-Philippe Brucker { 6106078a454SJean-Philippe Brucker int i; 6116078a454SJean-Philippe Brucker 6126078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices) 6136078a454SJean-Philippe Brucker return 0; 6146078a454SJean-Philippe Brucker 6156078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; i++) 6166078a454SJean-Philippe Brucker vfio_device_exit(kvm, &vfio_devices[i]); 6176078a454SJean-Philippe Brucker 6186078a454SJean-Philippe Brucker free(vfio_devices); 6196078a454SJean-Philippe Brucker 6206078a454SJean-Philippe Brucker kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL); 6216078a454SJean-Philippe Brucker close(vfio_container); 6226078a454SJean-Philippe Brucker 6236078a454SJean-Philippe Brucker free(kvm->cfg.vfio_devices); 6246078a454SJean-Philippe Brucker 6256078a454SJean-Philippe Brucker return 0; 6266078a454SJean-Philippe Brucker } 6276078a454SJean-Philippe Brucker dev_base_exit(vfio__exit); 628