16078a454SJean-Philippe Brucker #include "kvm/kvm.h" 26078a454SJean-Philippe Brucker #include "kvm/vfio.h" 382caa882SJean-Philippe Brucker #include "kvm/ioport.h" 46078a454SJean-Philippe Brucker 56078a454SJean-Philippe Brucker #include <linux/list.h> 66078a454SJean-Philippe Brucker 76078a454SJean-Philippe Brucker #define VFIO_DEV_DIR "/dev/vfio" 86078a454SJean-Philippe Brucker #define VFIO_DEV_NODE VFIO_DEV_DIR "/vfio" 96078a454SJean-Philippe Brucker #define IOMMU_GROUP_DIR "/sys/kernel/iommu_groups" 106078a454SJean-Philippe Brucker 116078a454SJean-Philippe Brucker static int vfio_container; 126078a454SJean-Philippe Brucker static LIST_HEAD(vfio_groups); 136078a454SJean-Philippe Brucker static struct vfio_device *vfio_devices; 146078a454SJean-Philippe Brucker 156078a454SJean-Philippe Brucker static int vfio_device_pci_parser(const struct option *opt, char *arg, 166078a454SJean-Philippe Brucker struct vfio_device_params *dev) 176078a454SJean-Philippe Brucker { 186078a454SJean-Philippe Brucker unsigned int domain, bus, devnr, fn; 196078a454SJean-Philippe Brucker 206078a454SJean-Philippe Brucker int nr = sscanf(arg, "%4x:%2x:%2x.%1x", &domain, &bus, &devnr, &fn); 216078a454SJean-Philippe Brucker if (nr < 4) { 226078a454SJean-Philippe Brucker domain = 0; 236078a454SJean-Philippe Brucker nr = sscanf(arg, "%2x:%2x.%1x", &bus, &devnr, &fn); 246078a454SJean-Philippe Brucker if (nr < 3) { 256078a454SJean-Philippe Brucker pr_err("Invalid device identifier %s", arg); 266078a454SJean-Philippe Brucker return -EINVAL; 276078a454SJean-Philippe Brucker } 286078a454SJean-Philippe Brucker } 296078a454SJean-Philippe Brucker 306078a454SJean-Philippe Brucker dev->type = VFIO_DEVICE_PCI; 316078a454SJean-Philippe Brucker dev->bus = "pci"; 326078a454SJean-Philippe Brucker dev->name = malloc(13); 336078a454SJean-Philippe Brucker if (!dev->name) 346078a454SJean-Philippe Brucker return -ENOMEM; 356078a454SJean-Philippe Brucker 366078a454SJean-Philippe Brucker snprintf(dev->name, 13, "%04x:%02x:%02x.%x", domain, bus, devnr, fn); 376078a454SJean-Philippe Brucker 386078a454SJean-Philippe Brucker return 0; 396078a454SJean-Philippe Brucker } 406078a454SJean-Philippe Brucker 416078a454SJean-Philippe Brucker int vfio_device_parser(const struct option *opt, const char *arg, int unset) 426078a454SJean-Philippe Brucker { 436078a454SJean-Philippe Brucker int ret = -EINVAL; 446078a454SJean-Philippe Brucker static int idx = 0; 456078a454SJean-Philippe Brucker struct kvm *kvm = opt->ptr; 466078a454SJean-Philippe Brucker struct vfio_device_params *dev, *devs; 476078a454SJean-Philippe Brucker char *cur, *buf = strdup(arg); 486078a454SJean-Philippe Brucker 496078a454SJean-Philippe Brucker if (!buf) 506078a454SJean-Philippe Brucker return -ENOMEM; 516078a454SJean-Philippe Brucker 526078a454SJean-Philippe Brucker if (idx >= MAX_VFIO_DEVICES) { 536078a454SJean-Philippe Brucker pr_warning("Too many VFIO devices"); 546078a454SJean-Philippe Brucker goto out_free_buf; 556078a454SJean-Philippe Brucker } 566078a454SJean-Philippe Brucker 576078a454SJean-Philippe Brucker devs = realloc(kvm->cfg.vfio_devices, sizeof(*dev) * (idx + 1)); 586078a454SJean-Philippe Brucker if (!devs) { 596078a454SJean-Philippe Brucker ret = -ENOMEM; 606078a454SJean-Philippe Brucker goto out_free_buf; 616078a454SJean-Philippe Brucker } 626078a454SJean-Philippe Brucker 636078a454SJean-Philippe Brucker kvm->cfg.vfio_devices = devs; 646078a454SJean-Philippe Brucker dev = &devs[idx]; 656078a454SJean-Philippe Brucker 666078a454SJean-Philippe Brucker cur = strtok(buf, ","); 676078a454SJean-Philippe Brucker if (!cur) 686078a454SJean-Philippe Brucker goto out_free_buf; 696078a454SJean-Philippe Brucker 706078a454SJean-Philippe Brucker if (!strcmp(opt->long_name, "vfio-pci")) 716078a454SJean-Philippe Brucker ret = vfio_device_pci_parser(opt, cur, dev); 726078a454SJean-Philippe Brucker else 736078a454SJean-Philippe Brucker ret = -EINVAL; 746078a454SJean-Philippe Brucker 756078a454SJean-Philippe Brucker if (!ret) 766078a454SJean-Philippe Brucker kvm->cfg.num_vfio_devices = ++idx; 776078a454SJean-Philippe Brucker 786078a454SJean-Philippe Brucker out_free_buf: 796078a454SJean-Philippe Brucker free(buf); 806078a454SJean-Philippe Brucker 816078a454SJean-Philippe Brucker return ret; 826078a454SJean-Philippe Brucker } 836078a454SJean-Philippe Brucker 84*a4a0dac7SAndre Przywara static bool _vfio_ioport_in(struct vfio_region *region, u32 offset, 85*a4a0dac7SAndre Przywara void *data, int len) 8682caa882SJean-Philippe Brucker { 8782caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 88*a4a0dac7SAndre Przywara ssize_t nr; 89*a4a0dac7SAndre Przywara u32 val; 9082caa882SJean-Philippe Brucker 9182caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 9282caa882SJean-Philippe Brucker return false; 9382caa882SJean-Philippe Brucker 9482caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 9582caa882SJean-Philippe Brucker if (nr != len) { 9682caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n", 97*a4a0dac7SAndre Przywara len, offset + region->port_base); 9882caa882SJean-Philippe Brucker return false; 9982caa882SJean-Philippe Brucker } 10082caa882SJean-Philippe Brucker 10182caa882SJean-Philippe Brucker switch (len) { 10282caa882SJean-Philippe Brucker case 1: 10382caa882SJean-Philippe Brucker ioport__write8(data, val); 10482caa882SJean-Philippe Brucker break; 10582caa882SJean-Philippe Brucker case 2: 10682caa882SJean-Philippe Brucker ioport__write16(data, val); 10782caa882SJean-Philippe Brucker break; 10882caa882SJean-Philippe Brucker case 4: 10982caa882SJean-Philippe Brucker ioport__write32(data, val); 11082caa882SJean-Philippe Brucker break; 11182caa882SJean-Philippe Brucker default: 11282caa882SJean-Philippe Brucker return false; 11382caa882SJean-Philippe Brucker } 11482caa882SJean-Philippe Brucker 11582caa882SJean-Philippe Brucker return true; 11682caa882SJean-Philippe Brucker } 11782caa882SJean-Philippe Brucker 118*a4a0dac7SAndre Przywara static bool _vfio_ioport_out(struct vfio_region *region, u32 offset, 119*a4a0dac7SAndre Przywara void *data, int len) 12082caa882SJean-Philippe Brucker { 12182caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 122*a4a0dac7SAndre Przywara ssize_t nr; 123*a4a0dac7SAndre Przywara u32 val; 12482caa882SJean-Philippe Brucker 12582caa882SJean-Philippe Brucker 12682caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 12782caa882SJean-Philippe Brucker return false; 12882caa882SJean-Philippe Brucker 12982caa882SJean-Philippe Brucker switch (len) { 13082caa882SJean-Philippe Brucker case 1: 13182caa882SJean-Philippe Brucker val = ioport__read8(data); 13282caa882SJean-Philippe Brucker break; 13382caa882SJean-Philippe Brucker case 2: 13482caa882SJean-Philippe Brucker val = ioport__read16(data); 13582caa882SJean-Philippe Brucker break; 13682caa882SJean-Philippe Brucker case 4: 13782caa882SJean-Philippe Brucker val = ioport__read32(data); 13882caa882SJean-Philippe Brucker break; 13982caa882SJean-Philippe Brucker default: 14082caa882SJean-Philippe Brucker return false; 14182caa882SJean-Philippe Brucker } 14282caa882SJean-Philippe Brucker 14382caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 14482caa882SJean-Philippe Brucker if (nr != len) 14582caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x", 146*a4a0dac7SAndre Przywara len, offset + region->port_base); 14782caa882SJean-Philippe Brucker 14882caa882SJean-Philippe Brucker return nr == len; 14982caa882SJean-Philippe Brucker } 15082caa882SJean-Philippe Brucker 151*a4a0dac7SAndre Przywara static void vfio_ioport_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len, 152*a4a0dac7SAndre Przywara u8 is_write, void *ptr) 153*a4a0dac7SAndre Przywara { 154*a4a0dac7SAndre Przywara struct vfio_region *region = ptr; 155*a4a0dac7SAndre Przywara u32 offset = addr - region->port_base; 156*a4a0dac7SAndre Przywara 157*a4a0dac7SAndre Przywara if (is_write) 158*a4a0dac7SAndre Przywara _vfio_ioport_out(region, offset, data, len); 159*a4a0dac7SAndre Przywara else 160*a4a0dac7SAndre Przywara _vfio_ioport_in(region, offset, data, len); 161*a4a0dac7SAndre Przywara } 162*a4a0dac7SAndre Przywara 163*a4a0dac7SAndre Przywara static bool vfio_ioport_out(struct ioport *ioport, struct kvm_cpu *vcpu, 164*a4a0dac7SAndre Przywara u16 port, void *data, int len) 165*a4a0dac7SAndre Przywara { 166*a4a0dac7SAndre Przywara vfio_ioport_mmio(vcpu, port, data, len, true, ioport->priv); 167*a4a0dac7SAndre Przywara return true; 168*a4a0dac7SAndre Przywara } 169*a4a0dac7SAndre Przywara 170*a4a0dac7SAndre Przywara static bool vfio_ioport_in(struct ioport *ioport, struct kvm_cpu *vcpu, 171*a4a0dac7SAndre Przywara u16 port, void *data, int len) 172*a4a0dac7SAndre Przywara { 173*a4a0dac7SAndre Przywara vfio_ioport_mmio(vcpu, port, data, len, false, ioport->priv); 174*a4a0dac7SAndre Przywara return true; 175*a4a0dac7SAndre Przywara } 176*a4a0dac7SAndre Przywara 17782caa882SJean-Philippe Brucker static struct ioport_operations vfio_ioport_ops = { 17882caa882SJean-Philippe Brucker .io_in = vfio_ioport_in, 17982caa882SJean-Philippe Brucker .io_out = vfio_ioport_out, 18082caa882SJean-Philippe Brucker }; 18182caa882SJean-Philippe Brucker 18282caa882SJean-Philippe Brucker static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len, 18382caa882SJean-Philippe Brucker u8 is_write, void *ptr) 18482caa882SJean-Philippe Brucker { 18582caa882SJean-Philippe Brucker u64 val; 18682caa882SJean-Philippe Brucker ssize_t nr; 18782caa882SJean-Philippe Brucker struct vfio_region *region = ptr; 18882caa882SJean-Philippe Brucker struct vfio_device *vdev = region->vdev; 18982caa882SJean-Philippe Brucker 19082caa882SJean-Philippe Brucker u32 offset = addr - region->guest_phys_addr; 19182caa882SJean-Philippe Brucker 19282caa882SJean-Philippe Brucker if (len < 1 || len > 8) 19382caa882SJean-Philippe Brucker goto err_report; 19482caa882SJean-Philippe Brucker 19582caa882SJean-Philippe Brucker if (is_write) { 19682caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) 19782caa882SJean-Philippe Brucker goto err_report; 19882caa882SJean-Philippe Brucker 19982caa882SJean-Philippe Brucker memcpy(&val, data, len); 20082caa882SJean-Philippe Brucker 20182caa882SJean-Philippe Brucker nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); 20282caa882SJean-Philippe Brucker if ((u32)nr != len) 20382caa882SJean-Philippe Brucker goto err_report; 20482caa882SJean-Philippe Brucker } else { 20582caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) 20682caa882SJean-Philippe Brucker goto err_report; 20782caa882SJean-Philippe Brucker 20882caa882SJean-Philippe Brucker nr = pread(vdev->fd, &val, len, region->info.offset + offset); 20982caa882SJean-Philippe Brucker if ((u32)nr != len) 21082caa882SJean-Philippe Brucker goto err_report; 21182caa882SJean-Philippe Brucker 21282caa882SJean-Philippe Brucker memcpy(data, &val, len); 21382caa882SJean-Philippe Brucker } 21482caa882SJean-Philippe Brucker 21582caa882SJean-Philippe Brucker return; 21682caa882SJean-Philippe Brucker 21782caa882SJean-Philippe Brucker err_report: 21882caa882SJean-Philippe Brucker vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ? 21982caa882SJean-Philippe Brucker "write" : "read", len, offset, addr); 22082caa882SJean-Philippe Brucker } 22182caa882SJean-Philippe Brucker 22282caa882SJean-Philippe Brucker static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev, 22382caa882SJean-Philippe Brucker struct vfio_region *region) 22482caa882SJean-Philippe Brucker { 22582caa882SJean-Philippe Brucker if (region->is_ioport) { 226a05e576fSAlexandru Elisei int port = ioport__register(kvm, region->port_base, 227a05e576fSAlexandru Elisei &vfio_ioport_ops, region->info.size, 228a05e576fSAlexandru Elisei region); 22982caa882SJean-Philippe Brucker if (port < 0) 23082caa882SJean-Philippe Brucker return port; 23182caa882SJean-Philippe Brucker return 0; 23282caa882SJean-Philippe Brucker } 23382caa882SJean-Philippe Brucker 23482caa882SJean-Philippe Brucker return kvm__register_mmio(kvm, region->guest_phys_addr, 23582caa882SJean-Philippe Brucker region->info.size, false, vfio_mmio_access, 23682caa882SJean-Philippe Brucker region); 23782caa882SJean-Philippe Brucker } 23882caa882SJean-Philippe Brucker 2396078a454SJean-Philippe Brucker int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, 2406078a454SJean-Philippe Brucker struct vfio_region *region) 2416078a454SJean-Philippe Brucker { 2426078a454SJean-Philippe Brucker void *base; 2436078a454SJean-Philippe Brucker int ret, prot = 0; 2446078a454SJean-Philippe Brucker /* KVM needs page-aligned regions */ 2456078a454SJean-Philippe Brucker u64 map_size = ALIGN(region->info.size, PAGE_SIZE); 2466078a454SJean-Philippe Brucker 24782caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) 24882caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2496078a454SJean-Philippe Brucker 250b4fc4f60SAlexandru Elisei /* 251b4fc4f60SAlexandru Elisei * KVM_SET_USER_MEMORY_REGION will fail because the guest physical 252b4fc4f60SAlexandru Elisei * address isn't page aligned, let's emulate the region ourselves. 253b4fc4f60SAlexandru Elisei */ 254b4fc4f60SAlexandru Elisei if (region->guest_phys_addr & (PAGE_SIZE - 1)) 255b4fc4f60SAlexandru Elisei return kvm__register_mmio(kvm, region->guest_phys_addr, 256b4fc4f60SAlexandru Elisei region->info.size, false, 257b4fc4f60SAlexandru Elisei vfio_mmio_access, region); 258b4fc4f60SAlexandru Elisei 2596078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_READ) 2606078a454SJean-Philippe Brucker prot |= PROT_READ; 2616078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 2626078a454SJean-Philippe Brucker prot |= PROT_WRITE; 2636078a454SJean-Philippe Brucker 2646078a454SJean-Philippe Brucker base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd, 2656078a454SJean-Philippe Brucker region->info.offset); 2666078a454SJean-Philippe Brucker if (base == MAP_FAILED) { 26782caa882SJean-Philippe Brucker /* TODO: support sparse mmap */ 26882caa882SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping", 2696078a454SJean-Philippe Brucker region->info.index, region->info.size); 27082caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region); 2716078a454SJean-Philippe Brucker } 2726078a454SJean-Philippe Brucker region->host_addr = base; 2736078a454SJean-Philippe Brucker 2746078a454SJean-Philippe Brucker ret = kvm__register_dev_mem(kvm, region->guest_phys_addr, map_size, 2756078a454SJean-Philippe Brucker region->host_addr); 2766078a454SJean-Philippe Brucker if (ret) { 2776078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to register region with KVM"); 2786078a454SJean-Philippe Brucker return ret; 2796078a454SJean-Philippe Brucker } 2806078a454SJean-Philippe Brucker 2816078a454SJean-Philippe Brucker return 0; 2826078a454SJean-Philippe Brucker } 2836078a454SJean-Philippe Brucker 2846078a454SJean-Philippe Brucker void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region) 2856078a454SJean-Philippe Brucker { 2868d987725SAlexandru Elisei u64 map_size; 2878d987725SAlexandru Elisei 28882caa882SJean-Philippe Brucker if (region->host_addr) { 2898d987725SAlexandru Elisei map_size = ALIGN(region->info.size, PAGE_SIZE); 2908d987725SAlexandru Elisei kvm__destroy_mem(kvm, region->guest_phys_addr, map_size, 2918d987725SAlexandru Elisei region->host_addr); 2926078a454SJean-Philippe Brucker munmap(region->host_addr, region->info.size); 2938d987725SAlexandru Elisei region->host_addr = NULL; 29482caa882SJean-Philippe Brucker } else if (region->is_ioport) { 29582caa882SJean-Philippe Brucker ioport__unregister(kvm, region->port_base); 29682caa882SJean-Philippe Brucker } else { 29782caa882SJean-Philippe Brucker kvm__deregister_mmio(kvm, region->guest_phys_addr); 29882caa882SJean-Philippe Brucker } 2996078a454SJean-Philippe Brucker } 3006078a454SJean-Philippe Brucker 3016078a454SJean-Philippe Brucker static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev) 3026078a454SJean-Philippe Brucker { 3036078a454SJean-Philippe Brucker int ret; 3046078a454SJean-Philippe Brucker struct vfio_group *group = vdev->group; 3056078a454SJean-Philippe Brucker 3066078a454SJean-Philippe Brucker vdev->fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, 3076078a454SJean-Philippe Brucker vdev->params->name); 3086078a454SJean-Philippe Brucker if (vdev->fd < 0) { 3096078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to get fd"); 3106078a454SJean-Philippe Brucker 3116078a454SJean-Philippe Brucker /* The device might be a bridge without an fd */ 3126078a454SJean-Philippe Brucker return 0; 3136078a454SJean-Philippe Brucker } 3146078a454SJean-Philippe Brucker 3156078a454SJean-Philippe Brucker vdev->info.argsz = sizeof(vdev->info); 3166078a454SJean-Philippe Brucker if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) { 3176078a454SJean-Philippe Brucker ret = -errno; 3186078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to get info"); 3196078a454SJean-Philippe Brucker goto err_close_device; 3206078a454SJean-Philippe Brucker } 3216078a454SJean-Philippe Brucker 3226078a454SJean-Philippe Brucker if (vdev->info.flags & VFIO_DEVICE_FLAGS_RESET && 3236078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0) 3246078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to reset device"); 3256078a454SJean-Philippe Brucker 3266078a454SJean-Philippe Brucker vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions)); 3276078a454SJean-Philippe Brucker if (!vdev->regions) { 3286078a454SJean-Philippe Brucker ret = -ENOMEM; 3296078a454SJean-Philippe Brucker goto err_close_device; 3306078a454SJean-Philippe Brucker } 3316078a454SJean-Philippe Brucker 3326078a454SJean-Philippe Brucker /* Now for the bus-specific initialization... */ 3336078a454SJean-Philippe Brucker switch (vdev->params->type) { 3346078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI: 3356078a454SJean-Philippe Brucker BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI)); 3366078a454SJean-Philippe Brucker ret = vfio_pci_setup_device(kvm, vdev); 3376078a454SJean-Philippe Brucker break; 3386078a454SJean-Philippe Brucker default: 3396078a454SJean-Philippe Brucker BUG_ON(1); 3406078a454SJean-Philippe Brucker ret = -EINVAL; 3416078a454SJean-Philippe Brucker } 3426078a454SJean-Philippe Brucker 3436078a454SJean-Philippe Brucker if (ret) 3446078a454SJean-Philippe Brucker goto err_free_regions; 3456078a454SJean-Philippe Brucker 3466078a454SJean-Philippe Brucker vfio_dev_info(vdev, "assigned to device number 0x%x in group %lu", 3476078a454SJean-Philippe Brucker vdev->dev_hdr.dev_num, group->id); 3486078a454SJean-Philippe Brucker 3496078a454SJean-Philippe Brucker return 0; 3506078a454SJean-Philippe Brucker 3516078a454SJean-Philippe Brucker err_free_regions: 3526078a454SJean-Philippe Brucker free(vdev->regions); 3536078a454SJean-Philippe Brucker err_close_device: 3546078a454SJean-Philippe Brucker close(vdev->fd); 3556078a454SJean-Philippe Brucker 3566078a454SJean-Philippe Brucker return ret; 3576078a454SJean-Philippe Brucker } 3586078a454SJean-Philippe Brucker 3596078a454SJean-Philippe Brucker static int vfio_configure_devices(struct kvm *kvm) 3606078a454SJean-Philippe Brucker { 3616078a454SJean-Philippe Brucker int i, ret; 3626078a454SJean-Philippe Brucker 3636078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) { 3646078a454SJean-Philippe Brucker ret = vfio_configure_device(kvm, &vfio_devices[i]); 3656078a454SJean-Philippe Brucker if (ret) 3666078a454SJean-Philippe Brucker return ret; 3676078a454SJean-Philippe Brucker } 3686078a454SJean-Philippe Brucker 3696078a454SJean-Philippe Brucker return 0; 3706078a454SJean-Philippe Brucker } 3716078a454SJean-Philippe Brucker 3726078a454SJean-Philippe Brucker static int vfio_get_iommu_type(void) 3736078a454SJean-Philippe Brucker { 3746078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) 3756078a454SJean-Philippe Brucker return VFIO_TYPE1v2_IOMMU; 3766078a454SJean-Philippe Brucker 3776078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) 3786078a454SJean-Philippe Brucker return VFIO_TYPE1_IOMMU; 3796078a454SJean-Philippe Brucker 3806078a454SJean-Philippe Brucker return -ENODEV; 3816078a454SJean-Philippe Brucker } 3826078a454SJean-Philippe Brucker 3836078a454SJean-Philippe Brucker static int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 3846078a454SJean-Philippe Brucker { 3856078a454SJean-Philippe Brucker int ret = 0; 3866078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_map dma_map = { 3876078a454SJean-Philippe Brucker .argsz = sizeof(dma_map), 3886078a454SJean-Philippe Brucker .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, 3896078a454SJean-Philippe Brucker .vaddr = (unsigned long)bank->host_addr, 3906078a454SJean-Philippe Brucker .iova = (u64)bank->guest_phys_addr, 3916078a454SJean-Philippe Brucker .size = bank->size, 3926078a454SJean-Philippe Brucker }; 3936078a454SJean-Philippe Brucker 3946078a454SJean-Philippe Brucker /* Map the guest memory for DMA (i.e. provide isolation) */ 3956078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_IOMMU_MAP_DMA, &dma_map)) { 3966078a454SJean-Philippe Brucker ret = -errno; 3976078a454SJean-Philippe Brucker pr_err("Failed to map 0x%llx -> 0x%llx (%llu) for DMA", 3986078a454SJean-Philippe Brucker dma_map.iova, dma_map.vaddr, dma_map.size); 3996078a454SJean-Philippe Brucker } 4006078a454SJean-Philippe Brucker 4016078a454SJean-Philippe Brucker return ret; 4026078a454SJean-Philippe Brucker } 4036078a454SJean-Philippe Brucker 4046078a454SJean-Philippe Brucker static int vfio_unmap_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data) 4056078a454SJean-Philippe Brucker { 4066078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_unmap dma_unmap = { 4076078a454SJean-Philippe Brucker .argsz = sizeof(dma_unmap), 4086078a454SJean-Philippe Brucker .size = bank->size, 4096078a454SJean-Philippe Brucker .iova = bank->guest_phys_addr, 4106078a454SJean-Philippe Brucker }; 4116078a454SJean-Philippe Brucker 4126078a454SJean-Philippe Brucker ioctl(vfio_container, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); 4136078a454SJean-Philippe Brucker 4146078a454SJean-Philippe Brucker return 0; 4156078a454SJean-Philippe Brucker } 4166078a454SJean-Philippe Brucker 41741d773e2SJean-Philippe Brucker static int vfio_configure_reserved_regions(struct kvm *kvm, 41841d773e2SJean-Philippe Brucker struct vfio_group *group) 41941d773e2SJean-Philippe Brucker { 42041d773e2SJean-Philippe Brucker FILE *file; 42141d773e2SJean-Philippe Brucker int ret = 0; 42241d773e2SJean-Philippe Brucker char type[9]; 42341d773e2SJean-Philippe Brucker char filename[PATH_MAX]; 42441d773e2SJean-Philippe Brucker unsigned long long start, end; 42541d773e2SJean-Philippe Brucker 42641d773e2SJean-Philippe Brucker snprintf(filename, PATH_MAX, IOMMU_GROUP_DIR "/%lu/reserved_regions", 42741d773e2SJean-Philippe Brucker group->id); 42841d773e2SJean-Philippe Brucker 42941d773e2SJean-Philippe Brucker /* reserved_regions might not be present on older systems */ 43041d773e2SJean-Philippe Brucker if (access(filename, F_OK)) 43141d773e2SJean-Philippe Brucker return 0; 43241d773e2SJean-Philippe Brucker 43341d773e2SJean-Philippe Brucker file = fopen(filename, "r"); 43441d773e2SJean-Philippe Brucker if (!file) 43541d773e2SJean-Philippe Brucker return -errno; 43641d773e2SJean-Philippe Brucker 43741d773e2SJean-Philippe Brucker while (fscanf(file, "0x%llx 0x%llx %8s\n", &start, &end, type) == 3) { 43841d773e2SJean-Philippe Brucker ret = kvm__reserve_mem(kvm, start, end - start + 1); 43941d773e2SJean-Philippe Brucker if (ret) 44041d773e2SJean-Philippe Brucker break; 44141d773e2SJean-Philippe Brucker } 44241d773e2SJean-Philippe Brucker 44341d773e2SJean-Philippe Brucker fclose(file); 44441d773e2SJean-Philippe Brucker 44541d773e2SJean-Philippe Brucker return ret; 44641d773e2SJean-Philippe Brucker } 44741d773e2SJean-Philippe Brucker 44841d773e2SJean-Philippe Brucker static int vfio_configure_groups(struct kvm *kvm) 44941d773e2SJean-Philippe Brucker { 45041d773e2SJean-Philippe Brucker int ret; 45141d773e2SJean-Philippe Brucker struct vfio_group *group; 45241d773e2SJean-Philippe Brucker 45341d773e2SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) { 45441d773e2SJean-Philippe Brucker ret = vfio_configure_reserved_regions(kvm, group); 45541d773e2SJean-Philippe Brucker if (ret) 45641d773e2SJean-Philippe Brucker return ret; 45741d773e2SJean-Philippe Brucker } 45841d773e2SJean-Philippe Brucker 45941d773e2SJean-Philippe Brucker return 0; 46041d773e2SJean-Philippe Brucker } 46141d773e2SJean-Philippe Brucker 4626078a454SJean-Philippe Brucker static struct vfio_group *vfio_group_create(struct kvm *kvm, unsigned long id) 4636078a454SJean-Philippe Brucker { 4646078a454SJean-Philippe Brucker int ret; 4656078a454SJean-Philippe Brucker struct vfio_group *group; 4666078a454SJean-Philippe Brucker char group_node[PATH_MAX]; 4676078a454SJean-Philippe Brucker struct vfio_group_status group_status = { 4686078a454SJean-Philippe Brucker .argsz = sizeof(group_status), 4696078a454SJean-Philippe Brucker }; 4706078a454SJean-Philippe Brucker 4716078a454SJean-Philippe Brucker group = calloc(1, sizeof(*group)); 4726078a454SJean-Philippe Brucker if (!group) 4736078a454SJean-Philippe Brucker return NULL; 4746078a454SJean-Philippe Brucker 4756078a454SJean-Philippe Brucker group->id = id; 4766078a454SJean-Philippe Brucker group->refs = 1; 4776078a454SJean-Philippe Brucker 4786078a454SJean-Philippe Brucker ret = snprintf(group_node, PATH_MAX, VFIO_DEV_DIR "/%lu", id); 4796078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 4806078a454SJean-Philippe Brucker return NULL; 4816078a454SJean-Philippe Brucker 4826078a454SJean-Philippe Brucker group->fd = open(group_node, O_RDWR); 4836078a454SJean-Philippe Brucker if (group->fd < 0) { 4846078a454SJean-Philippe Brucker pr_err("Failed to open IOMMU group %s", group_node); 4856078a454SJean-Philippe Brucker goto err_free_group; 4866078a454SJean-Philippe Brucker } 4876078a454SJean-Philippe Brucker 4886078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &group_status)) { 4896078a454SJean-Philippe Brucker pr_err("Failed to determine status of IOMMU group %lu", id); 4906078a454SJean-Philippe Brucker goto err_close_group; 4916078a454SJean-Philippe Brucker } 4926078a454SJean-Philippe Brucker 4936078a454SJean-Philippe Brucker if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { 4946078a454SJean-Philippe Brucker pr_err("IOMMU group %lu is not viable", id); 4956078a454SJean-Philippe Brucker goto err_close_group; 4966078a454SJean-Philippe Brucker } 4976078a454SJean-Philippe Brucker 4986078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &vfio_container)) { 4996078a454SJean-Philippe Brucker pr_err("Failed to add IOMMU group %lu to VFIO container", id); 5006078a454SJean-Philippe Brucker goto err_close_group; 5016078a454SJean-Philippe Brucker } 5026078a454SJean-Philippe Brucker 5036078a454SJean-Philippe Brucker list_add(&group->list, &vfio_groups); 5046078a454SJean-Philippe Brucker 5056078a454SJean-Philippe Brucker return group; 5066078a454SJean-Philippe Brucker 5076078a454SJean-Philippe Brucker err_close_group: 5086078a454SJean-Philippe Brucker close(group->fd); 5096078a454SJean-Philippe Brucker err_free_group: 5106078a454SJean-Philippe Brucker free(group); 5116078a454SJean-Philippe Brucker 5126078a454SJean-Philippe Brucker return NULL; 5136078a454SJean-Philippe Brucker } 5146078a454SJean-Philippe Brucker 5156078a454SJean-Philippe Brucker static void vfio_group_exit(struct kvm *kvm, struct vfio_group *group) 5166078a454SJean-Philippe Brucker { 5176078a454SJean-Philippe Brucker if (--group->refs != 0) 5186078a454SJean-Philippe Brucker return; 5196078a454SJean-Philippe Brucker 5206078a454SJean-Philippe Brucker ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER); 5216078a454SJean-Philippe Brucker 5226078a454SJean-Philippe Brucker list_del(&group->list); 5236078a454SJean-Philippe Brucker close(group->fd); 5246078a454SJean-Philippe Brucker free(group); 5256078a454SJean-Philippe Brucker } 5266078a454SJean-Philippe Brucker 5276078a454SJean-Philippe Brucker static struct vfio_group * 5286078a454SJean-Philippe Brucker vfio_group_get_for_dev(struct kvm *kvm, struct vfio_device *vdev) 5296078a454SJean-Philippe Brucker { 5306078a454SJean-Philippe Brucker int dirfd; 5316078a454SJean-Philippe Brucker ssize_t ret; 5326078a454SJean-Philippe Brucker char *group_name; 5336078a454SJean-Philippe Brucker unsigned long group_id; 5346078a454SJean-Philippe Brucker char group_path[PATH_MAX]; 5356078a454SJean-Philippe Brucker struct vfio_group *group = NULL; 5366078a454SJean-Philippe Brucker 5376078a454SJean-Philippe Brucker /* Find IOMMU group for this device */ 5386078a454SJean-Philippe Brucker dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY); 5396078a454SJean-Philippe Brucker if (dirfd < 0) { 5406078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path); 5416078a454SJean-Philippe Brucker return NULL; 5426078a454SJean-Philippe Brucker } 5436078a454SJean-Philippe Brucker 5446078a454SJean-Philippe Brucker ret = readlinkat(dirfd, "iommu_group", group_path, PATH_MAX); 5456078a454SJean-Philippe Brucker if (ret < 0) { 5466078a454SJean-Philippe Brucker vfio_dev_err(vdev, "no iommu_group"); 5476078a454SJean-Philippe Brucker goto out_close; 5486078a454SJean-Philippe Brucker } 5496078a454SJean-Philippe Brucker if (ret == PATH_MAX) 5506078a454SJean-Philippe Brucker goto out_close; 5516078a454SJean-Philippe Brucker 5526078a454SJean-Philippe Brucker group_path[ret] = '\0'; 5536078a454SJean-Philippe Brucker 5546078a454SJean-Philippe Brucker group_name = basename(group_path); 5556078a454SJean-Philippe Brucker errno = 0; 5566078a454SJean-Philippe Brucker group_id = strtoul(group_name, NULL, 10); 5576078a454SJean-Philippe Brucker if (errno) 5586078a454SJean-Philippe Brucker goto out_close; 5596078a454SJean-Philippe Brucker 5606078a454SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) { 5616078a454SJean-Philippe Brucker if (group->id == group_id) { 5626078a454SJean-Philippe Brucker group->refs++; 5636078a454SJean-Philippe Brucker return group; 5646078a454SJean-Philippe Brucker } 5656078a454SJean-Philippe Brucker } 5666078a454SJean-Philippe Brucker 5676078a454SJean-Philippe Brucker group = vfio_group_create(kvm, group_id); 5686078a454SJean-Philippe Brucker 5696078a454SJean-Philippe Brucker out_close: 5706078a454SJean-Philippe Brucker close(dirfd); 5716078a454SJean-Philippe Brucker return group; 5726078a454SJean-Philippe Brucker } 5736078a454SJean-Philippe Brucker 5746078a454SJean-Philippe Brucker static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev) 5756078a454SJean-Philippe Brucker { 5766078a454SJean-Philippe Brucker int ret; 5776078a454SJean-Philippe Brucker char dev_path[PATH_MAX]; 5786078a454SJean-Philippe Brucker struct vfio_group *group; 5796078a454SJean-Philippe Brucker 5806078a454SJean-Philippe Brucker ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s", 5816078a454SJean-Philippe Brucker vdev->params->bus, vdev->params->name); 5826078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX) 5836078a454SJean-Philippe Brucker return -EINVAL; 5846078a454SJean-Philippe Brucker 5856078a454SJean-Philippe Brucker vdev->sysfs_path = strndup(dev_path, PATH_MAX); 5866078a454SJean-Philippe Brucker if (!vdev->sysfs_path) 5876078a454SJean-Philippe Brucker return -errno; 5886078a454SJean-Philippe Brucker 5896078a454SJean-Philippe Brucker group = vfio_group_get_for_dev(kvm, vdev); 5906078a454SJean-Philippe Brucker if (!group) { 5916078a454SJean-Philippe Brucker free(vdev->sysfs_path); 5926078a454SJean-Philippe Brucker return -EINVAL; 5936078a454SJean-Philippe Brucker } 5946078a454SJean-Philippe Brucker 5956078a454SJean-Philippe Brucker vdev->group = group; 5966078a454SJean-Philippe Brucker 5976078a454SJean-Philippe Brucker return 0; 5986078a454SJean-Philippe Brucker } 5996078a454SJean-Philippe Brucker 6006078a454SJean-Philippe Brucker static void vfio_device_exit(struct kvm *kvm, struct vfio_device *vdev) 6016078a454SJean-Philippe Brucker { 6026078a454SJean-Philippe Brucker vfio_group_exit(kvm, vdev->group); 6036078a454SJean-Philippe Brucker 6046078a454SJean-Philippe Brucker switch (vdev->params->type) { 6056078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI: 6066078a454SJean-Philippe Brucker vfio_pci_teardown_device(kvm, vdev); 6076078a454SJean-Philippe Brucker break; 6086078a454SJean-Philippe Brucker default: 6096078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "no teardown function for device"); 6106078a454SJean-Philippe Brucker } 6116078a454SJean-Philippe Brucker 6126078a454SJean-Philippe Brucker close(vdev->fd); 6136078a454SJean-Philippe Brucker 6146078a454SJean-Philippe Brucker free(vdev->regions); 6156078a454SJean-Philippe Brucker free(vdev->sysfs_path); 6166078a454SJean-Philippe Brucker } 6176078a454SJean-Philippe Brucker 6186078a454SJean-Philippe Brucker static int vfio_container_init(struct kvm *kvm) 6196078a454SJean-Philippe Brucker { 6206078a454SJean-Philippe Brucker int api, i, ret, iommu_type;; 6216078a454SJean-Philippe Brucker 6226078a454SJean-Philippe Brucker /* Create a container for our IOMMU groups */ 6236078a454SJean-Philippe Brucker vfio_container = open(VFIO_DEV_NODE, O_RDWR); 6246078a454SJean-Philippe Brucker if (vfio_container == -1) { 6256078a454SJean-Philippe Brucker ret = errno; 6266078a454SJean-Philippe Brucker pr_err("Failed to open %s", VFIO_DEV_NODE); 6276078a454SJean-Philippe Brucker return ret; 6286078a454SJean-Philippe Brucker } 6296078a454SJean-Philippe Brucker 6306078a454SJean-Philippe Brucker api = ioctl(vfio_container, VFIO_GET_API_VERSION); 6316078a454SJean-Philippe Brucker if (api != VFIO_API_VERSION) { 6326078a454SJean-Philippe Brucker pr_err("Unknown VFIO API version %d", api); 6336078a454SJean-Philippe Brucker return -ENODEV; 6346078a454SJean-Philippe Brucker } 6356078a454SJean-Philippe Brucker 6366078a454SJean-Philippe Brucker iommu_type = vfio_get_iommu_type(); 6376078a454SJean-Philippe Brucker if (iommu_type < 0) { 6386078a454SJean-Philippe Brucker pr_err("VFIO type-1 IOMMU not supported on this platform"); 6396078a454SJean-Philippe Brucker return iommu_type; 6406078a454SJean-Philippe Brucker } 6416078a454SJean-Philippe Brucker 6426078a454SJean-Philippe Brucker /* Create groups for our devices and add them to the container */ 6436078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) { 6446078a454SJean-Philippe Brucker vfio_devices[i].params = &kvm->cfg.vfio_devices[i]; 6456078a454SJean-Philippe Brucker 6466078a454SJean-Philippe Brucker ret = vfio_device_init(kvm, &vfio_devices[i]); 6476078a454SJean-Philippe Brucker if (ret) 6486078a454SJean-Philippe Brucker return ret; 6496078a454SJean-Philippe Brucker } 6506078a454SJean-Philippe Brucker 6516078a454SJean-Philippe Brucker /* Finalise the container */ 6526078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_SET_IOMMU, iommu_type)) { 6536078a454SJean-Philippe Brucker ret = -errno; 6546078a454SJean-Philippe Brucker pr_err("Failed to set IOMMU type %d for VFIO container", 6556078a454SJean-Philippe Brucker iommu_type); 6566078a454SJean-Philippe Brucker return ret; 6576078a454SJean-Philippe Brucker } else { 6586078a454SJean-Philippe Brucker pr_info("Using IOMMU type %d for VFIO container", iommu_type); 6596078a454SJean-Philippe Brucker } 6606078a454SJean-Philippe Brucker 6616078a454SJean-Philippe Brucker return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_map_mem_bank, 6626078a454SJean-Philippe Brucker NULL); 6636078a454SJean-Philippe Brucker } 6646078a454SJean-Philippe Brucker 6656078a454SJean-Philippe Brucker static int vfio__init(struct kvm *kvm) 6666078a454SJean-Philippe Brucker { 6676078a454SJean-Philippe Brucker int ret; 6686078a454SJean-Philippe Brucker 6696078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices) 6706078a454SJean-Philippe Brucker return 0; 6716078a454SJean-Philippe Brucker 6726078a454SJean-Philippe Brucker vfio_devices = calloc(kvm->cfg.num_vfio_devices, sizeof(*vfio_devices)); 6736078a454SJean-Philippe Brucker if (!vfio_devices) 6746078a454SJean-Philippe Brucker return -ENOMEM; 6756078a454SJean-Philippe Brucker 6766078a454SJean-Philippe Brucker ret = vfio_container_init(kvm); 6776078a454SJean-Philippe Brucker if (ret) 6786078a454SJean-Philippe Brucker return ret; 6796078a454SJean-Philippe Brucker 68041d773e2SJean-Philippe Brucker ret = vfio_configure_groups(kvm); 68141d773e2SJean-Philippe Brucker if (ret) 68241d773e2SJean-Philippe Brucker return ret; 68341d773e2SJean-Philippe Brucker 6846078a454SJean-Philippe Brucker ret = vfio_configure_devices(kvm); 6856078a454SJean-Philippe Brucker if (ret) 6866078a454SJean-Philippe Brucker return ret; 6876078a454SJean-Philippe Brucker 6886078a454SJean-Philippe Brucker return 0; 6896078a454SJean-Philippe Brucker } 6906078a454SJean-Philippe Brucker dev_base_init(vfio__init); 6916078a454SJean-Philippe Brucker 6926078a454SJean-Philippe Brucker static int vfio__exit(struct kvm *kvm) 6936078a454SJean-Philippe Brucker { 6946078a454SJean-Philippe Brucker int i; 6956078a454SJean-Philippe Brucker 6966078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices) 6976078a454SJean-Philippe Brucker return 0; 6986078a454SJean-Philippe Brucker 6996078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; i++) 7006078a454SJean-Philippe Brucker vfio_device_exit(kvm, &vfio_devices[i]); 7016078a454SJean-Philippe Brucker 7026078a454SJean-Philippe Brucker free(vfio_devices); 7036078a454SJean-Philippe Brucker 7046078a454SJean-Philippe Brucker kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL); 7056078a454SJean-Philippe Brucker close(vfio_container); 7066078a454SJean-Philippe Brucker 7076078a454SJean-Philippe Brucker free(kvm->cfg.vfio_devices); 7086078a454SJean-Philippe Brucker 7096078a454SJean-Philippe Brucker return 0; 7106078a454SJean-Philippe Brucker } 7116078a454SJean-Philippe Brucker dev_base_exit(vfio__exit); 712