16078a454SJean-Philippe Brucker #include "kvm/kvm.h"
26078a454SJean-Philippe Brucker #include "kvm/vfio.h"
382caa882SJean-Philippe Brucker #include "kvm/ioport.h"
46078a454SJean-Philippe Brucker
56078a454SJean-Philippe Brucker #include <linux/list.h>
66078a454SJean-Philippe Brucker
/* Directory holding the VFIO nodes; a group is opened as VFIO_DEV_DIR "/<id>" */
#define VFIO_DEV_DIR		"/dev/vfio"
/* NOTE(review): presumably the container node; its user is outside this view */
#define VFIO_DEV_NODE		VFIO_DEV_DIR "/vfio"
/* sysfs root under which per-group files are looked up by group id */
#define IOMMU_GROUP_DIR		"/sys/kernel/iommu_groups"

/* File descriptor that the container-level VFIO ioctls are issued on */
static int vfio_container;
/* All struct vfio_group objects created so far, linked through ->list */
static LIST_HEAD(vfio_groups);
/* Device array walked by index up to kvm->cfg.num_vfio_devices */
static struct vfio_device *vfio_devices;
146078a454SJean-Philippe Brucker
vfio_device_pci_parser(const struct option * opt,char * arg,struct vfio_device_params * dev)156078a454SJean-Philippe Brucker static int vfio_device_pci_parser(const struct option *opt, char *arg,
166078a454SJean-Philippe Brucker struct vfio_device_params *dev)
176078a454SJean-Philippe Brucker {
186078a454SJean-Philippe Brucker unsigned int domain, bus, devnr, fn;
196078a454SJean-Philippe Brucker
206078a454SJean-Philippe Brucker int nr = sscanf(arg, "%4x:%2x:%2x.%1x", &domain, &bus, &devnr, &fn);
216078a454SJean-Philippe Brucker if (nr < 4) {
226078a454SJean-Philippe Brucker domain = 0;
236078a454SJean-Philippe Brucker nr = sscanf(arg, "%2x:%2x.%1x", &bus, &devnr, &fn);
246078a454SJean-Philippe Brucker if (nr < 3) {
256078a454SJean-Philippe Brucker pr_err("Invalid device identifier %s", arg);
266078a454SJean-Philippe Brucker return -EINVAL;
276078a454SJean-Philippe Brucker }
286078a454SJean-Philippe Brucker }
296078a454SJean-Philippe Brucker
306078a454SJean-Philippe Brucker dev->type = VFIO_DEVICE_PCI;
316078a454SJean-Philippe Brucker dev->bus = "pci";
326078a454SJean-Philippe Brucker dev->name = malloc(13);
336078a454SJean-Philippe Brucker if (!dev->name)
346078a454SJean-Philippe Brucker return -ENOMEM;
356078a454SJean-Philippe Brucker
366078a454SJean-Philippe Brucker snprintf(dev->name, 13, "%04x:%02x:%02x.%x", domain, bus, devnr, fn);
376078a454SJean-Philippe Brucker
386078a454SJean-Philippe Brucker return 0;
396078a454SJean-Philippe Brucker }
406078a454SJean-Philippe Brucker
vfio_device_parser(const struct option * opt,const char * arg,int unset)416078a454SJean-Philippe Brucker int vfio_device_parser(const struct option *opt, const char *arg, int unset)
426078a454SJean-Philippe Brucker {
436078a454SJean-Philippe Brucker int ret = -EINVAL;
446078a454SJean-Philippe Brucker static int idx = 0;
456078a454SJean-Philippe Brucker struct kvm *kvm = opt->ptr;
466078a454SJean-Philippe Brucker struct vfio_device_params *dev, *devs;
476078a454SJean-Philippe Brucker char *cur, *buf = strdup(arg);
486078a454SJean-Philippe Brucker
496078a454SJean-Philippe Brucker if (!buf)
506078a454SJean-Philippe Brucker return -ENOMEM;
516078a454SJean-Philippe Brucker
526078a454SJean-Philippe Brucker if (idx >= MAX_VFIO_DEVICES) {
536078a454SJean-Philippe Brucker pr_warning("Too many VFIO devices");
546078a454SJean-Philippe Brucker goto out_free_buf;
556078a454SJean-Philippe Brucker }
566078a454SJean-Philippe Brucker
576078a454SJean-Philippe Brucker devs = realloc(kvm->cfg.vfio_devices, sizeof(*dev) * (idx + 1));
586078a454SJean-Philippe Brucker if (!devs) {
596078a454SJean-Philippe Brucker ret = -ENOMEM;
606078a454SJean-Philippe Brucker goto out_free_buf;
616078a454SJean-Philippe Brucker }
626078a454SJean-Philippe Brucker
636078a454SJean-Philippe Brucker kvm->cfg.vfio_devices = devs;
646078a454SJean-Philippe Brucker dev = &devs[idx];
656078a454SJean-Philippe Brucker
666078a454SJean-Philippe Brucker cur = strtok(buf, ",");
676078a454SJean-Philippe Brucker if (!cur)
686078a454SJean-Philippe Brucker goto out_free_buf;
696078a454SJean-Philippe Brucker
706078a454SJean-Philippe Brucker if (!strcmp(opt->long_name, "vfio-pci"))
716078a454SJean-Philippe Brucker ret = vfio_device_pci_parser(opt, cur, dev);
726078a454SJean-Philippe Brucker else
736078a454SJean-Philippe Brucker ret = -EINVAL;
746078a454SJean-Philippe Brucker
756078a454SJean-Philippe Brucker if (!ret)
766078a454SJean-Philippe Brucker kvm->cfg.num_vfio_devices = ++idx;
776078a454SJean-Philippe Brucker
786078a454SJean-Philippe Brucker out_free_buf:
796078a454SJean-Philippe Brucker free(buf);
806078a454SJean-Philippe Brucker
816078a454SJean-Philippe Brucker return ret;
826078a454SJean-Philippe Brucker }
836078a454SJean-Philippe Brucker
/*
 * Serve a guest read from a trapped I/O port region by reading the VFIO
 * device file.
 *
 * @region: region being accessed; provides the device and the file offset
 * @offset: offset of the access from the start of the region
 * @data:   destination buffer, filled through ioport__write{8,16,32}()
 * @len:    access width in bytes; only 1, 2 and 4 are supported
 *
 * Returns true if @len bytes were read and stored in @data, false if the
 * region is not readable, the width is unsupported or the read fell short.
 */
static bool vfio_ioport_in(struct vfio_region *region, u32 offset,
			   void *data, int len)
{
	struct vfio_device *vdev = region->vdev;
	ssize_t nr;
	u32 val = 0;

	if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
		return false;

	/*
	 * Reject unsupported widths before touching the device: the bounce
	 * buffer is only four bytes, so a larger @len would overflow it.
	 */
	if (len != 1 && len != 2 && len != 4)
		return false;

	nr = pread(vdev->fd, &val, len, region->info.offset + offset);
	if (nr != len) {
		vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n",
			     len, offset + region->port_base);
		return false;
	}

	switch (len) {
	case 1:
		ioport__write8(data, val);
		break;
	case 2:
		ioport__write16(data, val);
		break;
	default:	/* len == 4, guaranteed by the check above */
		ioport__write32(data, val);
		break;
	}

	return true;
}
11782caa882SJean-Philippe Brucker
/*
 * Serve a guest write to a trapped I/O port region by writing the value to
 * the VFIO device file.
 *
 * @region: region being accessed; provides the device and the file offset
 * @offset: offset of the access from the start of the region
 * @data:   source buffer, read through ioport__read{8,16,32}()
 * @len:    access width in bytes; only 1, 2 and 4 are supported
 *
 * Returns true if exactly @len bytes were written, false otherwise.
 */
static bool vfio_ioport_out(struct vfio_region *region, u32 offset,
			    void *data, int len)
{
	struct vfio_device *vdev = region->vdev;
	ssize_t written;
	u32 val;

	if ((region->info.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0)
		return false;

	if (len == 1)
		val = ioport__read8(data);
	else if (len == 2)
		val = ioport__read16(data);
	else if (len == 4)
		val = ioport__read32(data);
	else
		return false;

	written = pwrite(vdev->fd, &val, len, region->info.offset + offset);
	if (written == len)
		return true;

	vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x",
		     len, offset + region->port_base);

	return false;
}
15082caa882SJean-Philippe Brucker
/*
 * Trap handler passed to kvm__register_pio() for I/O port regions.
 *
 * @ptr is the struct vfio_region given at registration time. The access is
 * converted to a region-relative offset and dispatched to the in/out helper;
 * the helper's success/failure result is not propagated.
 */
static void vfio_ioport_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len,
			     u8 is_write, void *ptr)
{
	struct vfio_region *region = ptr;
	u32 offset = addr - region->port_base;

	if (!is_write) {
		vfio_ioport_in(region, offset, data, len);
		return;
	}

	vfio_ioport_out(region, offset, data, len);
}
162a4a0dac7SAndre Przywara
/*
 * Trap handler passed to kvm__register_mmio() for regions that are emulated
 * rather than mapped into the guest.
 *
 * @addr:     guest physical address of the access
 * @data:     buffer holding the value to write, or receiving the value read
 * @len:      access width in bytes; 1 to 8 are accepted
 * @is_write: non-zero for a guest write
 * @ptr:      the struct vfio_region given at registration time
 *
 * The access is forwarded to the VFIO device file at the region's file
 * offset. Any failure (bad width, missing permission flag, short transfer)
 * is logged; nothing is returned to the caller.
 */
static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len,
			     u8 is_write, void *ptr)
{
	struct vfio_region *region = ptr;
	struct vfio_device *vdev = region->vdev;
	u32 offset = addr - region->guest_phys_addr;
	ssize_t nr;
	u64 val;

	if (len < 1 || len > 8)
		goto err_report;

	if (!is_write) {
		if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
			goto err_report;

		nr = pread(vdev->fd, &val, len, region->info.offset + offset);
		if ((u32)nr != len)
			goto err_report;

		memcpy(data, &val, len);
		return;
	}

	if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE))
		goto err_report;

	/* Bounce through a local so the device sees one contiguous buffer. */
	memcpy(&val, data, len);

	nr = pwrite(vdev->fd, &val, len, region->info.offset + offset);
	if ((u32)nr != len)
		goto err_report;

	return;

err_report:
	vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ?
		     "write" : "read", len, offset, addr);
}
20282caa882SJean-Philippe Brucker
vfio_setup_trap_region(struct kvm * kvm,struct vfio_device * vdev,struct vfio_region * region)20382caa882SJean-Philippe Brucker static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev,
20482caa882SJean-Philippe Brucker struct vfio_region *region)
20582caa882SJean-Philippe Brucker {
20682caa882SJean-Philippe Brucker if (region->is_ioport) {
207*579bc61fSAndre Przywara int port;
208*579bc61fSAndre Przywara
209*579bc61fSAndre Przywara port = kvm__register_pio(kvm, region->port_base,
210*579bc61fSAndre Przywara region->info.size, vfio_ioport_mmio,
211a05e576fSAlexandru Elisei region);
21282caa882SJean-Philippe Brucker if (port < 0)
21382caa882SJean-Philippe Brucker return port;
21482caa882SJean-Philippe Brucker return 0;
21582caa882SJean-Philippe Brucker }
21682caa882SJean-Philippe Brucker
21782caa882SJean-Philippe Brucker return kvm__register_mmio(kvm, region->guest_phys_addr,
21882caa882SJean-Philippe Brucker region->info.size, false, vfio_mmio_access,
21982caa882SJean-Philippe Brucker region);
22082caa882SJean-Philippe Brucker }
22182caa882SJean-Philippe Brucker
vfio_map_region(struct kvm * kvm,struct vfio_device * vdev,struct vfio_region * region)2226078a454SJean-Philippe Brucker int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
2236078a454SJean-Philippe Brucker struct vfio_region *region)
2246078a454SJean-Philippe Brucker {
2256078a454SJean-Philippe Brucker void *base;
2266078a454SJean-Philippe Brucker int ret, prot = 0;
2276078a454SJean-Philippe Brucker /* KVM needs page-aligned regions */
2286078a454SJean-Philippe Brucker u64 map_size = ALIGN(region->info.size, PAGE_SIZE);
2296078a454SJean-Philippe Brucker
23082caa882SJean-Philippe Brucker if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP))
23182caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region);
2326078a454SJean-Philippe Brucker
233b4fc4f60SAlexandru Elisei /*
234b4fc4f60SAlexandru Elisei * KVM_SET_USER_MEMORY_REGION will fail because the guest physical
235b4fc4f60SAlexandru Elisei * address isn't page aligned, let's emulate the region ourselves.
236b4fc4f60SAlexandru Elisei */
237b4fc4f60SAlexandru Elisei if (region->guest_phys_addr & (PAGE_SIZE - 1))
238b4fc4f60SAlexandru Elisei return kvm__register_mmio(kvm, region->guest_phys_addr,
239b4fc4f60SAlexandru Elisei region->info.size, false,
240b4fc4f60SAlexandru Elisei vfio_mmio_access, region);
241b4fc4f60SAlexandru Elisei
2426078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_READ)
2436078a454SJean-Philippe Brucker prot |= PROT_READ;
2446078a454SJean-Philippe Brucker if (region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
2456078a454SJean-Philippe Brucker prot |= PROT_WRITE;
2466078a454SJean-Philippe Brucker
2476078a454SJean-Philippe Brucker base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd,
2486078a454SJean-Philippe Brucker region->info.offset);
2496078a454SJean-Philippe Brucker if (base == MAP_FAILED) {
25082caa882SJean-Philippe Brucker /* TODO: support sparse mmap */
25182caa882SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping",
2526078a454SJean-Philippe Brucker region->info.index, region->info.size);
25382caa882SJean-Philippe Brucker return vfio_setup_trap_region(kvm, vdev, region);
2546078a454SJean-Philippe Brucker }
2556078a454SJean-Philippe Brucker region->host_addr = base;
2566078a454SJean-Philippe Brucker
2576078a454SJean-Philippe Brucker ret = kvm__register_dev_mem(kvm, region->guest_phys_addr, map_size,
2586078a454SJean-Philippe Brucker region->host_addr);
2596078a454SJean-Philippe Brucker if (ret) {
2606078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to register region with KVM");
2616078a454SJean-Philippe Brucker return ret;
2626078a454SJean-Philippe Brucker }
2636078a454SJean-Philippe Brucker
2646078a454SJean-Philippe Brucker return 0;
2656078a454SJean-Philippe Brucker }
2666078a454SJean-Philippe Brucker
vfio_unmap_region(struct kvm * kvm,struct vfio_region * region)2676078a454SJean-Philippe Brucker void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region)
2686078a454SJean-Philippe Brucker {
2698d987725SAlexandru Elisei u64 map_size;
2708d987725SAlexandru Elisei
27182caa882SJean-Philippe Brucker if (region->host_addr) {
2728d987725SAlexandru Elisei map_size = ALIGN(region->info.size, PAGE_SIZE);
2738d987725SAlexandru Elisei kvm__destroy_mem(kvm, region->guest_phys_addr, map_size,
2748d987725SAlexandru Elisei region->host_addr);
2756078a454SJean-Philippe Brucker munmap(region->host_addr, region->info.size);
2768d987725SAlexandru Elisei region->host_addr = NULL;
27782caa882SJean-Philippe Brucker } else if (region->is_ioport) {
278*579bc61fSAndre Przywara kvm__deregister_pio(kvm, region->port_base);
27982caa882SJean-Philippe Brucker } else {
28082caa882SJean-Philippe Brucker kvm__deregister_mmio(kvm, region->guest_phys_addr);
28182caa882SJean-Philippe Brucker }
2826078a454SJean-Philippe Brucker }
2836078a454SJean-Philippe Brucker
vfio_configure_device(struct kvm * kvm,struct vfio_device * vdev)2846078a454SJean-Philippe Brucker static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev)
2856078a454SJean-Philippe Brucker {
2866078a454SJean-Philippe Brucker int ret;
2876078a454SJean-Philippe Brucker struct vfio_group *group = vdev->group;
2886078a454SJean-Philippe Brucker
2896078a454SJean-Philippe Brucker vdev->fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD,
2906078a454SJean-Philippe Brucker vdev->params->name);
2916078a454SJean-Philippe Brucker if (vdev->fd < 0) {
2926078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to get fd");
2936078a454SJean-Philippe Brucker
2946078a454SJean-Philippe Brucker /* The device might be a bridge without an fd */
2956078a454SJean-Philippe Brucker return 0;
2966078a454SJean-Philippe Brucker }
2976078a454SJean-Philippe Brucker
2986078a454SJean-Philippe Brucker vdev->info.argsz = sizeof(vdev->info);
2996078a454SJean-Philippe Brucker if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) {
3006078a454SJean-Philippe Brucker ret = -errno;
3016078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to get info");
3026078a454SJean-Philippe Brucker goto err_close_device;
3036078a454SJean-Philippe Brucker }
3046078a454SJean-Philippe Brucker
3056078a454SJean-Philippe Brucker if (vdev->info.flags & VFIO_DEVICE_FLAGS_RESET &&
3066078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0)
3076078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to reset device");
3086078a454SJean-Philippe Brucker
3096078a454SJean-Philippe Brucker vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions));
3106078a454SJean-Philippe Brucker if (!vdev->regions) {
3116078a454SJean-Philippe Brucker ret = -ENOMEM;
3126078a454SJean-Philippe Brucker goto err_close_device;
3136078a454SJean-Philippe Brucker }
3146078a454SJean-Philippe Brucker
3156078a454SJean-Philippe Brucker /* Now for the bus-specific initialization... */
3166078a454SJean-Philippe Brucker switch (vdev->params->type) {
3176078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI:
3186078a454SJean-Philippe Brucker BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI));
3196078a454SJean-Philippe Brucker ret = vfio_pci_setup_device(kvm, vdev);
3206078a454SJean-Philippe Brucker break;
3216078a454SJean-Philippe Brucker default:
3226078a454SJean-Philippe Brucker BUG_ON(1);
3236078a454SJean-Philippe Brucker ret = -EINVAL;
3246078a454SJean-Philippe Brucker }
3256078a454SJean-Philippe Brucker
3266078a454SJean-Philippe Brucker if (ret)
3276078a454SJean-Philippe Brucker goto err_free_regions;
3286078a454SJean-Philippe Brucker
3296078a454SJean-Philippe Brucker vfio_dev_info(vdev, "assigned to device number 0x%x in group %lu",
3306078a454SJean-Philippe Brucker vdev->dev_hdr.dev_num, group->id);
3316078a454SJean-Philippe Brucker
3326078a454SJean-Philippe Brucker return 0;
3336078a454SJean-Philippe Brucker
3346078a454SJean-Philippe Brucker err_free_regions:
3356078a454SJean-Philippe Brucker free(vdev->regions);
3366078a454SJean-Philippe Brucker err_close_device:
3376078a454SJean-Philippe Brucker close(vdev->fd);
3386078a454SJean-Philippe Brucker
3396078a454SJean-Philippe Brucker return ret;
3406078a454SJean-Philippe Brucker }
3416078a454SJean-Philippe Brucker
vfio_configure_devices(struct kvm * kvm)3426078a454SJean-Philippe Brucker static int vfio_configure_devices(struct kvm *kvm)
3436078a454SJean-Philippe Brucker {
3446078a454SJean-Philippe Brucker int i, ret;
3456078a454SJean-Philippe Brucker
3466078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
3476078a454SJean-Philippe Brucker ret = vfio_configure_device(kvm, &vfio_devices[i]);
3486078a454SJean-Philippe Brucker if (ret)
3496078a454SJean-Philippe Brucker return ret;
3506078a454SJean-Philippe Brucker }
3516078a454SJean-Philippe Brucker
3526078a454SJean-Philippe Brucker return 0;
3536078a454SJean-Philippe Brucker }
3546078a454SJean-Philippe Brucker
vfio_get_iommu_type(void)3556078a454SJean-Philippe Brucker static int vfio_get_iommu_type(void)
3566078a454SJean-Philippe Brucker {
3576078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU))
3586078a454SJean-Philippe Brucker return VFIO_TYPE1v2_IOMMU;
3596078a454SJean-Philippe Brucker
3606078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
3616078a454SJean-Philippe Brucker return VFIO_TYPE1_IOMMU;
3626078a454SJean-Philippe Brucker
3636078a454SJean-Philippe Brucker return -ENODEV;
3646078a454SJean-Philippe Brucker }
3656078a454SJean-Philippe Brucker
vfio_map_mem_bank(struct kvm * kvm,struct kvm_mem_bank * bank,void * data)3666078a454SJean-Philippe Brucker static int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data)
3676078a454SJean-Philippe Brucker {
3686078a454SJean-Philippe Brucker int ret = 0;
3696078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_map dma_map = {
3706078a454SJean-Philippe Brucker .argsz = sizeof(dma_map),
3716078a454SJean-Philippe Brucker .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
3726078a454SJean-Philippe Brucker .vaddr = (unsigned long)bank->host_addr,
3736078a454SJean-Philippe Brucker .iova = (u64)bank->guest_phys_addr,
3746078a454SJean-Philippe Brucker .size = bank->size,
3756078a454SJean-Philippe Brucker };
3766078a454SJean-Philippe Brucker
3776078a454SJean-Philippe Brucker /* Map the guest memory for DMA (i.e. provide isolation) */
3786078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
3796078a454SJean-Philippe Brucker ret = -errno;
3806078a454SJean-Philippe Brucker pr_err("Failed to map 0x%llx -> 0x%llx (%llu) for DMA",
3816078a454SJean-Philippe Brucker dma_map.iova, dma_map.vaddr, dma_map.size);
3826078a454SJean-Philippe Brucker }
3836078a454SJean-Philippe Brucker
3846078a454SJean-Philippe Brucker return ret;
3856078a454SJean-Philippe Brucker }
3866078a454SJean-Philippe Brucker
vfio_unmap_mem_bank(struct kvm * kvm,struct kvm_mem_bank * bank,void * data)3876078a454SJean-Philippe Brucker static int vfio_unmap_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data)
3886078a454SJean-Philippe Brucker {
3896078a454SJean-Philippe Brucker struct vfio_iommu_type1_dma_unmap dma_unmap = {
3906078a454SJean-Philippe Brucker .argsz = sizeof(dma_unmap),
3916078a454SJean-Philippe Brucker .size = bank->size,
3926078a454SJean-Philippe Brucker .iova = bank->guest_phys_addr,
3936078a454SJean-Philippe Brucker };
3946078a454SJean-Philippe Brucker
3956078a454SJean-Philippe Brucker ioctl(vfio_container, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
3966078a454SJean-Philippe Brucker
3976078a454SJean-Philippe Brucker return 0;
3986078a454SJean-Philippe Brucker }
3996078a454SJean-Philippe Brucker
vfio_configure_reserved_regions(struct kvm * kvm,struct vfio_group * group)40041d773e2SJean-Philippe Brucker static int vfio_configure_reserved_regions(struct kvm *kvm,
40141d773e2SJean-Philippe Brucker struct vfio_group *group)
40241d773e2SJean-Philippe Brucker {
40341d773e2SJean-Philippe Brucker FILE *file;
40441d773e2SJean-Philippe Brucker int ret = 0;
40541d773e2SJean-Philippe Brucker char type[9];
40641d773e2SJean-Philippe Brucker char filename[PATH_MAX];
40741d773e2SJean-Philippe Brucker unsigned long long start, end;
40841d773e2SJean-Philippe Brucker
40941d773e2SJean-Philippe Brucker snprintf(filename, PATH_MAX, IOMMU_GROUP_DIR "/%lu/reserved_regions",
41041d773e2SJean-Philippe Brucker group->id);
41141d773e2SJean-Philippe Brucker
41241d773e2SJean-Philippe Brucker /* reserved_regions might not be present on older systems */
41341d773e2SJean-Philippe Brucker if (access(filename, F_OK))
41441d773e2SJean-Philippe Brucker return 0;
41541d773e2SJean-Philippe Brucker
41641d773e2SJean-Philippe Brucker file = fopen(filename, "r");
41741d773e2SJean-Philippe Brucker if (!file)
41841d773e2SJean-Philippe Brucker return -errno;
41941d773e2SJean-Philippe Brucker
42041d773e2SJean-Philippe Brucker while (fscanf(file, "0x%llx 0x%llx %8s\n", &start, &end, type) == 3) {
42141d773e2SJean-Philippe Brucker ret = kvm__reserve_mem(kvm, start, end - start + 1);
42241d773e2SJean-Philippe Brucker if (ret)
42341d773e2SJean-Philippe Brucker break;
42441d773e2SJean-Philippe Brucker }
42541d773e2SJean-Philippe Brucker
42641d773e2SJean-Philippe Brucker fclose(file);
42741d773e2SJean-Philippe Brucker
42841d773e2SJean-Philippe Brucker return ret;
42941d773e2SJean-Philippe Brucker }
43041d773e2SJean-Philippe Brucker
vfio_configure_groups(struct kvm * kvm)43141d773e2SJean-Philippe Brucker static int vfio_configure_groups(struct kvm *kvm)
43241d773e2SJean-Philippe Brucker {
43341d773e2SJean-Philippe Brucker int ret;
43441d773e2SJean-Philippe Brucker struct vfio_group *group;
43541d773e2SJean-Philippe Brucker
43641d773e2SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) {
43741d773e2SJean-Philippe Brucker ret = vfio_configure_reserved_regions(kvm, group);
43841d773e2SJean-Philippe Brucker if (ret)
43941d773e2SJean-Philippe Brucker return ret;
44041d773e2SJean-Philippe Brucker }
44141d773e2SJean-Philippe Brucker
44241d773e2SJean-Philippe Brucker return 0;
44341d773e2SJean-Philippe Brucker }
44441d773e2SJean-Philippe Brucker
vfio_group_create(struct kvm * kvm,unsigned long id)4456078a454SJean-Philippe Brucker static struct vfio_group *vfio_group_create(struct kvm *kvm, unsigned long id)
4466078a454SJean-Philippe Brucker {
4476078a454SJean-Philippe Brucker int ret;
4486078a454SJean-Philippe Brucker struct vfio_group *group;
4496078a454SJean-Philippe Brucker char group_node[PATH_MAX];
4506078a454SJean-Philippe Brucker struct vfio_group_status group_status = {
4516078a454SJean-Philippe Brucker .argsz = sizeof(group_status),
4526078a454SJean-Philippe Brucker };
4536078a454SJean-Philippe Brucker
4546078a454SJean-Philippe Brucker group = calloc(1, sizeof(*group));
4556078a454SJean-Philippe Brucker if (!group)
4566078a454SJean-Philippe Brucker return NULL;
4576078a454SJean-Philippe Brucker
4586078a454SJean-Philippe Brucker group->id = id;
4596078a454SJean-Philippe Brucker group->refs = 1;
4606078a454SJean-Philippe Brucker
4616078a454SJean-Philippe Brucker ret = snprintf(group_node, PATH_MAX, VFIO_DEV_DIR "/%lu", id);
4626078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX)
4636078a454SJean-Philippe Brucker return NULL;
4646078a454SJean-Philippe Brucker
4656078a454SJean-Philippe Brucker group->fd = open(group_node, O_RDWR);
4666078a454SJean-Philippe Brucker if (group->fd < 0) {
4676078a454SJean-Philippe Brucker pr_err("Failed to open IOMMU group %s", group_node);
4686078a454SJean-Philippe Brucker goto err_free_group;
4696078a454SJean-Philippe Brucker }
4706078a454SJean-Philippe Brucker
4716078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &group_status)) {
4726078a454SJean-Philippe Brucker pr_err("Failed to determine status of IOMMU group %lu", id);
4736078a454SJean-Philippe Brucker goto err_close_group;
4746078a454SJean-Philippe Brucker }
4756078a454SJean-Philippe Brucker
4766078a454SJean-Philippe Brucker if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
4776078a454SJean-Philippe Brucker pr_err("IOMMU group %lu is not viable", id);
4786078a454SJean-Philippe Brucker goto err_close_group;
4796078a454SJean-Philippe Brucker }
4806078a454SJean-Philippe Brucker
4816078a454SJean-Philippe Brucker if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &vfio_container)) {
4826078a454SJean-Philippe Brucker pr_err("Failed to add IOMMU group %lu to VFIO container", id);
4836078a454SJean-Philippe Brucker goto err_close_group;
4846078a454SJean-Philippe Brucker }
4856078a454SJean-Philippe Brucker
4866078a454SJean-Philippe Brucker list_add(&group->list, &vfio_groups);
4876078a454SJean-Philippe Brucker
4886078a454SJean-Philippe Brucker return group;
4896078a454SJean-Philippe Brucker
4906078a454SJean-Philippe Brucker err_close_group:
4916078a454SJean-Philippe Brucker close(group->fd);
4926078a454SJean-Philippe Brucker err_free_group:
4936078a454SJean-Philippe Brucker free(group);
4946078a454SJean-Philippe Brucker
4956078a454SJean-Philippe Brucker return NULL;
4966078a454SJean-Philippe Brucker }
4976078a454SJean-Philippe Brucker
vfio_group_exit(struct kvm * kvm,struct vfio_group * group)4986078a454SJean-Philippe Brucker static void vfio_group_exit(struct kvm *kvm, struct vfio_group *group)
4996078a454SJean-Philippe Brucker {
5006078a454SJean-Philippe Brucker if (--group->refs != 0)
5016078a454SJean-Philippe Brucker return;
5026078a454SJean-Philippe Brucker
5036078a454SJean-Philippe Brucker ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER);
5046078a454SJean-Philippe Brucker
5056078a454SJean-Philippe Brucker list_del(&group->list);
5066078a454SJean-Philippe Brucker close(group->fd);
5076078a454SJean-Philippe Brucker free(group);
5086078a454SJean-Philippe Brucker }
5096078a454SJean-Philippe Brucker
5106078a454SJean-Philippe Brucker static struct vfio_group *
vfio_group_get_for_dev(struct kvm * kvm,struct vfio_device * vdev)5116078a454SJean-Philippe Brucker vfio_group_get_for_dev(struct kvm *kvm, struct vfio_device *vdev)
5126078a454SJean-Philippe Brucker {
5136078a454SJean-Philippe Brucker int dirfd;
5146078a454SJean-Philippe Brucker ssize_t ret;
5156078a454SJean-Philippe Brucker char *group_name;
5166078a454SJean-Philippe Brucker unsigned long group_id;
5176078a454SJean-Philippe Brucker char group_path[PATH_MAX];
5186078a454SJean-Philippe Brucker struct vfio_group *group = NULL;
5196078a454SJean-Philippe Brucker
5206078a454SJean-Philippe Brucker /* Find IOMMU group for this device */
5216078a454SJean-Philippe Brucker dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY);
5226078a454SJean-Philippe Brucker if (dirfd < 0) {
5236078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path);
5246078a454SJean-Philippe Brucker return NULL;
5256078a454SJean-Philippe Brucker }
5266078a454SJean-Philippe Brucker
5276078a454SJean-Philippe Brucker ret = readlinkat(dirfd, "iommu_group", group_path, PATH_MAX);
5286078a454SJean-Philippe Brucker if (ret < 0) {
5296078a454SJean-Philippe Brucker vfio_dev_err(vdev, "no iommu_group");
5306078a454SJean-Philippe Brucker goto out_close;
5316078a454SJean-Philippe Brucker }
5326078a454SJean-Philippe Brucker if (ret == PATH_MAX)
5336078a454SJean-Philippe Brucker goto out_close;
5346078a454SJean-Philippe Brucker
5356078a454SJean-Philippe Brucker group_path[ret] = '\0';
5366078a454SJean-Philippe Brucker
5376078a454SJean-Philippe Brucker group_name = basename(group_path);
5386078a454SJean-Philippe Brucker errno = 0;
5396078a454SJean-Philippe Brucker group_id = strtoul(group_name, NULL, 10);
5406078a454SJean-Philippe Brucker if (errno)
5416078a454SJean-Philippe Brucker goto out_close;
5426078a454SJean-Philippe Brucker
5436078a454SJean-Philippe Brucker list_for_each_entry(group, &vfio_groups, list) {
5446078a454SJean-Philippe Brucker if (group->id == group_id) {
5456078a454SJean-Philippe Brucker group->refs++;
5466078a454SJean-Philippe Brucker return group;
5476078a454SJean-Philippe Brucker }
5486078a454SJean-Philippe Brucker }
5496078a454SJean-Philippe Brucker
5506078a454SJean-Philippe Brucker group = vfio_group_create(kvm, group_id);
5516078a454SJean-Philippe Brucker
5526078a454SJean-Philippe Brucker out_close:
5536078a454SJean-Philippe Brucker close(dirfd);
5546078a454SJean-Philippe Brucker return group;
5556078a454SJean-Philippe Brucker }
5566078a454SJean-Philippe Brucker
vfio_device_init(struct kvm * kvm,struct vfio_device * vdev)5576078a454SJean-Philippe Brucker static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev)
5586078a454SJean-Philippe Brucker {
5596078a454SJean-Philippe Brucker int ret;
5606078a454SJean-Philippe Brucker char dev_path[PATH_MAX];
5616078a454SJean-Philippe Brucker struct vfio_group *group;
5626078a454SJean-Philippe Brucker
5636078a454SJean-Philippe Brucker ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s",
5646078a454SJean-Philippe Brucker vdev->params->bus, vdev->params->name);
5656078a454SJean-Philippe Brucker if (ret < 0 || ret == PATH_MAX)
5666078a454SJean-Philippe Brucker return -EINVAL;
5676078a454SJean-Philippe Brucker
5686078a454SJean-Philippe Brucker vdev->sysfs_path = strndup(dev_path, PATH_MAX);
5696078a454SJean-Philippe Brucker if (!vdev->sysfs_path)
5706078a454SJean-Philippe Brucker return -errno;
5716078a454SJean-Philippe Brucker
5726078a454SJean-Philippe Brucker group = vfio_group_get_for_dev(kvm, vdev);
5736078a454SJean-Philippe Brucker if (!group) {
5746078a454SJean-Philippe Brucker free(vdev->sysfs_path);
5756078a454SJean-Philippe Brucker return -EINVAL;
5766078a454SJean-Philippe Brucker }
5776078a454SJean-Philippe Brucker
5786078a454SJean-Philippe Brucker vdev->group = group;
5796078a454SJean-Philippe Brucker
5806078a454SJean-Philippe Brucker return 0;
5816078a454SJean-Philippe Brucker }
5826078a454SJean-Philippe Brucker
vfio_device_exit(struct kvm * kvm,struct vfio_device * vdev)5836078a454SJean-Philippe Brucker static void vfio_device_exit(struct kvm *kvm, struct vfio_device *vdev)
5846078a454SJean-Philippe Brucker {
5856078a454SJean-Philippe Brucker vfio_group_exit(kvm, vdev->group);
5866078a454SJean-Philippe Brucker
5876078a454SJean-Philippe Brucker switch (vdev->params->type) {
5886078a454SJean-Philippe Brucker case VFIO_DEVICE_PCI:
5896078a454SJean-Philippe Brucker vfio_pci_teardown_device(kvm, vdev);
5906078a454SJean-Philippe Brucker break;
5916078a454SJean-Philippe Brucker default:
5926078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "no teardown function for device");
5936078a454SJean-Philippe Brucker }
5946078a454SJean-Philippe Brucker
5956078a454SJean-Philippe Brucker close(vdev->fd);
5966078a454SJean-Philippe Brucker
5976078a454SJean-Philippe Brucker free(vdev->regions);
5986078a454SJean-Philippe Brucker free(vdev->sysfs_path);
5996078a454SJean-Philippe Brucker }
6006078a454SJean-Philippe Brucker
vfio_container_init(struct kvm * kvm)6016078a454SJean-Philippe Brucker static int vfio_container_init(struct kvm *kvm)
6026078a454SJean-Philippe Brucker {
6036078a454SJean-Philippe Brucker int api, i, ret, iommu_type;;
6046078a454SJean-Philippe Brucker
6056078a454SJean-Philippe Brucker /* Create a container for our IOMMU groups */
6066078a454SJean-Philippe Brucker vfio_container = open(VFIO_DEV_NODE, O_RDWR);
6076078a454SJean-Philippe Brucker if (vfio_container == -1) {
6086078a454SJean-Philippe Brucker ret = errno;
6096078a454SJean-Philippe Brucker pr_err("Failed to open %s", VFIO_DEV_NODE);
6106078a454SJean-Philippe Brucker return ret;
6116078a454SJean-Philippe Brucker }
6126078a454SJean-Philippe Brucker
6136078a454SJean-Philippe Brucker api = ioctl(vfio_container, VFIO_GET_API_VERSION);
6146078a454SJean-Philippe Brucker if (api != VFIO_API_VERSION) {
6156078a454SJean-Philippe Brucker pr_err("Unknown VFIO API version %d", api);
6166078a454SJean-Philippe Brucker return -ENODEV;
6176078a454SJean-Philippe Brucker }
6186078a454SJean-Philippe Brucker
6196078a454SJean-Philippe Brucker iommu_type = vfio_get_iommu_type();
6206078a454SJean-Philippe Brucker if (iommu_type < 0) {
6216078a454SJean-Philippe Brucker pr_err("VFIO type-1 IOMMU not supported on this platform");
6226078a454SJean-Philippe Brucker return iommu_type;
6236078a454SJean-Philippe Brucker }
6246078a454SJean-Philippe Brucker
6256078a454SJean-Philippe Brucker /* Create groups for our devices and add them to the container */
6266078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
6276078a454SJean-Philippe Brucker vfio_devices[i].params = &kvm->cfg.vfio_devices[i];
6286078a454SJean-Philippe Brucker
6296078a454SJean-Philippe Brucker ret = vfio_device_init(kvm, &vfio_devices[i]);
6306078a454SJean-Philippe Brucker if (ret)
6316078a454SJean-Philippe Brucker return ret;
6326078a454SJean-Philippe Brucker }
6336078a454SJean-Philippe Brucker
6346078a454SJean-Philippe Brucker /* Finalise the container */
6356078a454SJean-Philippe Brucker if (ioctl(vfio_container, VFIO_SET_IOMMU, iommu_type)) {
6366078a454SJean-Philippe Brucker ret = -errno;
6376078a454SJean-Philippe Brucker pr_err("Failed to set IOMMU type %d for VFIO container",
6386078a454SJean-Philippe Brucker iommu_type);
6396078a454SJean-Philippe Brucker return ret;
6406078a454SJean-Philippe Brucker } else {
6416078a454SJean-Philippe Brucker pr_info("Using IOMMU type %d for VFIO container", iommu_type);
6426078a454SJean-Philippe Brucker }
6436078a454SJean-Philippe Brucker
6446078a454SJean-Philippe Brucker return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_map_mem_bank,
6456078a454SJean-Philippe Brucker NULL);
6466078a454SJean-Philippe Brucker }
6476078a454SJean-Philippe Brucker
vfio__init(struct kvm * kvm)6486078a454SJean-Philippe Brucker static int vfio__init(struct kvm *kvm)
6496078a454SJean-Philippe Brucker {
6506078a454SJean-Philippe Brucker int ret;
6516078a454SJean-Philippe Brucker
6526078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices)
6536078a454SJean-Philippe Brucker return 0;
6546078a454SJean-Philippe Brucker
6556078a454SJean-Philippe Brucker vfio_devices = calloc(kvm->cfg.num_vfio_devices, sizeof(*vfio_devices));
6566078a454SJean-Philippe Brucker if (!vfio_devices)
6576078a454SJean-Philippe Brucker return -ENOMEM;
6586078a454SJean-Philippe Brucker
6596078a454SJean-Philippe Brucker ret = vfio_container_init(kvm);
6606078a454SJean-Philippe Brucker if (ret)
6616078a454SJean-Philippe Brucker return ret;
6626078a454SJean-Philippe Brucker
66341d773e2SJean-Philippe Brucker ret = vfio_configure_groups(kvm);
66441d773e2SJean-Philippe Brucker if (ret)
66541d773e2SJean-Philippe Brucker return ret;
66641d773e2SJean-Philippe Brucker
6676078a454SJean-Philippe Brucker ret = vfio_configure_devices(kvm);
6686078a454SJean-Philippe Brucker if (ret)
6696078a454SJean-Philippe Brucker return ret;
6706078a454SJean-Philippe Brucker
6716078a454SJean-Philippe Brucker return 0;
6726078a454SJean-Philippe Brucker }
6736078a454SJean-Philippe Brucker dev_base_init(vfio__init);
6746078a454SJean-Philippe Brucker
vfio__exit(struct kvm * kvm)6756078a454SJean-Philippe Brucker static int vfio__exit(struct kvm *kvm)
6766078a454SJean-Philippe Brucker {
6776078a454SJean-Philippe Brucker int i;
6786078a454SJean-Philippe Brucker
6796078a454SJean-Philippe Brucker if (!kvm->cfg.num_vfio_devices)
6806078a454SJean-Philippe Brucker return 0;
6816078a454SJean-Philippe Brucker
6826078a454SJean-Philippe Brucker for (i = 0; i < kvm->cfg.num_vfio_devices; i++)
6836078a454SJean-Philippe Brucker vfio_device_exit(kvm, &vfio_devices[i]);
6846078a454SJean-Philippe Brucker
6856078a454SJean-Philippe Brucker free(vfio_devices);
6866078a454SJean-Philippe Brucker
6876078a454SJean-Philippe Brucker kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL);
6886078a454SJean-Philippe Brucker close(vfio_container);
6896078a454SJean-Philippe Brucker
6906078a454SJean-Philippe Brucker free(kvm->cfg.vfio_devices);
6916078a454SJean-Philippe Brucker
6926078a454SJean-Philippe Brucker return 0;
6936078a454SJean-Philippe Brucker }
6946078a454SJean-Philippe Brucker dev_base_exit(vfio__exit);
695