#include "kvm/kvm.h"
#include "kvm/vfio.h"
#include "kvm/ioport.h"

#include <linux/list.h>

#define VFIO_DEV_DIR "/dev/vfio"
#define VFIO_DEV_NODE VFIO_DEV_DIR "/vfio"
#define IOMMU_GROUP_DIR "/sys/kernel/iommu_groups"

static int vfio_container;
static LIST_HEAD(vfio_groups);
static struct vfio_device *vfio_devices;

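/*
 * Parse a PCI device address of the form [domain:]bus:dev.fn, as given to
 * --vfio-pci on the command line, into the canonical "dddd:bb:dd.f" name
 * used by sysfs and VFIO.
 */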
static int vfio_device_pci_parser(const struct option *opt, char *arg,
				  struct vfio_device_params *dev)
{
	unsigned int domain, bus, devnr, fn;

	int nr = sscanf(arg, "%4x:%2x:%2x.%1x", &domain, &bus, &devnr, &fn);
	if (nr < 4) {
		domain = 0;
		nr = sscanf(arg, "%2x:%2x.%1x", &bus, &devnr, &fn);
		if (nr < 3) {
			pr_err("Invalid device identifier %s", arg);
			return -EINVAL;
		}
	}

	dev->type = VFIO_DEVICE_PCI;
	dev->bus = "pci";
	/* 12 characters for "dddd:bb:dd.f", plus the NUL terminator */
	dev->name = malloc(13);
	if (!dev->name)
		return -ENOMEM;

	snprintf(dev->name, 13, "%04x:%02x:%02x.%x", domain, bus, devnr, fn);

	return 0;
}

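/*
 * Command line callback for --vfio-pci: grow the device configuration array
 * and parse one device parameter string into the new slot.
 */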
int vfio_device_parser(const struct option *opt, const char *arg, int unset)
{
	int ret = -EINVAL;
	static int idx = 0;
	struct kvm *kvm = opt->ptr;
	struct vfio_device_params *dev, *devs;
	char *cur, *buf = strdup(arg);

	if (!buf)
		return -ENOMEM;

	if (idx >= MAX_VFIO_DEVICES) {
		pr_warning("Too many VFIO devices");
		goto out_free_buf;
	}

	devs = realloc(kvm->cfg.vfio_devices, sizeof(*dev) * (idx + 1));
	if (!devs) {
		ret = -ENOMEM;
		goto out_free_buf;
	}

	kvm->cfg.vfio_devices = devs;
	dev = &devs[idx];

	cur = strtok(buf, ",");
	if (!cur)
		goto out_free_buf;

	if (!strcmp(opt->long_name, "vfio-pci"))
		ret = vfio_device_pci_parser(opt, cur, dev);
	else
		ret = -EINVAL;

	if (!ret)
		kvm->cfg.num_vfio_devices = ++idx;

out_free_buf:
	free(buf);

	return ret;
}

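/*
 * Trapped I/O port accesses are forwarded to the device with pread/pwrite on
 * the VFIO device fd, at the file offset of the region being accessed.
 */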
static bool vfio_ioport_in(struct vfio_region *region, u32 offset,
			   void *data, int len)
{
	struct vfio_device *vdev = region->vdev;
	ssize_t nr;
	u32 val;

	if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
		return false;

	nr = pread(vdev->fd, &val, len, region->info.offset + offset);
	if (nr != len) {
		vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x",
			     len, offset + region->port_base);
		return false;
	}

	switch (len) {
	case 1:
		ioport__write8(data, val);
		break;
	case 2:
		ioport__write16(data, val);
		break;
	case 4:
		ioport__write32(data, val);
		break;
	default:
		return false;
	}

	return true;
}

static bool vfio_ioport_out(struct vfio_region *region, u32 offset,
			    void *data, int len)
{
	struct vfio_device *vdev = region->vdev;
	ssize_t nr;
	u32 val;

	if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE))
		return false;

	switch (len) {
	case 1:
		val = ioport__read8(data);
		break;
	case 2:
		val = ioport__read16(data);
		break;
	case 4:
		val = ioport__read32(data);
		break;
	default:
		return false;
	}

	nr = pwrite(vdev->fd, &val, len, region->info.offset + offset);
	if (nr != len)
		vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x",
			     len, offset + region->port_base);

	return nr == len;
}

static void vfio_ioport_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len,
			     u8 is_write, void *ptr)
{
	struct vfio_region *region = ptr;
	u32 offset = addr - region->port_base;

	if (is_write)
		vfio_ioport_out(region, offset, data, len);
	else
		vfio_ioport_in(region, offset, data, len);
}

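/*
 * Trapped MMIO accesses are forwarded the same way, but with up to eight
 * bytes transferred per access.
 */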
static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len,
			     u8 is_write, void *ptr)
{
	u64 val;
	ssize_t nr;
	struct vfio_region *region = ptr;
	struct vfio_device *vdev = region->vdev;

	u32 offset = addr - region->guest_phys_addr;

	if (len < 1 || len > 8)
		goto err_report;

	if (is_write) {
		if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE))
			goto err_report;

		memcpy(&val, data, len);

		nr = pwrite(vdev->fd, &val, len, region->info.offset + offset);
		if ((u32)nr != len)
			goto err_report;
	} else {
		if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
			goto err_report;

		nr = pread(vdev->fd, &val, len, region->info.offset + offset);
		if ((u32)nr != len)
			goto err_report;

		memcpy(data, &val, len);
	}

	return;

err_report:
	vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ?
		     "write" : "read", len, offset, addr);
}

static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev,
				  struct vfio_region *region)
{
	if (region->is_ioport) {
		int port;

		port = kvm__register_pio(kvm, region->port_base,
					 region->info.size, vfio_ioport_mmio,
					 region);
		if (port < 0)
			return port;
		return 0;
	}

	return kvm__register_mmio(kvm, region->guest_phys_addr,
				  region->info.size, false, vfio_mmio_access,
				  region);
}

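/*
 * Make a device region accessible to the guest: mmap it into the host and
 * hand it to KVM as guest memory when possible, or fall back to trapping
 * every access.
 */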
int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
		    struct vfio_region *region)
{
	void *base;
	int ret, prot = 0;
	/* KVM needs page-aligned regions */
	u64 map_size = ALIGN(region->info.size, PAGE_SIZE);

	if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP))
		return vfio_setup_trap_region(kvm, vdev, region);

	/*
	 * KVM_SET_USER_MEMORY_REGION will fail because the guest physical
	 * address isn't page aligned, let's emulate the region ourselves.
	 */
	if (region->guest_phys_addr & (PAGE_SIZE - 1))
		return kvm__register_mmio(kvm, region->guest_phys_addr,
					  region->info.size, false,
					  vfio_mmio_access, region);

	if (region->info.flags & VFIO_REGION_INFO_FLAG_READ)
		prot |= PROT_READ;
	if (region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
		prot |= PROT_WRITE;

	base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd,
		    region->info.offset);
	if (base == MAP_FAILED) {
		/* TODO: support sparse mmap */
		vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping",
			      region->info.index, region->info.size);
		return vfio_setup_trap_region(kvm, vdev, region);
	}
	region->host_addr = base;

	ret = kvm__register_dev_mem(kvm, region->guest_phys_addr, map_size,
				    region->host_addr);
	if (ret) {
		vfio_dev_err(vdev, "failed to register region with KVM");
		return ret;
	}

	return 0;
}

void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region)
{
	u64 map_size;

	if (region->host_addr) {
		map_size = ALIGN(region->info.size, PAGE_SIZE);
		kvm__destroy_mem(kvm, region->guest_phys_addr, map_size,
				 region->host_addr);
		munmap(region->host_addr, region->info.size);
		region->host_addr = NULL;
	} else if (region->is_ioport) {
		kvm__deregister_pio(kvm, region->port_base);
	} else {
		kvm__deregister_mmio(kvm, region->guest_phys_addr);
	}
}

static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev)
{
	int ret;
	struct vfio_group *group = vdev->group;

	vdev->fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD,
			 vdev->params->name);
	if (vdev->fd < 0) {
		vfio_dev_warn(vdev, "failed to get fd");

		/* The device might be a bridge without an fd */
		return 0;
	}

	vdev->info.argsz = sizeof(vdev->info);
	if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &vdev->info)) {
		ret = -errno;
		vfio_dev_err(vdev, "failed to get info");
		goto err_close_device;
	}

	if ((vdev->info.flags & VFIO_DEVICE_FLAGS_RESET) &&
	    ioctl(vdev->fd, VFIO_DEVICE_RESET) < 0)
		vfio_dev_warn(vdev, "failed to reset device");

	vdev->regions = calloc(vdev->info.num_regions, sizeof(*vdev->regions));
	if (!vdev->regions) {
		ret = -ENOMEM;
		goto err_close_device;
	}

	/* Now for the bus-specific initialization... */
	switch (vdev->params->type) {
	case VFIO_DEVICE_PCI:
		BUG_ON(!(vdev->info.flags & VFIO_DEVICE_FLAGS_PCI));
		ret = vfio_pci_setup_device(kvm, vdev);
		break;
	default:
		BUG_ON(1);
		ret = -EINVAL;
	}

	if (ret)
		goto err_free_regions;

	vfio_dev_info(vdev, "assigned to device number 0x%x in group %lu",
		      vdev->dev_hdr.dev_num, group->id);

	return 0;

err_free_regions:
	free(vdev->regions);
err_close_device:
	close(vdev->fd);

	return ret;
}

static int vfio_configure_devices(struct kvm *kvm)
{
	int i, ret;

	for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
		ret = vfio_configure_device(kvm, &vfio_devices[i]);
		if (ret)
			return ret;
	}

	return 0;
}

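/*
 * Pick the most recent type-1 IOMMU backend supported by the host container.
 */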
static int vfio_get_iommu_type(void)
{
	if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU))
		return VFIO_TYPE1v2_IOMMU;

	if (ioctl(vfio_container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
		return VFIO_TYPE1_IOMMU;

	return -ENODEV;
}

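/*
 * Create a 1:1 IOMMU mapping of a guest RAM bank, so that assigned devices
 * can DMA into guest memory using guest physical addresses.
 */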
static int vfio_map_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data)
{
	int ret = 0;
	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (unsigned long)bank->host_addr,
		.iova = (u64)bank->guest_phys_addr,
		.size = bank->size,
	};

	/* Map the guest memory for DMA (i.e. provide isolation) */
	if (ioctl(vfio_container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
		ret = -errno;
		pr_err("Failed to map 0x%llx -> 0x%llx (%llu) for DMA",
		       dma_map.iova, dma_map.vaddr, dma_map.size);
	}

	return ret;
}

static int vfio_unmap_mem_bank(struct kvm *kvm, struct kvm_mem_bank *bank, void *data)
{
	struct vfio_iommu_type1_dma_unmap dma_unmap = {
		.argsz = sizeof(dma_unmap),
		.size = bank->size,
		.iova = bank->guest_phys_addr,
	};

	ioctl(vfio_container, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);

	return 0;
}

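/*
 * Reserved regions, exposed by the kernel in sysfs, are ranges of I/O virtual
 * addresses that the group's devices cannot use for DMA. Carve them out of
 * the guest memory map.
 */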
static int vfio_configure_reserved_regions(struct kvm *kvm,
					   struct vfio_group *group)
{
	FILE *file;
	int ret = 0;
	char type[9];
	char filename[PATH_MAX];
	unsigned long long start, end;

	snprintf(filename, PATH_MAX, IOMMU_GROUP_DIR "/%lu/reserved_regions",
		 group->id);

	/* reserved_regions might not be present on older systems */
	if (access(filename, F_OK))
		return 0;

	file = fopen(filename, "r");
	if (!file)
		return -errno;

	while (fscanf(file, "0x%llx 0x%llx %8s\n", &start, &end, type) == 3) {
		ret = kvm__reserve_mem(kvm, start, end - start + 1);
		if (ret)
			break;
	}

	fclose(file);

	return ret;
}

static int vfio_configure_groups(struct kvm *kvm)
{
	int ret;
	struct vfio_group *group;

	list_for_each_entry(group, &vfio_groups, list) {
		ret = vfio_configure_reserved_regions(kvm, group);
		if (ret)
			return ret;
	}

	return 0;
}

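/*
 * Open /dev/vfio/<id>, check that the group is viable (all devices in the
 * group are bound to VFIO), and attach it to the container.
 */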
static struct vfio_group *vfio_group_create(struct kvm *kvm, unsigned long id)
{
	int ret;
	struct vfio_group *group;
	char group_node[PATH_MAX];
	struct vfio_group_status group_status = {
		.argsz = sizeof(group_status),
	};

	group = calloc(1, sizeof(*group));
	if (!group)
		return NULL;

	group->id = id;
	group->refs = 1;

	ret = snprintf(group_node, PATH_MAX, VFIO_DEV_DIR "/%lu", id);
	if (ret < 0 || ret >= PATH_MAX)
		goto err_free_group;

	group->fd = open(group_node, O_RDWR);
	if (group->fd < 0) {
		pr_err("Failed to open IOMMU group %s", group_node);
		goto err_free_group;
	}

	if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &group_status)) {
		pr_err("Failed to determine status of IOMMU group %lu", id);
		goto err_close_group;
	}

	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
		pr_err("IOMMU group %lu is not viable", id);
		goto err_close_group;
	}

	if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &vfio_container)) {
		pr_err("Failed to add IOMMU group %lu to VFIO container", id);
		goto err_close_group;
	}

	list_add(&group->list, &vfio_groups);

	return group;

err_close_group:
	close(group->fd);
err_free_group:
	free(group);

	return NULL;
}

static void vfio_group_exit(struct kvm *kvm, struct vfio_group *group)
{
	if (--group->refs != 0)
		return;

	ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER);

	list_del(&group->list);
	close(group->fd);
	free(group);
}

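/*
 * Resolve the IOMMU group of a device from its sysfs iommu_group symlink,
 * reusing an already-open group when several devices share it.
 */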
static struct vfio_group *
vfio_group_get_for_dev(struct kvm *kvm, struct vfio_device *vdev)
{
	int dirfd;
	ssize_t ret;
	char *group_name;
	unsigned long group_id;
	char group_path[PATH_MAX];
	struct vfio_group *group = NULL;

	/* Find IOMMU group for this device */
	dirfd = open(vdev->sysfs_path, O_DIRECTORY | O_PATH | O_RDONLY);
	if (dirfd < 0) {
		vfio_dev_err(vdev, "failed to open '%s'", vdev->sysfs_path);
		return NULL;
	}

	ret = readlinkat(dirfd, "iommu_group", group_path, PATH_MAX);
	if (ret < 0) {
		vfio_dev_err(vdev, "no iommu_group");
		goto out_close;
	}
	if (ret == PATH_MAX)
		goto out_close;

	group_path[ret] = '\0';

	group_name = basename(group_path);
	errno = 0;
	group_id = strtoul(group_name, NULL, 10);
	if (errno)
		goto out_close;

	list_for_each_entry(group, &vfio_groups, list) {
		if (group->id == group_id) {
			group->refs++;
			goto out_close;
		}
	}

	group = vfio_group_create(kvm, group_id);

out_close:
	close(dirfd);
	return group;
}

static int vfio_device_init(struct kvm *kvm, struct vfio_device *vdev)
{
	int ret;
	char dev_path[PATH_MAX];
	struct vfio_group *group;

	ret = snprintf(dev_path, PATH_MAX, "/sys/bus/%s/devices/%s",
		       vdev->params->bus, vdev->params->name);
	if (ret < 0 || ret >= PATH_MAX)
		return -EINVAL;

	vdev->sysfs_path = strndup(dev_path, PATH_MAX);
	if (!vdev->sysfs_path)
		return -errno;

	group = vfio_group_get_for_dev(kvm, vdev);
	if (!group) {
		free(vdev->sysfs_path);
		return -EINVAL;
	}

	vdev->group = group;

	return 0;
}

static void vfio_device_exit(struct kvm *kvm, struct vfio_device *vdev)
{
	vfio_group_exit(kvm, vdev->group);

	switch (vdev->params->type) {
	case VFIO_DEVICE_PCI:
		vfio_pci_teardown_device(kvm, vdev);
		break;
	default:
		vfio_dev_warn(vdev, "no teardown function for device");
	}

	close(vdev->fd);

	free(vdev->regions);
	free(vdev->sysfs_path);
}

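/*
 * Open the VFIO container, attach every device's group to it, select the
 * IOMMU backend, and map all guest RAM for DMA.
 */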
static int vfio_container_init(struct kvm *kvm)
{
	int api, i, ret, iommu_type;

	/* Create a container for our IOMMU groups */
	vfio_container = open(VFIO_DEV_NODE, O_RDWR);
	if (vfio_container == -1) {
		ret = -errno;
		pr_err("Failed to open %s", VFIO_DEV_NODE);
		return ret;
	}

	api = ioctl(vfio_container, VFIO_GET_API_VERSION);
	if (api != VFIO_API_VERSION) {
		pr_err("Unknown VFIO API version %d", api);
		return -ENODEV;
	}

	iommu_type = vfio_get_iommu_type();
	if (iommu_type < 0) {
		pr_err("VFIO type-1 IOMMU not supported on this platform");
		return iommu_type;
	}

	/* Create groups for our devices and add them to the container */
	for (i = 0; i < kvm->cfg.num_vfio_devices; ++i) {
		vfio_devices[i].params = &kvm->cfg.vfio_devices[i];

		ret = vfio_device_init(kvm, &vfio_devices[i]);
		if (ret)
			return ret;
	}

	/* Finalise the container */
	if (ioctl(vfio_container, VFIO_SET_IOMMU, iommu_type)) {
		ret = -errno;
		pr_err("Failed to set IOMMU type %d for VFIO container",
		       iommu_type);
		return ret;
	}
	pr_info("Using IOMMU type %d for VFIO container", iommu_type);

	return kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_map_mem_bank,
				      NULL);
}

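/*
 * Registered with dev_base_init(), so this runs during VM setup, once guest
 * RAM has been configured and before any vcpu starts.
 */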
static int vfio__init(struct kvm *kvm)
{
	int ret;

	if (!kvm->cfg.num_vfio_devices)
		return 0;

	vfio_devices = calloc(kvm->cfg.num_vfio_devices, sizeof(*vfio_devices));
	if (!vfio_devices)
		return -ENOMEM;

	ret = vfio_container_init(kvm);
	if (ret)
		return ret;

	ret = vfio_configure_groups(kvm);
	if (ret)
		return ret;

	ret = vfio_configure_devices(kvm);
	if (ret)
		return ret;

	return 0;
}
dev_base_init(vfio__init);

static int vfio__exit(struct kvm *kvm)
{
	int i;

	if (!kvm->cfg.num_vfio_devices)
		return 0;

	for (i = 0; i < kvm->cfg.num_vfio_devices; i++)
		vfio_device_exit(kvm, &vfio_devices[i]);

	free(vfio_devices);

	kvm__for_each_mem_bank(kvm, KVM_MEM_TYPE_RAM, vfio_unmap_mem_bank, NULL);
	close(vfio_container);

	free(kvm->cfg.vfio_devices);

	return 0;
}
dev_base_exit(vfio__exit);