1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VFIO core
4 *
5 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
6 * Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * Derived from original vfio:
9 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
10 * Author: Tom Lyon, pugs@cisco.com
11 */
12
13 #include <linux/cdev.h>
14 #include <linux/compat.h>
15 #include <linux/device.h>
16 #include <linux/fs.h>
17 #include <linux/idr.h>
18 #include <linux/iommu.h>
19 #if IS_ENABLED(CONFIG_KVM)
20 #include <linux/kvm_host.h>
21 #endif
22 #include <linux/list.h>
23 #include <linux/miscdevice.h>
24 #include <linux/module.h>
25 #include <linux/mount.h>
26 #include <linux/mutex.h>
27 #include <linux/pci.h>
28 #include <linux/pseudo_fs.h>
29 #include <linux/rwsem.h>
30 #include <linux/sched.h>
31 #include <linux/seq_file.h>
32 #include <linux/slab.h>
33 #include <linux/stat.h>
34 #include <linux/string.h>
35 #include <linux/uaccess.h>
36 #include <linux/vfio.h>
37 #include <linux/wait.h>
38 #include <linux/sched/signal.h>
39 #include <linux/pm_runtime.h>
40 #include <linux/interval_tree.h>
41 #include <linux/iova_bitmap.h>
42 #include <linux/iommufd.h>
43 #include "vfio.h"
44
45 #define DRIVER_VERSION "0.3"
46 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
47 #define DRIVER_DESC "VFIO - User Level meta-driver"
48
49 #define VFIO_MAGIC 0x5646494f /* "VFIO" */
50
/* Module-global VFIO core state (single instance). */
static struct vfio {
	struct class *device_class;	/* class backing the vfio device nodes */
	struct ida device_ida;		/* allocator for per-device index/minor */
	struct vfsmount *vfs_mount;	/* pinned pseudo-fs used for anon inodes */
	int fs_count;			/* pin count paired with vfs_mount */
} vfio;
57
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Opt-in, runtime-writable module parameter that allows devices to be used
 * without any IOMMU isolation.  The safety consequences are spelled out in
 * the parameter description below.
 */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
64
65 static DEFINE_XARRAY(vfio_device_set_xa);
66
vfio_assign_device_set(struct vfio_device * device,void * set_id)67 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
68 {
69 unsigned long idx = (unsigned long)set_id;
70 struct vfio_device_set *new_dev_set;
71 struct vfio_device_set *dev_set;
72
73 if (WARN_ON(!set_id))
74 return -EINVAL;
75
76 /*
77 * Atomically acquire a singleton object in the xarray for this set_id
78 */
79 xa_lock(&vfio_device_set_xa);
80 dev_set = xa_load(&vfio_device_set_xa, idx);
81 if (dev_set)
82 goto found_get_ref;
83 xa_unlock(&vfio_device_set_xa);
84
85 new_dev_set = kzalloc_obj(*new_dev_set);
86 if (!new_dev_set)
87 return -ENOMEM;
88 mutex_init(&new_dev_set->lock);
89 INIT_LIST_HEAD(&new_dev_set->device_list);
90 new_dev_set->set_id = set_id;
91
92 xa_lock(&vfio_device_set_xa);
93 dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
94 GFP_KERNEL);
95 if (!dev_set) {
96 dev_set = new_dev_set;
97 goto found_get_ref;
98 }
99
100 kfree(new_dev_set);
101 if (xa_is_err(dev_set)) {
102 xa_unlock(&vfio_device_set_xa);
103 return xa_err(dev_set);
104 }
105
106 found_get_ref:
107 dev_set->device_count++;
108 xa_unlock(&vfio_device_set_xa);
109 mutex_lock(&dev_set->lock);
110 device->dev_set = dev_set;
111 list_add_tail(&device->dev_set_list, &dev_set->device_list);
112 mutex_unlock(&dev_set->lock);
113 return 0;
114 }
115 EXPORT_SYMBOL_GPL(vfio_assign_device_set);
116
/*
 * Drop @device's membership in its vfio_device_set, freeing the set when
 * the last member leaves.  Safe to call when no set was ever assigned.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count is protected by the xarray lock, not dev_set->lock. */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
137
vfio_device_set_open_count(struct vfio_device_set * dev_set)138 unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
139 {
140 struct vfio_device *cur;
141 unsigned int open_count = 0;
142
143 lockdep_assert_held(&dev_set->lock);
144
145 list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
146 open_count += cur->open_count;
147 return open_count;
148 }
149 EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
150
151 struct vfio_device *
vfio_find_device_in_devset(struct vfio_device_set * dev_set,struct device * dev)152 vfio_find_device_in_devset(struct vfio_device_set *dev_set,
153 struct device *dev)
154 {
155 struct vfio_device *cur;
156
157 lockdep_assert_held(&dev_set->lock);
158
159 list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
160 if (cur->dev == dev)
161 return cur;
162 return NULL;
163 }
164 EXPORT_SYMBOL_GPL(vfio_find_device_in_devset);
165
166 /*
167 * Device objects - create, release, get, put, search
168 */
169 /* Device reference always implies a group reference */
vfio_device_put_registration(struct vfio_device * device)170 void vfio_device_put_registration(struct vfio_device *device)
171 {
172 if (refcount_dec_and_test(&device->refcount))
173 complete(&device->comp);
174 }
175 EXPORT_SYMBOL_GPL(vfio_device_put_registration);
176
/*
 * Try to take a registration reference; fails (returns false) once the
 * device has begun unregistration and its refcount dropped to zero.
 */
bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
EXPORT_SYMBOL_GPL(vfio_device_try_get_registration);
182
183 /*
184 * VFIO driver API
185 */
186 /* Release helper called by vfio_put_device() */
vfio_device_release(struct device * dev)187 static void vfio_device_release(struct device *dev)
188 {
189 struct vfio_device *device =
190 container_of(dev, struct vfio_device, device);
191
192 vfio_release_device_set(device);
193 ida_free(&vfio.device_ida, device->index);
194
195 if (device->ops->release)
196 device->ops->release(device);
197
198 iput(device->inode);
199 simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
200 kvfree(device);
201 }
202
203 static int vfio_init_device(struct vfio_device *device, struct device *dev,
204 const struct vfio_device_ops *ops);
205
206 /*
207 * Allocate and initialize vfio_device so it can be registered to vfio
208 * core.
209 *
210 * Drivers should use the wrapper vfio_alloc_device() for allocation.
211 * @size is the size of the structure to be allocated, including any
212 * private data used by the driver.
213 *
214 * Driver may provide an @init callback to cover device private data.
215 *
216 * Use vfio_put_device() to release the structure after success return.
217 */
_vfio_alloc_device(size_t size,struct device * dev,const struct vfio_device_ops * ops)218 struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
219 const struct vfio_device_ops *ops)
220 {
221 struct vfio_device *device;
222 int ret;
223
224 if (WARN_ON(size < sizeof(struct vfio_device)))
225 return ERR_PTR(-EINVAL);
226
227 device = kvzalloc(size, GFP_KERNEL);
228 if (!device)
229 return ERR_PTR(-ENOMEM);
230
231 ret = vfio_init_device(device, dev, ops);
232 if (ret)
233 goto out_free;
234 return device;
235
236 out_free:
237 kvfree(device);
238 return ERR_PTR(ret);
239 }
240 EXPORT_SYMBOL_GPL(_vfio_alloc_device);
241
/* fs_context initializer for the internal "vfio" pseudo filesystem. */
static int vfio_fs_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM;
}
246
/* Internal pseudo filesystem providing anonymous inodes for vfio devices. */
static struct file_system_type vfio_fs_type = {
	.name = "vfio",
	.owner = THIS_MODULE,
	.init_fs_context = vfio_fs_init_fs_context,
	.kill_sb = kill_anon_super,
};
253
vfio_fs_inode_new(void)254 static struct inode *vfio_fs_inode_new(void)
255 {
256 struct inode *inode;
257 int ret;
258
259 ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count);
260 if (ret)
261 return ERR_PTR(ret);
262
263 inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb);
264 if (IS_ERR(inode))
265 simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
266
267 return inode;
268 }
269
270 /*
271 * Initialize a vfio_device so it can be registered to vfio core.
272 */
vfio_init_device(struct vfio_device * device,struct device * dev,const struct vfio_device_ops * ops)273 static int vfio_init_device(struct vfio_device *device, struct device *dev,
274 const struct vfio_device_ops *ops)
275 {
276 int ret;
277
278 ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
279 if (ret < 0) {
280 dev_dbg(dev, "Error to alloc index\n");
281 return ret;
282 }
283
284 device->index = ret;
285 init_completion(&device->comp);
286 device->dev = dev;
287 device->ops = ops;
288 device->inode = vfio_fs_inode_new();
289 if (IS_ERR(device->inode)) {
290 ret = PTR_ERR(device->inode);
291 goto out_inode;
292 }
293
294 if (ops->init) {
295 ret = ops->init(device);
296 if (ret)
297 goto out_uninit;
298 }
299
300 device_initialize(&device->device);
301 device->device.release = vfio_device_release;
302 device->device.class = vfio.device_class;
303 device->device.parent = device->dev;
304 return 0;
305
306 out_uninit:
307 iput(device->inode);
308 simple_release_fs(&vfio.vfs_mount, &vfio.fs_count);
309 out_inode:
310 vfio_release_device_set(device);
311 ida_free(&vfio.device_ida, device->index);
312 return ret;
313 }
314
__vfio_register_dev(struct vfio_device * device,enum vfio_group_type type)315 static int __vfio_register_dev(struct vfio_device *device,
316 enum vfio_group_type type)
317 {
318 int ret;
319
320 if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) &&
321 (!device->ops->bind_iommufd ||
322 !device->ops->unbind_iommufd ||
323 !device->ops->attach_ioas ||
324 !device->ops->detach_ioas)))
325 return -EINVAL;
326
327 /*
328 * If the driver doesn't specify a set then the device is added to a
329 * singleton set just for itself.
330 */
331 if (!device->dev_set)
332 vfio_assign_device_set(device, device);
333
334 ret = dev_set_name(&device->device, "vfio%d", device->index);
335 if (ret)
336 return ret;
337
338 ret = vfio_device_set_group(device, type);
339 if (ret)
340 return ret;
341
342 /*
343 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
344 * restore cache coherency. It has to be checked here because it is only
345 * valid for cases where we are using iommu groups.
346 */
347 if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
348 !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
349 ret = -EINVAL;
350 goto err_out;
351 }
352
353 ret = vfio_device_add(device);
354 if (ret)
355 goto err_out;
356
357 /* Refcounting can't start until the driver calls register */
358 refcount_set(&device->refcount, 1);
359
360 vfio_device_group_register(device);
361 vfio_device_debugfs_init(device);
362
363 return 0;
364 err_out:
365 vfio_device_remove_group(device);
366 return ret;
367 }
368
/* Register a device that is backed by a real IOMMU group. */
int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device, VFIO_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
374
375 /*
376 * Register a virtual device without IOMMU backing. The user of this
377 * device must not be able to directly trigger unmediated DMA.
378 */
vfio_register_emulated_iommu_dev(struct vfio_device * device)379 int vfio_register_emulated_iommu_dev(struct vfio_device *device)
380 {
381 return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
382 }
383 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
384
385 /*
386 * Decrement the device reference count and wait for the device to be
387 * removed. Open file descriptors for the device... */
vfio_unregister_group_dev(struct vfio_device * device)388 void vfio_unregister_group_dev(struct vfio_device *device)
389 {
390 unsigned int i = 0;
391 bool interrupted = false;
392 long rc;
393
394 /*
395 * Prevent new device opened by userspace via the
396 * VFIO_GROUP_GET_DEVICE_FD in the group path.
397 */
398 vfio_device_group_unregister(device);
399
400 /*
401 * Balances vfio_device_add() in register path, also prevents
402 * new device opened by userspace in the cdev path.
403 */
404 vfio_device_del(device);
405
406 vfio_device_put_registration(device);
407 rc = try_wait_for_completion(&device->comp);
408 while (rc <= 0) {
409 if (device->ops->request)
410 device->ops->request(device, i++);
411
412 if (interrupted) {
413 rc = wait_for_completion_timeout(&device->comp,
414 HZ * 10);
415 } else {
416 rc = wait_for_completion_interruptible_timeout(
417 &device->comp, HZ * 10);
418 if (rc < 0) {
419 interrupted = true;
420 dev_warn(device->dev,
421 "Device is currently in use, task"
422 " \"%s\" (%d) "
423 "blocked until device is released",
424 current->comm, task_pid_nr(current));
425 }
426 }
427 }
428
429 vfio_device_debugfs_exit(device);
430 /* Balances vfio_device_set_group in register path */
431 vfio_device_remove_group(device);
432 }
433 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
434
#if IS_ENABLED(CONFIG_KVM)
/*
 * Take a reference on @kvm and record it on @device, using symbol_get()
 * so vfio does not hold a hard module dependency on kvm.
 *
 * kvm_put_kvm is resolved first so that device->put_kvm is always valid
 * whenever device->kvm is set; its symbol reference is kept until
 * vfio_device_put_kvm().  On any failure the device is left with no kvm
 * association.
 */
void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
{
	void (*pfn)(struct kvm *kvm);
	bool (*fn)(struct kvm *kvm);
	bool ret;

	lockdep_assert_held(&device->dev_set->lock);

	if (!kvm)
		return;

	pfn = symbol_get(kvm_put_kvm);
	if (WARN_ON(!pfn))
		return;

	fn = symbol_get(kvm_get_kvm_safe);
	if (WARN_ON(!fn)) {
		symbol_put(kvm_put_kvm);
		return;
	}

	/* kvm_get_kvm_safe() fails if the VM is already being destroyed. */
	ret = fn(kvm);
	symbol_put(kvm_get_kvm_safe);
	if (!ret) {
		symbol_put(kvm_put_kvm);
		return;
	}

	device->put_kvm = pfn;
	device->kvm = kvm;
}
467
/*
 * Release the kvm reference taken by vfio_device_get_kvm_safe() and drop
 * the kvm_put_kvm symbol reference held since then.  No-op when no kvm
 * is associated.
 */
void vfio_device_put_kvm(struct vfio_device *device)
{
	lockdep_assert_held(&device->dev_set->lock);

	if (!device->kvm)
		return;

	if (WARN_ON(!device->put_kvm))
		goto clear;

	device->put_kvm(device->kvm);
	device->put_kvm = NULL;
	symbol_put(kvm_put_kvm);

clear:
	device->kvm = NULL;
}
#endif
486
/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	/* WARNs (once) and returns false on an unexpected zero open_count. */
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
492
493 struct vfio_device_file *
vfio_allocate_device_file(struct vfio_device * device)494 vfio_allocate_device_file(struct vfio_device *device)
495 {
496 struct vfio_device_file *df;
497
498 df = kzalloc_obj(*df, GFP_KERNEL_ACCOUNT);
499 if (!df)
500 return ERR_PTR(-ENOMEM);
501
502 df->device = device;
503 spin_lock_init(&df->kvm_ref_lock);
504
505 return df;
506 }
507
/*
 * First open of a device: pin the driver module, bind to iommufd or take
 * the group IOMMU, then call the driver's open_device.  Unwinds in
 * reverse order on failure.
 */
static int vfio_df_device_first_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	/* Keep the driver module loaded while the device is open. */
	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	/* df->iommufd selects the cdev (iommufd) vs legacy group path. */
	if (iommufd)
		ret = vfio_df_iommufd_bind(df);
	else
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;

	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_unuse_iommu;
	}
	return 0;

err_unuse_iommu:
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
err_module_put:
	module_put(device->dev->driver->owner);
	return ret;
}
542
/*
 * Last close of a device: mirror of vfio_df_device_first_open(), run in
 * reverse order — driver close, IOMMU/iommufd release, module unpin.
 */
static void vfio_df_device_last_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;

	lockdep_assert_held(&device->dev_set->lock);

	if (device->ops->close_device)
		device->ops->close_device(device);
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
	/* Per-open migration opt-in does not persist across reopen. */
	device->precopy_info_v2 = 0;
	module_put(device->dev->driver->owner);
}
559
/*
 * Open the device for @df, performing first-open setup when this is the
 * initial open.  Caller holds dev_set->lock.  Returns 0 or -errno.
 */
int vfio_df_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	int ret = 0;

	lockdep_assert_held(&device->dev_set->lock);

	/*
	 * Only the group path allows the device to be opened multiple
	 * times. The device cdev path doesn't have a secure way for it.
	 */
	if (device->open_count != 0 && !df->group)
		return -EINVAL;

	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_df_device_first_open(df);
		if (ret)
			/* Roll back the increment so state stays consistent. */
			device->open_count--;
	}

	return ret;
}
583
/*
 * Close the device for @df, running last-close teardown when this is the
 * final open.  Caller holds dev_set->lock.
 */
void vfio_df_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;

	lockdep_assert_held(&device->dev_set->lock);

	/* Guard against an unbalanced close (WARNs once). */
	if (!vfio_assert_device_open(device))
		return;
	if (device->open_count == 1)
		vfio_df_device_last_close(df);
	device->open_count--;
}
596
597 /*
598 * Wrapper around pm_runtime_resume_and_get().
599 * Return error code on failure or 0 on success.
600 */
vfio_device_pm_runtime_get(struct vfio_device * device)601 static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
602 {
603 struct device *dev = device->dev;
604
605 if (dev->driver && dev->driver->pm) {
606 int ret;
607
608 ret = pm_runtime_resume_and_get(dev);
609 if (ret) {
610 dev_info_ratelimited(dev,
611 "vfio: runtime resume failed %d\n", ret);
612 return -EIO;
613 }
614 }
615
616 return 0;
617 }
618
619 /*
620 * Wrapper around pm_runtime_put().
621 */
vfio_device_pm_runtime_put(struct vfio_device * device)622 static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
623 {
624 struct device *dev = device->dev;
625
626 if (dev->driver && dev->driver->pm)
627 pm_runtime_put(dev);
628 }
629
630 /*
631 * VFIO Device fd
632 */
vfio_device_fops_release(struct inode * inode,struct file * filep)633 static int vfio_device_fops_release(struct inode *inode, struct file *filep)
634 {
635 struct vfio_device_file *df = filep->private_data;
636 struct vfio_device *device = df->device;
637
638 if (df->group)
639 vfio_df_group_close(df);
640 else
641 vfio_df_unbind_iommufd(df);
642
643 vfio_device_put_registration(device);
644
645 kfree(df);
646
647 return 0;
648 }
649
650 /*
651 * vfio_mig_get_next_state - Compute the next step in the FSM
652 * @cur_fsm - The current state the device is in
653 * @new_fsm - The target state to reach
654 * @next_fsm - Pointer to the next step to get to new_fsm
655 *
656 * Return 0 upon success, otherwise -errno
657 * Upon success the next step in the state progression between cur_fsm and
658 * new_fsm will be set in next_fsm.
659 *
660 * This breaks down requests for combination transitions into smaller steps and
661 * returns the next step to get to new_fsm. The function may need to be called
662 * multiple times before reaching new_fsm.
663 *
664 */
vfio_mig_get_next_state(struct vfio_device * device,enum vfio_device_mig_state cur_fsm,enum vfio_device_mig_state new_fsm,enum vfio_device_mig_state * next_fsm)665 int vfio_mig_get_next_state(struct vfio_device *device,
666 enum vfio_device_mig_state cur_fsm,
667 enum vfio_device_mig_state new_fsm,
668 enum vfio_device_mig_state *next_fsm)
669 {
670 enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
671 /*
672 * The coding in this table requires the driver to implement the
673 * following FSM arcs:
674 * RESUMING -> STOP
675 * STOP -> RESUMING
676 * STOP -> STOP_COPY
677 * STOP_COPY -> STOP
678 *
679 * If P2P is supported then the driver must also implement these FSM
680 * arcs:
681 * RUNNING -> RUNNING_P2P
682 * RUNNING_P2P -> RUNNING
683 * RUNNING_P2P -> STOP
684 * STOP -> RUNNING_P2P
685 *
686 * If precopy is supported then the driver must support these additional
687 * FSM arcs:
688 * RUNNING -> PRE_COPY
689 * PRE_COPY -> RUNNING
690 * PRE_COPY -> STOP_COPY
691 * However, if precopy and P2P are supported together then the driver
692 * must support these additional arcs beyond the P2P arcs above:
693 * PRE_COPY -> RUNNING
694 * PRE_COPY -> PRE_COPY_P2P
695 * PRE_COPY_P2P -> PRE_COPY
696 * PRE_COPY_P2P -> RUNNING_P2P
697 * PRE_COPY_P2P -> STOP_COPY
698 * RUNNING -> PRE_COPY
699 * RUNNING_P2P -> PRE_COPY_P2P
700 *
701 * Without P2P and precopy the driver must implement:
702 * RUNNING -> STOP
703 * STOP -> RUNNING
704 *
705 * The coding will step through multiple states for some combination
706 * transitions; if all optional features are supported, this means the
707 * following ones:
708 * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
709 * PRE_COPY -> RUNNING -> RUNNING_P2P
710 * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
711 * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
712 * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
713 * PRE_COPY_P2P -> RUNNING_P2P -> STOP
714 * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
715 * RESUMING -> STOP -> RUNNING_P2P
716 * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
717 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING
718 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
719 * RESUMING -> STOP -> STOP_COPY
720 * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
721 * RUNNING -> RUNNING_P2P -> STOP
722 * RUNNING -> RUNNING_P2P -> STOP -> RESUMING
723 * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
724 * RUNNING_P2P -> RUNNING -> PRE_COPY
725 * RUNNING_P2P -> STOP -> RESUMING
726 * RUNNING_P2P -> STOP -> STOP_COPY
727 * STOP -> RUNNING_P2P -> PRE_COPY_P2P
728 * STOP -> RUNNING_P2P -> RUNNING
729 * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
730 * STOP_COPY -> STOP -> RESUMING
731 * STOP_COPY -> STOP -> RUNNING_P2P
732 * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
733 *
734 * The following transitions are blocked:
735 * STOP_COPY -> PRE_COPY
736 * STOP_COPY -> PRE_COPY_P2P
737 */
738 static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
739 [VFIO_DEVICE_STATE_STOP] = {
740 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
741 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
742 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
743 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
744 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
745 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
746 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
747 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
748 },
749 [VFIO_DEVICE_STATE_RUNNING] = {
750 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
751 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
752 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
753 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
754 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
755 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
756 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
757 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
758 },
759 [VFIO_DEVICE_STATE_PRE_COPY] = {
760 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
761 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
762 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
763 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
764 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
765 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
766 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
767 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
768 },
769 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
770 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
771 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
772 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
773 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
774 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
775 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
776 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
777 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
778 },
779 [VFIO_DEVICE_STATE_STOP_COPY] = {
780 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
781 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
782 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
783 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
784 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
785 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
786 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
787 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
788 },
789 [VFIO_DEVICE_STATE_RESUMING] = {
790 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
791 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
792 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
793 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
794 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
795 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
796 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
797 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
798 },
799 [VFIO_DEVICE_STATE_RUNNING_P2P] = {
800 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
801 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
802 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
803 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
804 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
805 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
806 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
807 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
808 },
809 [VFIO_DEVICE_STATE_ERROR] = {
810 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
811 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
812 [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
813 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
814 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
815 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
816 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
817 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
818 },
819 };
820
821 static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
822 [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
823 [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
824 [VFIO_DEVICE_STATE_PRE_COPY] =
825 VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
826 [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
827 VFIO_MIGRATION_P2P |
828 VFIO_MIGRATION_PRE_COPY,
829 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
830 [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
831 [VFIO_DEVICE_STATE_RUNNING_P2P] =
832 VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
833 [VFIO_DEVICE_STATE_ERROR] = ~0U,
834 };
835
836 if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
837 (state_flags_table[cur_fsm] & device->migration_flags) !=
838 state_flags_table[cur_fsm]))
839 return -EINVAL;
840
841 if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
842 (state_flags_table[new_fsm] & device->migration_flags) !=
843 state_flags_table[new_fsm])
844 return -EINVAL;
845
846 /*
847 * Arcs touching optional and unsupported states are skipped over. The
848 * driver will instead see an arc from the original state to the next
849 * logical state, as per the above comment.
850 */
851 *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
852 while ((state_flags_table[*next_fsm] & device->migration_flags) !=
853 state_flags_table[*next_fsm])
854 *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];
855
856 return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
857 }
858 EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
859
860 /*
861 * Convert the drivers's struct file into a FD number and return it to userspace
862 */
vfio_ioct_mig_return_fd(struct file * filp,void __user * arg,struct vfio_device_feature_mig_state * mig)863 static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
864 struct vfio_device_feature_mig_state *mig)
865 {
866 int ret;
867 int fd;
868
869 fd = get_unused_fd_flags(O_CLOEXEC);
870 if (fd < 0) {
871 ret = fd;
872 goto out_fput;
873 }
874
875 mig->data_fd = fd;
876 if (copy_to_user(arg, mig, sizeof(*mig))) {
877 ret = -EFAULT;
878 goto out_put_unused;
879 }
880 fd_install(fd, filp);
881 return 0;
882
883 out_put_unused:
884 put_unused_fd(fd);
885 out_fput:
886 fput(filp);
887 return ret;
888 }
889
/*
 * Handle VFIO_DEVICE_FEATURE GET/SET of the migration device state.
 * GET reports the current state; SET asks the driver to transition and,
 * when the driver returns a data-transfer file, hands its fd to
 * userspace via vfio_ioct_mig_return_fd().
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		/* No data fd to return; report data_fd = -1 below. */
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	/* A driver error from set_state still takes precedence. */
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
939
940 static int
vfio_ioctl_device_feature_migration_data_size(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)941 vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
942 u32 flags, void __user *arg,
943 size_t argsz)
944 {
945 struct vfio_device_feature_mig_data_size data_size = {};
946 unsigned long stop_copy_length;
947 int ret;
948
949 if (!device->mig_ops)
950 return -ENOTTY;
951
952 ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
953 sizeof(data_size));
954 if (ret != 1)
955 return ret;
956
957 ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
958 if (ret)
959 return ret;
960
961 data_size.stop_copy_length = stop_copy_length;
962 if (copy_to_user(arg, &data_size, sizeof(data_size)))
963 return -EFAULT;
964
965 return 0;
966 }
967
/*
 * Handle VFIO_DEVICE_FEATURE_SET opting this device into the v2 precopy
 * info reporting.  Only valid when the driver advertises PRE_COPY; the
 * flag is cleared again on last close (see vfio_df_device_last_close()).
 */
static int
vfio_ioctl_device_feature_migration_precopy_info_v2(struct vfio_device *device,
						    u32 flags, size_t argsz)
{
	int ret;

	if (!(device->migration_flags & VFIO_MIGRATION_PRE_COPY))
		return -EINVAL;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	device->precopy_info_v2 = 1;
	return 0;
}
984
/*
 * Handle VFIO_DEVICE_FEATURE_GET of the migration capability flags the
 * device supports.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration report = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_to_user(arg, &report, sizeof(report)))
		return -EFAULT;

	return 0;
}
1005
/*
 * vfio_combine_iova_ranges() - shrink an interval tree to at most @req_nodes
 * nodes by repeatedly merging the pair of neighbouring ranges separated by
 * the smallest gap.
 * @root:      interval tree of IOVA ranges, modified in place
 * @cur_nodes: number of nodes currently in @root
 * @req_nodes: maximum number of nodes allowed on return
 *
 * Merged nodes are removed from the tree but not freed; the surviving node
 * is stretched to cover the removed one (and the gap between them).
 */
void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
			      u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap, curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);

		/* Empty list */
		if (WARN_ON_ONCE(!comb_start))
			return;

		/*
		 * Remove every node except the first, remembering the end of
		 * the final range so the survivor can be stretched over all.
		 */
		curr = comb_start;
		while (curr) {
			last = curr->last;
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		/* Scan all adjacent pairs to find the smallest gap. */
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}

		/*
		 * Empty list or no nodes to combine. Note comb_start/comb_end
		 * are only initialized when min_gap dropped below ULONG_MAX.
		 */
		if (WARN_ON_ONCE(min_gap == ULONG_MAX))
			break;

		/* Extend the earlier range over the gap, drop the later one. */
		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}
EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);
1062
1063 /* Ranges should fit into a single kernel page */
1064 #define LOG_MAX_RANGES \
1065 (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
1066
/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START: validate the user-supplied IOVA
 * ranges, build an interval tree from them, and hand the tree to the
 * driver's log_start op. The (possibly driver-adjusted) page size is
 * copied back to userspace on success.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bound the allocation: all range structs must fit in one page. */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	/*
	 * NOTE(review): kmalloc_objs() is presumed to be this tree's typed
	 * array allocator (kmalloc_array equivalent) — confirm.
	 */
	nodes = kmalloc_objs(struct interval_tree_node, nnodes);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/* Ranges must be aligned to the requested tracking page size. */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		/* Reject ranges whose end wraps or exceeds the IOVA space. */
		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	/* The driver may adjust control.page_size to what it can track. */
	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* Report the effective page size back; undo log_start on failure. */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}
1150
1151 static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)1152 vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
1153 u32 flags, void __user *arg,
1154 size_t argsz)
1155 {
1156 int ret;
1157
1158 if (!device->log_ops)
1159 return -ENOTTY;
1160
1161 ret = vfio_check_feature(flags, argsz,
1162 VFIO_DEVICE_FEATURE_SET, 0);
1163 if (ret != 1)
1164 return ret;
1165
1166 return device->log_ops->log_stop(device);
1167 }
1168
/* iova_bitmap_for_each() callback: forward to the driver's log op. */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *vdev = opaque;

	return vdev->log_ops->log_read_and_clear(vdev, iova, length, iter);
}
1177
1178 static int
vfio_ioctl_device_feature_logging_report(struct vfio_device * device,u32 flags,void __user * arg,size_t argsz)1179 vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
1180 u32 flags, void __user *arg,
1181 size_t argsz)
1182 {
1183 size_t minsz =
1184 offsetofend(struct vfio_device_feature_dma_logging_report,
1185 bitmap);
1186 struct vfio_device_feature_dma_logging_report report;
1187 struct iova_bitmap *iter;
1188 u64 iova_end;
1189 int ret;
1190
1191 if (!device->log_ops)
1192 return -ENOTTY;
1193
1194 ret = vfio_check_feature(flags, argsz,
1195 VFIO_DEVICE_FEATURE_GET,
1196 sizeof(report));
1197 if (ret != 1)
1198 return ret;
1199
1200 if (copy_from_user(&report, arg, minsz))
1201 return -EFAULT;
1202
1203 if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
1204 return -EINVAL;
1205
1206 if (check_add_overflow(report.iova, report.length, &iova_end) ||
1207 iova_end > ULONG_MAX)
1208 return -EOVERFLOW;
1209
1210 iter = iova_bitmap_alloc(report.iova, report.length,
1211 report.page_size,
1212 u64_to_user_ptr(report.bitmap));
1213 if (IS_ERR(iter))
1214 return PTR_ERR(iter);
1215
1216 ret = iova_bitmap_for_each(iter, device,
1217 vfio_device_log_read_and_clear);
1218
1219 iova_bitmap_free(iter);
1220 return ret;
1221 }
1222
/*
 * VFIO_DEVICE_FEATURE dispatcher: validate the common header and route to
 * the per-feature handler, falling back to the driver op for unknown
 * features.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;
	size_t rest;
	u32 flags;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	flags = feature.flags;
	rest = feature.argsz - minsz;

	/* Reject flags outside the defined set. */
	if (flags & ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
		      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET and SET may only be combined when probing. */
	if (!(flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (flags & VFIO_DEVICE_FEATURE_SET) &&
	    (flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(device, flags,
							   arg->data, rest);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(device,
						flags, arg->data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(device, flags,
							       arg->data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(device, flags,
							      arg->data, rest);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(device, flags,
								arg->data, rest);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(device,
						flags, arg->data, rest);
	case VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2:
		return vfio_ioctl_device_feature_migration_precopy_info_v2(
						device, flags, rest);
	default:
		/* Unknown feature: let the driver have a go, if it can. */
		if (unlikely(!device->ops->device_feature))
			return -ENOTTY;
		return device->ops->device_feature(device, flags, arg->data,
						   rest);
	}
}
1283
/*
 * VFIO_DEVICE_GET_REGION_INFO: query the driver for region info and an
 * optional capability chain. If the user buffer is too small for the
 * chain, report the required size via argsz with cap_offset = 0.
 */
static long vfio_get_region_info(struct vfio_device *device,
				 struct vfio_region_info __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_region_info, offset);
	struct vfio_region_info info = {};
	struct vfio_info_cap caps = {};
	int ret;

	if (!device->ops->get_region_info_caps)
		return -EINVAL;

	if (copy_from_user(&info, arg, minsz))
		return -EFAULT;
	if (info.argsz < minsz)
		return -EINVAL;

	ret = device->ops->get_region_info_caps(device, &info, &caps);
	if (ret)
		goto out_free;

	if (caps.size) {
		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
		if (info.argsz < sizeof(info) + caps.size) {
			/* Too small: tell userspace how much room is needed. */
			info.argsz = sizeof(info) + caps.size;
			info.cap_offset = 0;
		} else {
			/* Chain is placed directly after the info struct. */
			vfio_info_cap_shift(&caps, sizeof(info));
			if (copy_to_user(arg + 1, caps.buf, caps.size)) {
				ret = -EFAULT;
				goto out_free;
			}
			info.cap_offset = sizeof(info);
		}
	}

	if (copy_to_user(arg, &info, minsz))
		ret = -EFAULT;

out_free:
	kfree(caps.buf);
	return ret;
}
1328
/*
 * Main ioctl entry point for VFIO device files. All paths after the
 * access check run with a runtime-PM reference held on the device.
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	void __user *uptr = (void __user *)arg;
	int ret;

	/* BIND_IOMMUFD is the only ioctl permitted before access is granted. */
	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
		return vfio_df_ioctl_bind_iommufd(df, uptr);

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	/* Keep the device runtime-resumed for the duration of the ioctl. */
	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	/* cdev only ioctls */
	if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
		switch (cmd) {
		case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_attach_pt(df, uptr);
			goto out;

		case VFIO_DEVICE_DETACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_detach_pt(df, uptr);
			goto out;
		}
	}

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, uptr);
		break;

	case VFIO_DEVICE_GET_REGION_INFO:
		ret = vfio_get_region_info(device, uptr);
		break;

	default:
		/* Anything else is the driver's business, if it has an op. */
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}
out:
	vfio_device_pm_runtime_put(device);
	return ret;
}
1381
vfio_device_fops_read(struct file * filep,char __user * buf,size_t count,loff_t * ppos)1382 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1383 size_t count, loff_t *ppos)
1384 {
1385 struct vfio_device_file *df = filep->private_data;
1386 struct vfio_device *device = df->device;
1387
1388 /* Paired with smp_store_release() following vfio_df_open() */
1389 if (!smp_load_acquire(&df->access_granted))
1390 return -EINVAL;
1391
1392 if (unlikely(!device->ops->read))
1393 return -EINVAL;
1394
1395 return device->ops->read(device, buf, count, ppos);
1396 }
1397
vfio_device_fops_write(struct file * filep,const char __user * buf,size_t count,loff_t * ppos)1398 static ssize_t vfio_device_fops_write(struct file *filep,
1399 const char __user *buf,
1400 size_t count, loff_t *ppos)
1401 {
1402 struct vfio_device_file *df = filep->private_data;
1403 struct vfio_device *device = df->device;
1404
1405 /* Paired with smp_store_release() following vfio_df_open() */
1406 if (!smp_load_acquire(&df->access_granted))
1407 return -EINVAL;
1408
1409 if (unlikely(!device->ops->write))
1410 return -EINVAL;
1411
1412 return device->ops->write(device, buf, count, ppos);
1413 }
1414
vfio_device_fops_mmap(struct file * filep,struct vm_area_struct * vma)1415 static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1416 {
1417 struct vfio_device_file *df = filep->private_data;
1418 struct vfio_device *device = df->device;
1419
1420 /* Paired with smp_store_release() following vfio_df_open() */
1421 if (!smp_load_acquire(&df->access_granted))
1422 return -EINVAL;
1423
1424 if (unlikely(!device->ops->mmap))
1425 return -EINVAL;
1426
1427 return device->ops->mmap(device, vma);
1428 }
1429
1430 #ifdef CONFIG_PROC_FS
vfio_device_show_fdinfo(struct seq_file * m,struct file * filep)1431 static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep)
1432 {
1433 char *path;
1434 struct vfio_device_file *df = filep->private_data;
1435 struct vfio_device *device = df->device;
1436
1437 path = kobject_get_path(&device->dev->kobj, GFP_KERNEL);
1438 if (!path)
1439 return;
1440
1441 seq_printf(m, "vfio-device-syspath: /sys%s\n", path);
1442 kfree(path);
1443 }
1444 #endif
1445
/*
 * File operations for VFIO device files. vfio_device_from_file() uses the
 * address of this table to recognize a VFIO device file.
 */
const struct file_operations vfio_device_fops = {
	.owner = THIS_MODULE,
	.open = vfio_device_fops_cdev_open,
	.release = vfio_device_fops_release,
	.read = vfio_device_fops_read,
	.write = vfio_device_fops_write,
	.unlocked_ioctl = vfio_device_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl, /* ioctls take pointer-sized args */
	.mmap = vfio_device_fops_mmap,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = vfio_device_show_fdinfo,
#endif
};
1459
vfio_device_from_file(struct file * file)1460 static struct vfio_device *vfio_device_from_file(struct file *file)
1461 {
1462 struct vfio_device_file *df = file->private_data;
1463
1464 if (file->f_op != &vfio_device_fops)
1465 return NULL;
1466 return df->device;
1467 }
1468
1469 /**
1470 * vfio_file_is_valid - True if the file is valid vfio file
1471 * @file: VFIO group file or VFIO device file
1472 */
vfio_file_is_valid(struct file * file)1473 bool vfio_file_is_valid(struct file *file)
1474 {
1475 return vfio_group_from_file(file) ||
1476 vfio_device_from_file(file);
1477 }
1478 EXPORT_SYMBOL_GPL(vfio_file_is_valid);
1479
1480 /**
1481 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
1482 * is always CPU cache coherent
1483 * @file: VFIO group file or VFIO device file
1484 *
1485 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
1486 * bit in DMA transactions. A return of false indicates that the user has
1487 * rights to access additional instructions such as wbinvd on x86.
1488 */
vfio_file_enforced_coherent(struct file * file)1489 bool vfio_file_enforced_coherent(struct file *file)
1490 {
1491 struct vfio_device *device;
1492 struct vfio_group *group;
1493
1494 group = vfio_group_from_file(file);
1495 if (group)
1496 return vfio_group_enforced_coherent(group);
1497
1498 device = vfio_device_from_file(file);
1499 if (device)
1500 return device_iommu_capable(device->dev,
1501 IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
1502
1503 return true;
1504 }
1505 EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
1506
/* Record @kvm on the device file, serialized by its kvm_ref_lock. */
static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_device_file *df = file->private_data;

	/*
	 * The kvm is first recorded in the vfio_device_file, and will
	 * be propagated to vfio_device::kvm when the file is bound to
	 * iommufd successfully in the vfio device cdev path.
	 */
	spin_lock(&df->kvm_ref_lock);
	df->kvm = kvm;
	spin_unlock(&df->kvm_ref_lock);
}
1520
/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file or VFIO device file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the file.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = vfio_group_from_file(file);

	if (group)
		vfio_group_set_kvm(group, kvm);

	if (vfio_device_from_file(file))
		vfio_device_file_set_kvm(file, kvm);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
1541
1542 /*
1543 * Sub-module support
1544 */
1545 /*
1546 * Helper for managing a buffer of info chain capabilities, allocate or
1547 * reallocate a buffer with additional @size, filling in @id and @version
1548 * of the capability. A pointer to the new capability is returned.
1549 *
1550 * NB. The chain is based at the head of the buffer, so new entries are
1551 * added to the tail, vfio_info_cap_shift() should be called to fixup the
1552 * next offsets prior to copying to the user buffer.
1553 */
/*
 * Grow the capability buffer by @size bytes (u64-aligned), zero the new
 * entry, fill in its id/version and link it onto the end of the chain.
 * Returns the new header, or ERR_PTR(-ENOMEM) after dropping the whole
 * buffer on allocation failure.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	struct vfio_info_cap_header *header, *iter;
	void *newbuf;

	/* Ensure that the next capability struct will be aligned */
	size = ALIGN(size, sizeof(u64));

	newbuf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!newbuf) {
		/* On failure the entire chain is discarded. */
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = newbuf;
	header = newbuf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);
	header->id = id;
	header->version = version;

	/* Walk to the current tail and link in the new entry. */
	iter = newbuf;
	while (iter->next)
		iter = newbuf + iter->next;
	iter->next = caps->size;

	caps->size += size;
	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1590
/*
 * Rebase every 'next' offset in the capability chain by @offset, turning
 * buffer-relative offsets into user-struct-relative ones before the chain
 * is copied out.
 */
void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	void *buf = (void *)caps->buf;
	struct vfio_info_cap_header *iter = buf;

	/* Capability structs should start with proper alignment */
	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));

	while (iter->next) {
		/* Grab the pre-shift offset to locate the next entry. */
		u32 next = iter->next;

		iter->next = next + offset;
		iter = buf + next;
	}
}
EXPORT_SYMBOL(vfio_info_cap_shift);
1603
/*
 * Append a fully-formed capability @cap of @size bytes to @caps. The header
 * is written by vfio_info_cap_add(); only the payload is copied here.
 */
int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header =
		vfio_info_cap_add(caps, size, cap->id, cap->version);

	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));
	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);
1618
/*
 * Validate a VFIO_DEVICE_SET_IRQS header against the device's IRQ layout
 * and, when the data type carries a payload, compute its size into
 * @data_size. Returns 0 on success, -EINVAL on any inconsistency.
 */
int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	size_t minsz = offsetofend(struct vfio_irq_set, count);
	size_t elem;

	if (hdr->argsz < minsz || hdr->index >= max_irq_type ||
	    hdr->count >= (U32_MAX - hdr->start) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	/* The [start, start + count) window must lie within the device IRQs. */
	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		elem = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		elem = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		elem = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (!elem)
		return 0;

	/* Payload must fit in argsz, and the caller must want its size. */
	if (hdr->argsz - minsz < hdr->count * elem || !data_size)
		return -EINVAL;

	*data_size = hdr->count * elem;
	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
1666
1667 /*
1668 * Pin contiguous user pages and return their associated host pages for local
1669 * domain only.
1670 * @device [in] : device
1671 * @iova [in] : starting IOVA of user pages to be pinned.
1672 * @npage [in] : count of pages to be pinned. This count should not
1673 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1674 * @prot [in] : protection flags
1675 * @pages[out] : array of host pages
1676 * Return error or number of pages pinned.
1677 *
1678 * A driver may only call this function if the vfio_device was created
1679 * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
1680 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	int ret;

	/* group->container cannot change while a vfio device is open */
	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
		return -EINVAL;
	if (!device->ops->dma_unmap)
		return -EINVAL;

	if (vfio_device_has_container(device))
		return vfio_device_container_pin_pages(device, iova,
						       npage, prot, pages);

	if (!device->iommufd_access)
		return -EINVAL;

	if (iova > ULONG_MAX)
		return -EINVAL;

	/*
	 * VFIO ignores the sub page offset, npages is from the start of
	 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
	 * the sub page offset by doing:
	 *	pages[0] + (iova % PAGE_SIZE)
	 */
	ret = iommufd_access_pin_pages(
		device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
		npage * PAGE_SIZE, pages,
		(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
	return ret ? ret : npage;
}
EXPORT_SYMBOL(vfio_pin_pages);
1714
1715 /*
1716 * Unpin contiguous host pages for local domain only.
1717 * @device [in] : device
1718 * @iova [in] : starting address of user pages to be unpinned.
1719 * @npage [in] : count of pages to be unpinned. This count should not
1720 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1721 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	if (WARN_ON(!vfio_assert_device_open(device)) ||
	    WARN_ON(!device->ops->dma_unmap))
		return;

	if (vfio_device_has_container(device)) {
		vfio_device_container_unpin_pages(device, iova, npage);
	} else if (device->iommufd_access) {
		if (WARN_ON(iova > ULONG_MAX))
			return;
		iommufd_access_unpin_pages(device->iommufd_access,
					   ALIGN_DOWN(iova, PAGE_SIZE),
					   npage * PAGE_SIZE);
	}
}
EXPORT_SYMBOL(vfio_unpin_pages);
1743
1744 /*
1745 * This interface allows the CPUs to perform some sort of virtual DMA on
1746 * behalf of the device.
1747 *
1748 * CPUs read/write from/into a range of IOVAs pointing to user space memory
1749 * into/from a kernel buffer.
1750 *
1751 * As the read/write of user space memory is conducted via the CPUs and is
1752 * not a real device DMA, it is not necessary to pin the user space memory.
1753 *
1754 * @device [in] : VFIO device
1755 * @iova [in] : base IOVA of a user space buffer
1756 * @data [in] : pointer to kernel buffer
1757 * @len [in] : kernel buffer length
1758 * @write : indicate read or write
1759 * Return error code on failure or 0 on success.
1760 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	unsigned int flags = 0;

	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	if (vfio_device_has_container(device))
		return vfio_device_container_dma_rw(device, iova,
						    data, len, write);

	if (!device->iommufd_access)
		return -EINVAL;

	if (iova > ULONG_MAX)
		return -EINVAL;

	/* VFIO historically tries to auto-detect a kthread */
	if (!current->mm)
		flags |= IOMMUFD_ACCESS_RW_KTHREAD;
	if (write)
		flags |= IOMMUFD_ACCESS_RW_WRITE;

	return iommufd_access_rw(device->iommufd_access, iova, data, len,
				 flags);
}
EXPORT_SYMBOL(vfio_dma_rw);
1788
1789 /*
1790 * Module/class support
1791 */
/*
 * Module init: bring up the group layer, virqfd, the vfio-dev class and
 * the device cdev region; each error path unwinds everything before it.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.device_ida);

	ret = vfio_group_init();
	if (ret)
		return ret;

	ret = vfio_virqfd_init();
	if (ret)
		goto err_virqfd;

	/* /sys/class/vfio-dev/vfioX */
	vfio.device_class = class_create("vfio-dev");
	if (IS_ERR(vfio.device_class)) {
		ret = PTR_ERR(vfio.device_class);
		goto err_dev_class;
	}

	ret = vfio_cdev_init(vfio.device_class);
	if (ret)
		goto err_alloc_dev_chrdev;

	vfio_debugfs_create_root();
	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

	/* Unwind in strict reverse order of setup. */
err_alloc_dev_chrdev:
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
err_dev_class:
	vfio_virqfd_exit();
err_virqfd:
	vfio_group_cleanup();
	return ret;
}
1830
/* Module exit: tear everything down in reverse order of vfio_init(). */
static void __exit vfio_cleanup(void)
{
	vfio_debugfs_remove_root();
	ida_destroy(&vfio.device_ida);
	vfio_cdev_cleanup();
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
	vfio_virqfd_exit();
	vfio_group_cleanup();
	xa_destroy(&vfio_device_set_xa);
}
1842
1843 module_init(vfio_init);
1844 module_exit(vfio_cleanup);
1845
1846 MODULE_IMPORT_NS("IOMMUFD");
1847 MODULE_VERSION(DRIVER_VERSION);
1848 MODULE_LICENSE("GPL v2");
1849 MODULE_AUTHOR(DRIVER_AUTHOR);
1850 MODULE_DESCRIPTION(DRIVER_DESC);
1851 MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
1852