1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3 */
4 #include <linux/iommu.h>
5 #include <linux/iommufd.h>
6 #include <linux/pci-ats.h>
7 #include <linux/slab.h>
8 #include <uapi/linux/iommufd.h>
9
10 #include "../iommu-priv.h"
11 #include "io_pagetable.h"
12 #include "iommufd_private.h"
13
14 static bool allow_unsafe_interrupts;
15 module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
16 MODULE_PARM_DESC(
17 allow_unsafe_interrupts,
18 "Allow IOMMUFD to bind to devices even if the platform cannot isolate "
19 "the MSI interrupt window. Enabling this is a security weakness.");
20
21 struct iommufd_attach {
22 struct iommufd_hw_pagetable *hwpt;
23 struct xarray device_array;
24 };
25
iommufd_group_release(struct kref * kref)26 static void iommufd_group_release(struct kref *kref)
27 {
28 struct iommufd_group *igroup =
29 container_of(kref, struct iommufd_group, ref);
30
31 WARN_ON(!xa_empty(&igroup->pasid_attach));
32
33 xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
34 NULL, GFP_KERNEL);
35 iommu_group_put(igroup->group);
36 mutex_destroy(&igroup->lock);
37 kfree(igroup);
38 }
39
iommufd_put_group(struct iommufd_group * group)40 static void iommufd_put_group(struct iommufd_group *group)
41 {
42 kref_put(&group->ref, iommufd_group_release);
43 }
44
iommufd_group_try_get(struct iommufd_group * igroup,struct iommu_group * group)45 static bool iommufd_group_try_get(struct iommufd_group *igroup,
46 struct iommu_group *group)
47 {
48 if (!igroup)
49 return false;
50 /*
51 * group ID's cannot be re-used until the group is put back which does
52 * not happen if we could get an igroup pointer under the xa_lock.
53 */
54 if (WARN_ON(igroup->group != group))
55 return false;
56 return kref_get_unless_zero(&igroup->ref);
57 }
58
59 /*
60 * iommufd needs to store some more data for each iommu_group, we keep a
61 * parallel xarray indexed by iommu_group id to hold this instead of putting it
62 * in the core structure. To keep things simple the iommufd_group memory is
63 * unique within the iommufd_ctx. This makes it easy to check there are no
64 * memory leaks.
65 */
iommufd_get_group(struct iommufd_ctx * ictx,struct device * dev)66 static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
67 struct device *dev)
68 {
69 struct iommufd_group *new_igroup;
70 struct iommufd_group *cur_igroup;
71 struct iommufd_group *igroup;
72 struct iommu_group *group;
73 unsigned int id;
74
75 group = iommu_group_get(dev);
76 if (!group)
77 return ERR_PTR(-ENODEV);
78
79 id = iommu_group_id(group);
80
81 xa_lock(&ictx->groups);
82 igroup = xa_load(&ictx->groups, id);
83 if (iommufd_group_try_get(igroup, group)) {
84 xa_unlock(&ictx->groups);
85 iommu_group_put(group);
86 return igroup;
87 }
88 xa_unlock(&ictx->groups);
89
90 new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
91 if (!new_igroup) {
92 iommu_group_put(group);
93 return ERR_PTR(-ENOMEM);
94 }
95
96 kref_init(&new_igroup->ref);
97 mutex_init(&new_igroup->lock);
98 xa_init(&new_igroup->pasid_attach);
99 new_igroup->sw_msi_start = PHYS_ADDR_MAX;
100 /* group reference moves into new_igroup */
101 new_igroup->group = group;
102
103 /*
104 * The ictx is not additionally refcounted here becase all objects using
105 * an igroup must put it before their destroy completes.
106 */
107 new_igroup->ictx = ictx;
108
109 /*
110 * We dropped the lock so igroup is invalid. NULL is a safe and likely
111 * value to assume for the xa_cmpxchg algorithm.
112 */
113 cur_igroup = NULL;
114 xa_lock(&ictx->groups);
115 while (true) {
116 igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
117 GFP_KERNEL);
118 if (xa_is_err(igroup)) {
119 xa_unlock(&ictx->groups);
120 iommufd_put_group(new_igroup);
121 return ERR_PTR(xa_err(igroup));
122 }
123
124 /* new_group was successfully installed */
125 if (cur_igroup == igroup) {
126 xa_unlock(&ictx->groups);
127 return new_igroup;
128 }
129
130 /* Check again if the current group is any good */
131 if (iommufd_group_try_get(igroup, group)) {
132 xa_unlock(&ictx->groups);
133 iommufd_put_group(new_igroup);
134 return igroup;
135 }
136 cur_igroup = igroup;
137 }
138 }
139
iommufd_device_destroy(struct iommufd_object * obj)140 void iommufd_device_destroy(struct iommufd_object *obj)
141 {
142 struct iommufd_device *idev =
143 container_of(obj, struct iommufd_device, obj);
144
145 iommu_device_release_dma_owner(idev->dev);
146 iommufd_put_group(idev->igroup);
147 if (!iommufd_selftest_is_mock_dev(idev->dev))
148 iommufd_ctx_put(idev->ictx);
149 }
150
151 /**
152 * iommufd_device_bind - Bind a physical device to an iommu fd
153 * @ictx: iommufd file descriptor
154 * @dev: Pointer to a physical device struct
155 * @id: Output ID number to return to userspace for this device
156 *
157 * A successful bind establishes an ownership over the device and returns
158 * struct iommufd_device pointer, otherwise returns error pointer.
159 *
160 * A driver using this API must set driver_managed_dma and must not touch
161 * the device until this routine succeeds and establishes ownership.
162 *
163 * Binding a PCI device places the entire RID under iommufd control.
164 *
165 * The caller must undo this with iommufd_device_unbind()
166 */
iommufd_device_bind(struct iommufd_ctx * ictx,struct device * dev,u32 * id)167 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
168 struct device *dev, u32 *id)
169 {
170 struct iommufd_device *idev;
171 struct iommufd_group *igroup;
172 int rc;
173
174 /*
175 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
176 * to restore cache coherency.
177 */
178 if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
179 return ERR_PTR(-EINVAL);
180
181 igroup = iommufd_get_group(ictx, dev);
182 if (IS_ERR(igroup))
183 return ERR_CAST(igroup);
184
185 /*
186 * For historical compat with VFIO the insecure interrupt path is
187 * allowed if the module parameter is set. Secure/Isolated means that a
188 * MemWr operation from the device (eg a simple DMA) cannot trigger an
189 * interrupt outside this iommufd context.
190 */
191 if (!iommufd_selftest_is_mock_dev(dev) &&
192 !iommu_group_has_isolated_msi(igroup->group)) {
193 if (!allow_unsafe_interrupts) {
194 rc = -EPERM;
195 goto out_group_put;
196 }
197
198 dev_warn(
199 dev,
200 "MSI interrupts are not secure, they cannot be isolated by the platform. "
201 "Check that platform features like interrupt remapping are enabled. "
202 "Use the \"allow_unsafe_interrupts\" module parameter to override\n");
203 }
204
205 rc = iommu_device_claim_dma_owner(dev, ictx);
206 if (rc)
207 goto out_group_put;
208
209 idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
210 if (IS_ERR(idev)) {
211 rc = PTR_ERR(idev);
212 goto out_release_owner;
213 }
214 idev->ictx = ictx;
215 if (!iommufd_selftest_is_mock_dev(dev))
216 iommufd_ctx_get(ictx);
217 idev->dev = dev;
218 idev->enforce_cache_coherency =
219 device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
220 /* The calling driver is a user until iommufd_device_unbind() */
221 refcount_inc(&idev->obj.users);
222 /* igroup refcount moves into iommufd_device */
223 idev->igroup = igroup;
224 mutex_init(&idev->iopf_lock);
225
226 /*
227 * If the caller fails after this success it must call
228 * iommufd_unbind_device() which is safe since we hold this refcount.
229 * This also means the device is a leaf in the graph and no other object
230 * can take a reference on it.
231 */
232 iommufd_object_finalize(ictx, &idev->obj);
233 *id = idev->obj.id;
234 return idev;
235
236 out_release_owner:
237 iommu_device_release_dma_owner(dev);
238 out_group_put:
239 iommufd_put_group(igroup);
240 return ERR_PTR(rc);
241 }
242 EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
243
244 /**
245 * iommufd_ctx_has_group - True if any device within the group is bound
246 * to the ictx
247 * @ictx: iommufd file descriptor
248 * @group: Pointer to a physical iommu_group struct
249 *
250 * True if any device within the group has been bound to this ictx, ex. via
251 * iommufd_device_bind(), therefore implying ictx ownership of the group.
252 */
iommufd_ctx_has_group(struct iommufd_ctx * ictx,struct iommu_group * group)253 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
254 {
255 struct iommufd_object *obj;
256 unsigned long index;
257
258 if (!ictx || !group)
259 return false;
260
261 xa_lock(&ictx->objects);
262 xa_for_each(&ictx->objects, index, obj) {
263 if (obj->type == IOMMUFD_OBJ_DEVICE &&
264 container_of(obj, struct iommufd_device, obj)
265 ->igroup->group == group) {
266 xa_unlock(&ictx->objects);
267 return true;
268 }
269 }
270 xa_unlock(&ictx->objects);
271 return false;
272 }
273 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
274
275 /**
276 * iommufd_device_unbind - Undo iommufd_device_bind()
277 * @idev: Device returned by iommufd_device_bind()
278 *
279 * Release the device from iommufd control. The DMA ownership will return back
280 * to unowned with DMA controlled by the DMA API. This invalidates the
281 * iommufd_device pointer, other APIs that consume it must not be called
282 * concurrently.
283 */
iommufd_device_unbind(struct iommufd_device * idev)284 void iommufd_device_unbind(struct iommufd_device *idev)
285 {
286 iommufd_object_destroy_user(idev->ictx, &idev->obj);
287 }
288 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
289
iommufd_device_to_ictx(struct iommufd_device * idev)290 struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
291 {
292 return idev->ictx;
293 }
294 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");
295
iommufd_device_to_id(struct iommufd_device * idev)296 u32 iommufd_device_to_id(struct iommufd_device *idev)
297 {
298 return idev->obj.id;
299 }
300 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
301
/*
 * Count the devices of @igroup recorded as attached for @pasid. Returns 0 when
 * nothing is attached for that PASID. Caller must hold igroup->lock.
 */
static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
					     ioasid_t pasid)
{
	struct iommufd_attach *attach;
	struct iommufd_device *idev;
	unsigned int ndevs = 0;
	unsigned long index;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach)
		return 0;

	/* idev is only the iteration cursor; just the entries are counted */
	xa_for_each(&attach->device_array, index, idev)
		ndevs++;
	return ndevs;
}
318
#ifdef CONFIG_IRQ_MSI_IOMMU
/*
 * Install into @hwpt_paging every software MSI map of the ictx that matches
 * the group's sw_msi_start and is flagged in the group's required_sw_msi
 * bitmap. Nothing is done when sw_msi_start is still PHYS_ADDR_MAX.
 *
 * Returns 0 on success or the first error from iommufd_sw_msi_install().
 */
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_ctx *ictx = igroup->ictx;
	struct iommufd_sw_msi_map *map;

	if (igroup->sw_msi_start == PHYS_ADDR_MAX)
		return 0;

	/*
	 * Install all the MSI pages the device has been using into the domain
	 */
	guard(mutex)(&ictx->sw_msi_lock);
	list_for_each_entry(map, &ictx->sw_msi_list, sw_msi_item) {
		int rc;

		if (map->sw_msi_start != igroup->sw_msi_start)
			continue;
		if (!test_bit(map->id, igroup->required_sw_msi.bitmap))
			continue;

		rc = iommufd_sw_msi_install(ictx, hwpt_paging, map);
		if (rc)
			return rc;
	}
	return 0;
}
#else
/* Stub used when CONFIG_IRQ_MSI_IOMMU is not set: always succeeds. */
static inline int
iommufd_group_setup_msi(struct iommufd_group *igroup,
			struct iommufd_hwpt_paging *hwpt_paging)
{
	return 0;
}
#endif
354
355 static bool
iommufd_group_first_attach(struct iommufd_group * igroup,ioasid_t pasid)356 iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
357 {
358 lockdep_assert_held(&igroup->lock);
359 return !xa_load(&igroup->pasid_attach, pasid);
360 }
361
362 static int
iommufd_device_attach_reserved_iova(struct iommufd_device * idev,struct iommufd_hwpt_paging * hwpt_paging)363 iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
364 struct iommufd_hwpt_paging *hwpt_paging)
365 {
366 struct iommufd_group *igroup = idev->igroup;
367 int rc;
368
369 lockdep_assert_held(&igroup->lock);
370
371 rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
372 idev->dev,
373 &igroup->sw_msi_start);
374 if (rc)
375 return rc;
376
377 if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
378 rc = iommufd_group_setup_msi(igroup, hwpt_paging);
379 if (rc) {
380 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
381 idev->dev);
382 return rc;
383 }
384 }
385 return 0;
386 }
387
388 /* The device attach/detach/replace helpers for attach_handle */
389
iommufd_device_is_attached(struct iommufd_device * idev,ioasid_t pasid)390 static bool iommufd_device_is_attached(struct iommufd_device *idev,
391 ioasid_t pasid)
392 {
393 struct iommufd_attach *attach;
394
395 attach = xa_load(&idev->igroup->pasid_attach, pasid);
396 return xa_load(&attach->device_array, idev->obj.id);
397 }
398
/*
 * Check that attaching @hwpt for @pasid does not mix pasid-compatible and
 * non-pasid-compatible page tables within the group:
 *
 *  - For IOMMU_NO_PASID, a non-compat @hwpt is rejected if any entry exists
 *    in pasid_attach at an index above IOMMU_NO_PASID.
 *  - For a real PASID, @hwpt must be compat and so must the hwpt currently
 *    attached for IOMMU_NO_PASID, if there is one.
 *
 * Caller must hold igroup->lock. Returns 0 or -EINVAL.
 */
static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev,
				     ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;

	lockdep_assert_held(&igroup->lock);

	if (pasid != IOMMU_NO_PASID) {
		struct iommufd_attach *rid_attach;

		if (!hwpt->pasid_compat)
			return -EINVAL;

		rid_attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
		if (rid_attach && rid_attach->hwpt &&
		    !rid_attach->hwpt->pasid_compat)
			return -EINVAL;
		return 0;
	}

	if (!hwpt->pasid_compat) {
		unsigned long start = IOMMU_NO_PASID;

		/* Any present entry after index IOMMU_NO_PASID is a PASID */
		if (xa_find_after(&igroup->pasid_attach, &start, UINT_MAX,
				  XA_PRESENT))
			return -EINVAL;
	}

	return 0;
}
427
/*
 * Attach @hwpt's domain for @pasid with a freshly allocated attach handle:
 * to the whole iommu_group for IOMMU_NO_PASID, otherwise to the device's
 * PASID. IOPF is enabled on @idev first when @hwpt has a fault object and is
 * disabled again if the attach fails.
 *
 * Returns 0 on success, -ENOMEM, or the error from the compat check, the IOPF
 * enable or the core attach call. The handle is freed on every failure.
 */
static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_device *idev,
				      ioasid_t pasid)
{
	struct iommufd_attach_handle *new_handle;
	int ret;

	ret = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (ret)
		return ret;

	new_handle = kzalloc(sizeof(*new_handle), GFP_KERNEL);
	if (!new_handle)
		return -ENOMEM;

	if (hwpt->fault) {
		ret = iommufd_fault_iopf_enable(idev);
		if (ret)
			goto err_free;
	}

	new_handle->idev = idev;
	if (pasid != IOMMU_NO_PASID)
		ret = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
						&new_handle->handle);
	else
		ret = iommu_attach_group_handle(hwpt->domain,
						idev->igroup->group,
						&new_handle->handle);
	if (!ret)
		return 0;

	/* Attach failed: undo the IOPF enable done above, if any */
	if (hwpt->fault)
		iommufd_fault_iopf_disable(idev);
err_free:
	kfree(new_handle);
	return ret;
}
468
469 static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device * idev,ioasid_t pasid)470 iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
471 {
472 struct iommu_attach_handle *handle;
473
474 lockdep_assert_held(&idev->igroup->lock);
475
476 handle =
477 iommu_attach_handle_get(idev->igroup->group, pasid, 0);
478 if (IS_ERR(handle))
479 return NULL;
480 return to_iommufd_handle(handle);
481 }
482
/*
 * Detach @hwpt's domain for @pasid (group-wide for IOMMU_NO_PASID, per-device
 * PASID otherwise) and free the attach handle that was in use. When @hwpt has
 * a fault object, auto-respond to faults for the handle and disable IOPF on
 * @idev.
 */
static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach_handle *cur_handle;

	/* Fetch the handle before the detach, it is freed at the end */
	cur_handle = iommufd_device_get_attach_handle(idev, pasid);

	if (pasid != IOMMU_NO_PASID)
		iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
	else
		iommu_detach_group_handle(hwpt->domain, idev->igroup->group);

	if (hwpt->fault) {
		iommufd_auto_response_faults(hwpt, cur_handle);
		iommufd_fault_iopf_disable(idev);
	}

	/* cur_handle may be NULL if the lookup failed; kfree() accepts that */
	kfree(cur_handle);
}
501
/*
 * Replace the domain attached for @pasid from @old to @hwpt using a newly
 * allocated attach handle. IOPF is enabled only when moving from a hwpt
 * without a fault object to one with it, and disabled only when moving the
 * other way. On success the previous handle is freed; on failure the new
 * handle is freed and an IOPF enable done here is reverted.
 *
 * Returns 0 or a negative errno.
 */
static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
				       ioasid_t pasid,
				       struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *new_handle;
	struct iommufd_attach_handle *prev_handle;
	int ret;

	ret = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (ret)
		return ret;

	/* Remember the current handle so it can be released after the swap */
	prev_handle = iommufd_device_get_attach_handle(idev, pasid);

	new_handle = kzalloc(sizeof(*new_handle), GFP_KERNEL);
	if (!new_handle)
		return -ENOMEM;

	if (hwpt->fault && !old->fault) {
		ret = iommufd_fault_iopf_enable(idev);
		if (ret)
			goto err_free;
	}

	new_handle->idev = idev;
	if (pasid != IOMMU_NO_PASID)
		ret = iommu_replace_device_pasid(hwpt->domain, idev->dev,
						 pasid, &new_handle->handle);
	else
		ret = iommu_replace_group_handle(idev->igroup->group,
						 hwpt->domain,
						 &new_handle->handle);
	if (ret)
		goto err_disable_iopf;

	if (old->fault) {
		/*
		 * NOTE(review): the new @hwpt, not @old, is handed to
		 * iommufd_auto_response_faults() here - confirm this is what
		 * the callee expects.
		 */
		iommufd_auto_response_faults(hwpt, prev_handle);
		if (!hwpt->fault)
			iommufd_fault_iopf_disable(idev);
	}
	kfree(prev_handle);

	return 0;

err_disable_iopf:
	if (hwpt->fault && !old->fault)
		iommufd_fault_iopf_disable(idev);
err_free:
	kfree(new_handle);
	return ret;
}
552
/*
 * Attach @idev to @hwpt for @pasid and record the attachment in the group.
 *
 * The first attach for a given PASID reserves the pasid_attach slot, allocates
 * the iommufd_attach and performs the real domain attach. Later devices of the
 * same group are only added to the existing attachment, and must name the
 * same hwpt. For a paging hwpt and IOMMU_NO_PASID the device's reserved
 * regions are enforced on the IOAS first.
 *
 * On success one reference is taken on hwpt->obj.users. Returns 0 or a
 * negative errno; -EINVAL when the group is already attached to a different
 * hwpt for this PASID.
 */
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *cur_hwpt;
	struct iommufd_attach *attach;
	int ret;

	mutex_lock(&igroup->lock);

	/* Reserve the PASID slot if it is empty, else fetch the occupant */
	attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
			    XA_ZERO_ENTRY, GFP_KERNEL);
	if (xa_is_err(attach)) {
		ret = xa_err(attach);
		goto out_unlock;
	}

	if (!attach) {
		attach = kzalloc(sizeof(*attach), GFP_KERNEL);
		if (!attach) {
			ret = -ENOMEM;
			goto out_release_pasid;
		}
		xa_init(&attach->device_array);
	}

	cur_hwpt = attach->hwpt;

	/* Reserve this device's slot; fails if it is already occupied */
	ret = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
			GFP_KERNEL);
	if (ret) {
		WARN_ON(ret == -EBUSY && !cur_hwpt);
		goto out_free_attach;
	}

	if (cur_hwpt && cur_hwpt != hwpt) {
		ret = -EINVAL;
		goto out_release_devid;
	}

	if (attach_resv) {
		ret = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
		if (ret)
			goto out_release_devid;
	}

	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the per-device
	 * reserved regions are only updated during individual device
	 * attachment.
	 */
	if (iommufd_group_first_attach(igroup, pasid)) {
		ret = iommufd_hwpt_attach_device(hwpt, idev, pasid);
		if (ret)
			goto out_unresv;
		attach->hwpt = hwpt;
		/* The slot was reserved above, so this store is not expected to fail */
		WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
					   GFP_KERNEL)));
	}
	refcount_inc(&hwpt->obj.users);
	WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
				   idev, GFP_KERNEL)));
	mutex_unlock(&igroup->lock);
	return 0;

out_unresv:
	if (attach_resv)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
out_release_devid:
	xa_release(&attach->device_array, idev->obj.id);
out_free_attach:
	/* Only a not-yet-published attach (allocated here) may be freed */
	if (iommufd_group_first_attach(igroup, pasid))
		kfree(attach);
out_release_pasid:
	if (iommufd_group_first_attach(igroup, pasid))
		xa_release(&igroup->pasid_attach, pasid);
out_unlock:
	mutex_unlock(&igroup->lock);
	return ret;
}
636
637 struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device * idev,ioasid_t pasid)638 iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
639 {
640 struct iommufd_group *igroup = idev->igroup;
641 struct iommufd_hwpt_paging *hwpt_paging;
642 struct iommufd_hw_pagetable *hwpt;
643 struct iommufd_attach *attach;
644
645 mutex_lock(&igroup->lock);
646 attach = xa_load(&igroup->pasid_attach, pasid);
647 if (!attach) {
648 mutex_unlock(&igroup->lock);
649 return NULL;
650 }
651
652 hwpt = attach->hwpt;
653 hwpt_paging = find_hwpt_paging(hwpt);
654
655 xa_erase(&attach->device_array, idev->obj.id);
656 if (xa_empty(&attach->device_array)) {
657 iommufd_hwpt_detach_device(hwpt, idev, pasid);
658 xa_erase(&igroup->pasid_attach, pasid);
659 kfree(attach);
660 }
661 if (hwpt_paging && pasid == IOMMU_NO_PASID)
662 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
663 mutex_unlock(&igroup->lock);
664
665 /* Caller must destroy hwpt */
666 return hwpt;
667 }
668
669 static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device * idev,ioasid_t pasid,struct iommufd_hw_pagetable * hwpt)670 iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
671 struct iommufd_hw_pagetable *hwpt)
672 {
673 int rc;
674
675 rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
676 if (rc)
677 return ERR_PTR(rc);
678 return NULL;
679 }
680
681 static void
iommufd_group_remove_reserved_iova(struct iommufd_group * igroup,struct iommufd_hwpt_paging * hwpt_paging)682 iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
683 struct iommufd_hwpt_paging *hwpt_paging)
684 {
685 struct iommufd_attach *attach;
686 struct iommufd_device *cur;
687 unsigned long index;
688
689 lockdep_assert_held(&igroup->lock);
690
691 attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
692 xa_for_each(&attach->device_array, index, cur)
693 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
694 }
695
696 static int
iommufd_group_do_replace_reserved_iova(struct iommufd_group * igroup,struct iommufd_hwpt_paging * hwpt_paging)697 iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
698 struct iommufd_hwpt_paging *hwpt_paging)
699 {
700 struct iommufd_hwpt_paging *old_hwpt_paging;
701 struct iommufd_attach *attach;
702 struct iommufd_device *cur;
703 unsigned long index;
704 int rc;
705
706 lockdep_assert_held(&igroup->lock);
707
708 attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
709 old_hwpt_paging = find_hwpt_paging(attach->hwpt);
710 if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
711 xa_for_each(&attach->device_array, index, cur) {
712 rc = iopt_table_enforce_dev_resv_regions(
713 &hwpt_paging->ioas->iopt, cur->dev, NULL);
714 if (rc)
715 goto err_unresv;
716 }
717 }
718
719 rc = iommufd_group_setup_msi(igroup, hwpt_paging);
720 if (rc)
721 goto err_unresv;
722 return 0;
723
724 err_unresv:
725 iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
726 return rc;
727 }
728
729 static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device * idev,ioasid_t pasid,struct iommufd_hw_pagetable * hwpt)730 iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
731 struct iommufd_hw_pagetable *hwpt)
732 {
733 struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
734 bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
735 struct iommufd_hwpt_paging *old_hwpt_paging;
736 struct iommufd_group *igroup = idev->igroup;
737 struct iommufd_hw_pagetable *old_hwpt;
738 struct iommufd_attach *attach;
739 unsigned int num_devices;
740 int rc;
741
742 mutex_lock(&igroup->lock);
743
744 attach = xa_load(&igroup->pasid_attach, pasid);
745 if (!attach) {
746 rc = -EINVAL;
747 goto err_unlock;
748 }
749
750 old_hwpt = attach->hwpt;
751
752 WARN_ON(!old_hwpt || xa_empty(&attach->device_array));
753
754 if (!iommufd_device_is_attached(idev, pasid)) {
755 rc = -EINVAL;
756 goto err_unlock;
757 }
758
759 if (hwpt == old_hwpt) {
760 mutex_unlock(&igroup->lock);
761 return NULL;
762 }
763
764 if (attach_resv) {
765 rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
766 if (rc)
767 goto err_unlock;
768 }
769
770 rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
771 if (rc)
772 goto err_unresv;
773
774 old_hwpt_paging = find_hwpt_paging(old_hwpt);
775 if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
776 (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
777 iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
778
779 attach->hwpt = hwpt;
780
781 num_devices = iommufd_group_device_num(igroup, pasid);
782 /*
783 * Move the refcounts held by the device_array to the new hwpt. Retain a
784 * refcount for this thread as the caller will free it.
785 */
786 refcount_add(num_devices, &hwpt->obj.users);
787 if (num_devices > 1)
788 WARN_ON(refcount_sub_and_test(num_devices - 1,
789 &old_hwpt->obj.users));
790 mutex_unlock(&igroup->lock);
791
792 /* Caller must destroy old_hwpt */
793 return old_hwpt;
794 err_unresv:
795 if (attach_resv)
796 iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
797 err_unlock:
798 mutex_unlock(&igroup->lock);
799 return ERR_PTR(rc);
800 }
801
802 typedef struct iommufd_hw_pagetable *(*attach_fn)(
803 struct iommufd_device *idev, ioasid_t pasid,
804 struct iommufd_hw_pagetable *hwpt);
805
806 /*
807 * When automatically managing the domains we search for a compatible domain in
808 * the iopt and if one is found use it, otherwise create a new domain.
809 * Automatic domain selection will never pick a manually created domain.
810 */
811 static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device * idev,ioasid_t pasid,struct iommufd_ioas * ioas,u32 * pt_id,attach_fn do_attach)812 iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
813 struct iommufd_ioas *ioas, u32 *pt_id,
814 attach_fn do_attach)
815 {
816 /*
817 * iommufd_hw_pagetable_attach() is called by
818 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
819 * iommufd_device_do_attach(). So if we are in this mode then we prefer
820 * to use the immediate_attach path as it supports drivers that can't
821 * directly allocate a domain.
822 */
823 bool immediate_attach = do_attach == iommufd_device_do_attach;
824 struct iommufd_hw_pagetable *destroy_hwpt;
825 struct iommufd_hwpt_paging *hwpt_paging;
826 struct iommufd_hw_pagetable *hwpt;
827
828 /*
829 * There is no differentiation when domains are allocated, so any domain
830 * that is willing to attach to the device is interchangeable with any
831 * other.
832 */
833 mutex_lock(&ioas->mutex);
834 list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
835 if (!hwpt_paging->auto_domain)
836 continue;
837
838 hwpt = &hwpt_paging->common;
839 if (!iommufd_lock_obj(&hwpt->obj))
840 continue;
841 destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
842 if (IS_ERR(destroy_hwpt)) {
843 iommufd_put_object(idev->ictx, &hwpt->obj);
844 /*
845 * -EINVAL means the domain is incompatible with the
846 * device. Other error codes should propagate to
847 * userspace as failure. Success means the domain is
848 * attached.
849 */
850 if (PTR_ERR(destroy_hwpt) == -EINVAL)
851 continue;
852 goto out_unlock;
853 }
854 *pt_id = hwpt->obj.id;
855 iommufd_put_object(idev->ictx, &hwpt->obj);
856 goto out_unlock;
857 }
858
859 hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
860 0, immediate_attach, NULL);
861 if (IS_ERR(hwpt_paging)) {
862 destroy_hwpt = ERR_CAST(hwpt_paging);
863 goto out_unlock;
864 }
865 hwpt = &hwpt_paging->common;
866
867 if (!immediate_attach) {
868 destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
869 if (IS_ERR(destroy_hwpt))
870 goto out_abort;
871 } else {
872 destroy_hwpt = NULL;
873 }
874
875 hwpt_paging->auto_domain = true;
876 *pt_id = hwpt->obj.id;
877
878 iommufd_object_finalize(idev->ictx, &hwpt->obj);
879 mutex_unlock(&ioas->mutex);
880 return destroy_hwpt;
881
882 out_abort:
883 iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
884 out_unlock:
885 mutex_unlock(&ioas->mutex);
886 return destroy_hwpt;
887 }
888
/*
 * Common body of attach and replace. Resolves *pt_id to an object and hands
 * it to @do_attach: a paging or nested hwpt is used directly, while an IOAS
 * goes through automatic domain selection (which may update *pt_id). Any
 * other object type yields -EINVAL.
 *
 * Returns 0 or a negative errno. A hwpt returned by @do_attach is put only
 * after the looked-up object has been released.
 */
static int iommufd_device_change_pt(struct iommufd_device *idev,
				    ioasid_t pasid,
				    u32 *pt_id, attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING:
		destroy_hwpt = do_attach(
			idev, pasid,
			container_of(pt_obj, struct iommufd_hw_pagetable, obj));
		break;
	case IOMMUFD_OBJ_IOAS:
		destroy_hwpt = iommufd_device_auto_get_domain(
			idev, pasid,
			container_of(pt_obj, struct iommufd_ioas, obj), pt_id,
			do_attach);
		break;
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		break;
	}

	/* Success or failure, the looked-up object is released first */
	iommufd_put_object(idev->ictx, pt_obj);

	if (IS_ERR(destroy_hwpt))
		return PTR_ERR(destroy_hwpt);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;
}
936
937 /**
938 * iommufd_device_attach - Connect a device/pasid to an iommu_domain
939 * @idev: device to attach
940 * @pasid: pasid to attach
941 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
942 * Output the IOMMUFD_OBJ_HWPT_PAGING ID
943 *
944 * This connects the device/pasid to an iommu_domain, either automatically
945 * or manually selected. Once this completes the device could do DMA with
946 * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
947 *
948 * The caller should return the resulting pt_id back to userspace.
949 * This function is undone by calling iommufd_device_detach().
950 */
iommufd_device_attach(struct iommufd_device * idev,ioasid_t pasid,u32 * pt_id)951 int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
952 u32 *pt_id)
953 {
954 int rc;
955
956 rc = iommufd_device_change_pt(idev, pasid, pt_id,
957 &iommufd_device_do_attach);
958 if (rc)
959 return rc;
960
961 /*
962 * Pairs with iommufd_device_detach() - catches caller bugs attempting
963 * to destroy a device with an attachment.
964 */
965 refcount_inc(&idev->obj.users);
966 return 0;
967 }
968 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
969
970 /**
971 * iommufd_device_replace - Change the device/pasid's iommu_domain
972 * @idev: device to change
973 * @pasid: pasid to change
974 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
975 * Output the IOMMUFD_OBJ_HWPT_PAGING ID
976 *
977 * This is the same as::
978 *
979 * iommufd_device_detach();
980 * iommufd_device_attach();
981 *
982 * If it fails then no change is made to the attachment. The iommu driver may
983 * implement this so there is no disruption in translation. This can only be
984 * called if iommufd_device_attach() has already succeeded. @pasid is
985 * IOMMU_NO_PASID for no pasid usage.
986 */
iommufd_device_replace(struct iommufd_device * idev,ioasid_t pasid,u32 * pt_id)987 int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
988 u32 *pt_id)
989 {
990 return iommufd_device_change_pt(idev, pasid, pt_id,
991 &iommufd_device_do_replace);
992 }
993 EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
994
995 /**
996 * iommufd_device_detach - Disconnect a device/device to an iommu_domain
997 * @idev: device to detach
998 * @pasid: pasid to detach
999 *
1000 * Undo iommufd_device_attach(). This disconnects the idev from the previously
1001 * attached pt_id. The device returns back to a blocked DMA translation.
1002 * @pasid is IOMMU_NO_PASID for no pasid usage.
1003 */
iommufd_device_detach(struct iommufd_device * idev,ioasid_t pasid)1004 void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
1005 {
1006 struct iommufd_hw_pagetable *hwpt;
1007
1008 hwpt = iommufd_hw_pagetable_detach(idev, pasid);
1009 if (!hwpt)
1010 return;
1011 iommufd_hw_pagetable_put(idev->ictx, hwpt);
1012 refcount_dec(&idev->obj.users);
1013 }
1014 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
1015
1016 /*
1017 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
1018 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
1019 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
1020 */
iommufd_access_change_ioas(struct iommufd_access * access,struct iommufd_ioas * new_ioas)1021 static int iommufd_access_change_ioas(struct iommufd_access *access,
1022 struct iommufd_ioas *new_ioas)
1023 {
1024 u32 iopt_access_list_id = access->iopt_access_list_id;
1025 struct iommufd_ioas *cur_ioas = access->ioas;
1026 int rc;
1027
1028 lockdep_assert_held(&access->ioas_lock);
1029
1030 /* We are racing with a concurrent detach, bail */
1031 if (cur_ioas != access->ioas_unpin)
1032 return -EBUSY;
1033
1034 if (cur_ioas == new_ioas)
1035 return 0;
1036
1037 /*
1038 * Set ioas to NULL to block any further iommufd_access_pin_pages().
1039 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
1040 */
1041 access->ioas = NULL;
1042
1043 if (new_ioas) {
1044 rc = iopt_add_access(&new_ioas->iopt, access);
1045 if (rc) {
1046 access->ioas = cur_ioas;
1047 return rc;
1048 }
1049 refcount_inc(&new_ioas->obj.users);
1050 }
1051
1052 if (cur_ioas) {
1053 if (access->ops->unmap) {
1054 mutex_unlock(&access->ioas_lock);
1055 access->ops->unmap(access->data, 0, ULONG_MAX);
1056 mutex_lock(&access->ioas_lock);
1057 }
1058 iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
1059 refcount_dec(&cur_ioas->obj.users);
1060 }
1061
1062 access->ioas = new_ioas;
1063 access->ioas_unpin = new_ioas;
1064
1065 return 0;
1066 }
1067
/*
 * Look up the IOAS object with the given @id in the access's context and
 * switch @access over to it. Returns the lookup error if the ID does not
 * resolve to an IOAS, otherwise the result of iommufd_access_change_ioas().
 */
static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *new_ioas;
	int rc;

	new_ioas = iommufd_get_ioas(access->ictx, id);
	if (IS_ERR(new_ioas))
		return PTR_ERR(new_ioas);

	rc = iommufd_access_change_ioas(access, new_ioas);

	/* Release the lookup reference regardless of the outcome */
	iommufd_put_object(access->ictx, &new_ioas->obj);
	return rc;
}
1079
iommufd_access_destroy_object(struct iommufd_object * obj)1080 void iommufd_access_destroy_object(struct iommufd_object *obj)
1081 {
1082 struct iommufd_access *access =
1083 container_of(obj, struct iommufd_access, obj);
1084
1085 mutex_lock(&access->ioas_lock);
1086 if (access->ioas)
1087 WARN_ON(iommufd_access_change_ioas(access, NULL));
1088 mutex_unlock(&access->ioas_lock);
1089 iommufd_ctx_put(access->ictx);
1090 }
1091
1092 /**
1093 * iommufd_access_create - Create an iommufd_access
1094 * @ictx: iommufd file descriptor
1095 * @ops: Driver's ops to associate with the access
1096 * @data: Opaque data to pass into ops functions
1097 * @id: Output ID number to return to userspace for this access
1098 *
1099 * An iommufd_access allows a driver to read/write to the IOAS without using
1100 * DMA. The underlying CPU memory can be accessed using the
1101 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
1102 *
1103 * The provided ops are required to use iommufd_access_pin_pages().
1104 */
1105 struct iommufd_access *
iommufd_access_create(struct iommufd_ctx * ictx,const struct iommufd_access_ops * ops,void * data,u32 * id)1106 iommufd_access_create(struct iommufd_ctx *ictx,
1107 const struct iommufd_access_ops *ops, void *data, u32 *id)
1108 {
1109 struct iommufd_access *access;
1110
1111 /*
1112 * There is no uAPI for the access object, but to keep things symmetric
1113 * use the object infrastructure anyhow.
1114 */
1115 access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
1116 if (IS_ERR(access))
1117 return access;
1118
1119 access->data = data;
1120 access->ops = ops;
1121
1122 if (ops->needs_pin_pages)
1123 access->iova_alignment = PAGE_SIZE;
1124 else
1125 access->iova_alignment = 1;
1126
1127 /* The calling driver is a user until iommufd_access_destroy() */
1128 refcount_inc(&access->obj.users);
1129 access->ictx = ictx;
1130 iommufd_ctx_get(ictx);
1131 iommufd_object_finalize(ictx, &access->obj);
1132 *id = access->obj.id;
1133 mutex_init(&access->ioas_lock);
1134 return access;
1135 }
1136 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
1137
1138 /**
1139 * iommufd_access_destroy - Destroy an iommufd_access
1140 * @access: The access to destroy
1141 *
1142 * The caller must stop using the access before destroying it.
1143 */
iommufd_access_destroy(struct iommufd_access * access)1144 void iommufd_access_destroy(struct iommufd_access *access)
1145 {
1146 iommufd_object_destroy_user(access->ictx, &access->obj);
1147 }
1148 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");
1149
iommufd_access_detach(struct iommufd_access * access)1150 void iommufd_access_detach(struct iommufd_access *access)
1151 {
1152 mutex_lock(&access->ioas_lock);
1153 if (WARN_ON(!access->ioas)) {
1154 mutex_unlock(&access->ioas_lock);
1155 return;
1156 }
1157 WARN_ON(iommufd_access_change_ioas(access, NULL));
1158 mutex_unlock(&access->ioas_lock);
1159 }
1160 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");
1161
iommufd_access_attach(struct iommufd_access * access,u32 ioas_id)1162 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
1163 {
1164 int rc;
1165
1166 mutex_lock(&access->ioas_lock);
1167 if (WARN_ON(access->ioas)) {
1168 mutex_unlock(&access->ioas_lock);
1169 return -EINVAL;
1170 }
1171
1172 rc = iommufd_access_change_ioas_id(access, ioas_id);
1173 mutex_unlock(&access->ioas_lock);
1174 return rc;
1175 }
1176 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
1177
iommufd_access_replace(struct iommufd_access * access,u32 ioas_id)1178 int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
1179 {
1180 int rc;
1181
1182 mutex_lock(&access->ioas_lock);
1183 if (!access->ioas) {
1184 mutex_unlock(&access->ioas_lock);
1185 return -ENOENT;
1186 }
1187 rc = iommufd_access_change_ioas_id(access, ioas_id);
1188 mutex_unlock(&access->ioas_lock);
1189 return rc;
1190 }
1191 EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
1192
1193 /**
1194 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
1195 * @iopt: iopt to work on
1196 * @iova: Starting iova in the iopt
1197 * @length: Number of bytes
1198 *
1199 * After this function returns there should be no users attached to the pages
1200 * linked to this iopt that intersect with iova,length. Anyone that has attached
1201 * a user through iopt_access_pages() needs to detach it through
1202 * iommufd_access_unpin_pages() before this function returns.
1203 *
1204 * iommufd_access_destroy() will wait for any outstanding unmap callback to
1205 * complete. Once iommufd_access_destroy() no unmap ops are running or will
1206 * run in the future. Due to this a driver must not create locking that prevents
1207 * unmap to complete while iommufd_access_destroy() is running.
1208 */
iommufd_access_notify_unmap(struct io_pagetable * iopt,unsigned long iova,unsigned long length)1209 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
1210 unsigned long length)
1211 {
1212 struct iommufd_ioas *ioas =
1213 container_of(iopt, struct iommufd_ioas, iopt);
1214 struct iommufd_access *access;
1215 unsigned long index;
1216
1217 xa_lock(&ioas->iopt.access_list);
1218 xa_for_each(&ioas->iopt.access_list, index, access) {
1219 if (!iommufd_lock_obj(&access->obj))
1220 continue;
1221 xa_unlock(&ioas->iopt.access_list);
1222
1223 access->ops->unmap(access->data, iova, length);
1224
1225 iommufd_put_object(access->ictx, &access->obj);
1226 xa_lock(&ioas->iopt.access_list);
1227 }
1228 xa_unlock(&ioas->iopt.access_list);
1229 }
1230
1231 /**
1232 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
1233 * @access: IOAS access to act on
1234 * @iova: Starting IOVA
1235 * @length: Number of bytes to access
1236 *
1237 * Return the struct page's. The caller must stop accessing them before calling
1238 * this. The iova/length must exactly match the one provided to access_pages.
1239 */
iommufd_access_unpin_pages(struct iommufd_access * access,unsigned long iova,unsigned long length)1240 void iommufd_access_unpin_pages(struct iommufd_access *access,
1241 unsigned long iova, unsigned long length)
1242 {
1243 struct iopt_area_contig_iter iter;
1244 struct io_pagetable *iopt;
1245 unsigned long last_iova;
1246 struct iopt_area *area;
1247
1248 if (WARN_ON(!length) ||
1249 WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
1250 return;
1251
1252 mutex_lock(&access->ioas_lock);
1253 /*
1254 * The driver must be doing something wrong if it calls this before an
1255 * iommufd_access_attach() or after an iommufd_access_detach().
1256 */
1257 if (WARN_ON(!access->ioas_unpin)) {
1258 mutex_unlock(&access->ioas_lock);
1259 return;
1260 }
1261 iopt = &access->ioas_unpin->iopt;
1262
1263 down_read(&iopt->iova_rwsem);
1264 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1265 iopt_area_remove_access(
1266 area, iopt_area_iova_to_index(area, iter.cur_iova),
1267 iopt_area_iova_to_index(
1268 area,
1269 min(last_iova, iopt_area_last_iova(area))));
1270 WARN_ON(!iopt_area_contig_done(&iter));
1271 up_read(&iopt->iova_rwsem);
1272 mutex_unlock(&access->ioas_lock);
1273 }
1274 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");
1275
iopt_area_contig_is_aligned(struct iopt_area_contig_iter * iter)1276 static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
1277 {
1278 if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
1279 return false;
1280
1281 if (!iopt_area_contig_done(iter) &&
1282 (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
1283 PAGE_SIZE) != (PAGE_SIZE - 1))
1284 return false;
1285 return true;
1286 }
1287
check_area_prot(struct iopt_area * area,unsigned int flags)1288 static bool check_area_prot(struct iopt_area *area, unsigned int flags)
1289 {
1290 if (flags & IOMMUFD_ACCESS_RW_WRITE)
1291 return area->iommu_prot & IOMMU_WRITE;
1292 return area->iommu_prot & IOMMU_READ;
1293 }
1294
1295 /**
1296 * iommufd_access_pin_pages() - Return a list of pages under the iova
1297 * @access: IOAS access to act on
1298 * @iova: Starting IOVA
1299 * @length: Number of bytes to access
1300 * @out_pages: Output page list
1301 * @flags: IOPMMUFD_ACCESS_RW_* flags
1302 *
1303 * Reads @length bytes starting at iova and returns the struct page * pointers.
1304 * These can be kmap'd by the caller for CPU access.
1305 *
1306 * The caller must perform iommufd_access_unpin_pages() when done to balance
1307 * this.
1308 *
1309 * This API always requires a page aligned iova. This happens naturally if the
1310 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
1311 * smaller alignments have corner cases where this API can fail on otherwise
1312 * aligned iova.
1313 */
iommufd_access_pin_pages(struct iommufd_access * access,unsigned long iova,unsigned long length,struct page ** out_pages,unsigned int flags)1314 int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
1315 unsigned long length, struct page **out_pages,
1316 unsigned int flags)
1317 {
1318 struct iopt_area_contig_iter iter;
1319 struct io_pagetable *iopt;
1320 unsigned long last_iova;
1321 struct iopt_area *area;
1322 int rc;
1323
1324 /* Driver's ops don't support pin_pages */
1325 if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
1326 WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
1327 return -EINVAL;
1328
1329 if (!length)
1330 return -EINVAL;
1331 if (check_add_overflow(iova, length - 1, &last_iova))
1332 return -EOVERFLOW;
1333
1334 mutex_lock(&access->ioas_lock);
1335 if (!access->ioas) {
1336 mutex_unlock(&access->ioas_lock);
1337 return -ENOENT;
1338 }
1339 iopt = &access->ioas->iopt;
1340
1341 down_read(&iopt->iova_rwsem);
1342 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1343 unsigned long last = min(last_iova, iopt_area_last_iova(area));
1344 unsigned long last_index = iopt_area_iova_to_index(area, last);
1345 unsigned long index =
1346 iopt_area_iova_to_index(area, iter.cur_iova);
1347
1348 if (area->prevent_access ||
1349 !iopt_area_contig_is_aligned(&iter)) {
1350 rc = -EINVAL;
1351 goto err_remove;
1352 }
1353
1354 if (!check_area_prot(area, flags)) {
1355 rc = -EPERM;
1356 goto err_remove;
1357 }
1358
1359 rc = iopt_area_add_access(area, index, last_index, out_pages,
1360 flags);
1361 if (rc)
1362 goto err_remove;
1363 out_pages += last_index - index + 1;
1364 }
1365 if (!iopt_area_contig_done(&iter)) {
1366 rc = -ENOENT;
1367 goto err_remove;
1368 }
1369
1370 up_read(&iopt->iova_rwsem);
1371 mutex_unlock(&access->ioas_lock);
1372 return 0;
1373
1374 err_remove:
1375 if (iova < iter.cur_iova) {
1376 last_iova = iter.cur_iova - 1;
1377 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1378 iopt_area_remove_access(
1379 area,
1380 iopt_area_iova_to_index(area, iter.cur_iova),
1381 iopt_area_iova_to_index(
1382 area, min(last_iova,
1383 iopt_area_last_iova(area))));
1384 }
1385 up_read(&iopt->iova_rwsem);
1386 mutex_unlock(&access->ioas_lock);
1387 return rc;
1388 }
1389 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
1390
1391 /**
1392 * iommufd_access_rw - Read or write data under the iova
1393 * @access: IOAS access to act on
1394 * @iova: Starting IOVA
1395 * @data: Kernel buffer to copy to/from
1396 * @length: Number of bytes to access
1397 * @flags: IOMMUFD_ACCESS_RW_* flags
1398 *
1399 * Copy kernel to/from data into the range given by IOVA/length. If flags
1400 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
1401 * by changing it into copy_to/from_user().
1402 */
iommufd_access_rw(struct iommufd_access * access,unsigned long iova,void * data,size_t length,unsigned int flags)1403 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
1404 void *data, size_t length, unsigned int flags)
1405 {
1406 struct iopt_area_contig_iter iter;
1407 struct io_pagetable *iopt;
1408 struct iopt_area *area;
1409 unsigned long last_iova;
1410 int rc = -EINVAL;
1411
1412 if (!length)
1413 return -EINVAL;
1414 if (check_add_overflow(iova, length - 1, &last_iova))
1415 return -EOVERFLOW;
1416
1417 mutex_lock(&access->ioas_lock);
1418 if (!access->ioas) {
1419 mutex_unlock(&access->ioas_lock);
1420 return -ENOENT;
1421 }
1422 iopt = &access->ioas->iopt;
1423
1424 down_read(&iopt->iova_rwsem);
1425 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1426 unsigned long last = min(last_iova, iopt_area_last_iova(area));
1427 unsigned long bytes = (last - iter.cur_iova) + 1;
1428
1429 if (area->prevent_access) {
1430 rc = -EINVAL;
1431 goto err_out;
1432 }
1433
1434 if (!check_area_prot(area, flags)) {
1435 rc = -EPERM;
1436 goto err_out;
1437 }
1438
1439 rc = iopt_pages_rw_access(
1440 area->pages, iopt_area_start_byte(area, iter.cur_iova),
1441 data, bytes, flags);
1442 if (rc)
1443 goto err_out;
1444 data += bytes;
1445 }
1446 if (!iopt_area_contig_done(&iter))
1447 rc = -ENOENT;
1448 err_out:
1449 up_read(&iopt->iova_rwsem);
1450 mutex_unlock(&access->ioas_lock);
1451 return rc;
1452 }
1453 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
1454
iommufd_get_hw_info(struct iommufd_ucmd * ucmd)1455 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
1456 {
1457 struct iommu_hw_info *cmd = ucmd->cmd;
1458 void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
1459 const struct iommu_ops *ops;
1460 struct iommufd_device *idev;
1461 unsigned int data_len;
1462 unsigned int copy_len;
1463 void *data;
1464 int rc;
1465
1466 if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
1467 cmd->__reserved[2])
1468 return -EOPNOTSUPP;
1469
1470 idev = iommufd_get_device(ucmd, cmd->dev_id);
1471 if (IS_ERR(idev))
1472 return PTR_ERR(idev);
1473
1474 ops = dev_iommu_ops(idev->dev);
1475 if (ops->hw_info) {
1476 data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
1477 if (IS_ERR(data)) {
1478 rc = PTR_ERR(data);
1479 goto out_put;
1480 }
1481
1482 /*
1483 * drivers that have hw_info callback should have a unique
1484 * iommu_hw_info_type.
1485 */
1486 if (WARN_ON_ONCE(cmd->out_data_type ==
1487 IOMMU_HW_INFO_TYPE_NONE)) {
1488 rc = -ENODEV;
1489 goto out_free;
1490 }
1491 } else {
1492 cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
1493 data_len = 0;
1494 data = NULL;
1495 }
1496
1497 copy_len = min(cmd->data_len, data_len);
1498 if (copy_to_user(user_ptr, data, copy_len)) {
1499 rc = -EFAULT;
1500 goto out_free;
1501 }
1502
1503 /*
1504 * Zero the trailing bytes if the user buffer is bigger than the
1505 * data size kernel actually has.
1506 */
1507 if (copy_len < cmd->data_len) {
1508 if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
1509 rc = -EFAULT;
1510 goto out_free;
1511 }
1512 }
1513
1514 /*
1515 * We return the length the kernel supports so userspace may know what
1516 * the kernel capability is. It could be larger than the input buffer.
1517 */
1518 cmd->data_len = data_len;
1519
1520 cmd->out_capabilities = 0;
1521 if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
1522 cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
1523
1524 cmd->out_max_pasid_log2 = 0;
1525 /*
1526 * Currently, all iommu drivers enable PASID in the probe_device()
1527 * op if iommu and device supports it. So the max_pasids stored in
1528 * dev->iommu indicates both PASID support and enable status. A
1529 * non-zero dev->iommu->max_pasids means PASID is supported and
1530 * enabled. The iommufd only reports PASID capability to userspace
1531 * if it's enabled.
1532 */
1533 if (idev->dev->iommu->max_pasids) {
1534 cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
1535
1536 if (dev_is_pci(idev->dev)) {
1537 struct pci_dev *pdev = to_pci_dev(idev->dev);
1538 int ctrl;
1539
1540 ctrl = pci_pasid_status(pdev);
1541
1542 WARN_ON_ONCE(ctrl < 0 ||
1543 !(ctrl & PCI_PASID_CTRL_ENABLE));
1544
1545 if (ctrl & PCI_PASID_CTRL_EXEC)
1546 cmd->out_capabilities |=
1547 IOMMU_HW_CAP_PCI_PASID_EXEC;
1548 if (ctrl & PCI_PASID_CTRL_PRIV)
1549 cmd->out_capabilities |=
1550 IOMMU_HW_CAP_PCI_PASID_PRIV;
1551 }
1552 }
1553
1554 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
1555 out_free:
1556 kfree(data);
1557 out_put:
1558 iommufd_put_object(ucmd->ictx, &idev->obj);
1559 return rc;
1560 }
1561