// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024 Intel Corporation
 */
#define pr_fmt(fmt) "iommufd: " fmt

#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/iommufd.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/poll.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "iommufd_private.h"

/* IOMMUFD_OBJ_FAULT Functions */

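/*
 * Enable IOPF delivery for @idev. The enable is refcounted in
 * idev->iopf_enabled so that only the first caller turns on
 * IOMMU_DEV_FEAT_IOPF and later callers simply take a reference.
 */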
int iommufd_fault_iopf_enable(struct iommufd_device *idev)
{
	struct device *dev = idev->dev;
	int ret;

	/*
	 * Once PCI/PRI support is turned on for a VF, a response failure code
	 * must not be forwarded to the hardware, because PRI is a resource
	 * shared between the PF and its VFs and there is no coordination for
	 * this shared capability. Recovery would have to wait for a vPRI
	 * reset.
	 */
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->is_virtfn && pci_pri_supported(pdev))
			return -EINVAL;
	}

	mutex_lock(&idev->iopf_lock);
	/* Device IOPF has already been enabled. */
	if (++idev->iopf_enabled > 1) {
		mutex_unlock(&idev->iopf_lock);
		return 0;
	}

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		--idev->iopf_enabled;
	mutex_unlock(&idev->iopf_lock);

	return ret;
}

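/*
 * Drop one IOPF enable reference for @idev and turn off
 * IOMMU_DEV_FEAT_IOPF once the count reaches zero.
 */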
void iommufd_fault_iopf_disable(struct iommufd_device *idev)
{
	mutex_lock(&idev->iopf_lock);
	if (!WARN_ON(idev->iopf_enabled == 0)) {
		if (--idev->iopf_enabled == 0)
			iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
	}
	mutex_unlock(&idev->iopf_lock);
}

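/*
 * Respond IOMMU_PAGE_RESP_INVALID to, and free, every fault group still
 * associated with @handle, whether it is waiting on the deliver list or
 * parked in the response xarray awaiting a reply from userspace.
 */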
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
				  struct iommufd_attach_handle *handle)
{
	struct iommufd_fault *fault = hwpt->fault;
	struct iopf_group *group, *next;
	struct list_head free_list;
	unsigned long index;

	if (!fault)
		return;
	INIT_LIST_HEAD(&free_list);

	mutex_lock(&fault->mutex);
	spin_lock(&fault->common.lock);
	list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
		if (group->attach_handle != &handle->handle)
			continue;
		list_move(&group->node, &free_list);
	}
	spin_unlock(&fault->common.lock);

	list_for_each_entry_safe(group, next, &free_list, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}

	xa_for_each(&fault->response, index, group) {
		if (group->attach_handle != &handle->handle)
			continue;
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	mutex_unlock(&fault->mutex);
}

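/*
 * IOMMUFD_OBJ_FAULT destroy callback: flush anything still queued by
 * responding IOMMU_PAGE_RESP_INVALID, then release the xarray and mutex.
 */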
void iommufd_fault_destroy(struct iommufd_object *obj)
{
	struct iommufd_eventq *eventq =
		container_of(obj, struct iommufd_eventq, obj);
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iopf_group *group, *next;
	unsigned long index;

	/*
	 * The iommufd object's reference count is zero at this point.
	 * We can be confident that no other threads are currently
	 * accessing this pointer. Therefore, acquiring the mutex here
	 * is unnecessary.
	 */
	list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_for_each(&fault->response, index, group) {
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_destroy(&fault->response);
	mutex_destroy(&fault->mutex);
}

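/*
 * Translate a kernel struct iommu_fault into the uAPI struct
 * iommu_hwpt_pgfault record that will be copied to userspace.
 */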
static void iommufd_compose_fault_message(struct iommu_fault *fault,
					  struct iommu_hwpt_pgfault *hwpt_fault,
					  struct iommufd_device *idev,
					  u32 cookie)
{
	hwpt_fault->flags = fault->prm.flags;
	hwpt_fault->dev_id = idev->obj.id;
	hwpt_fault->pasid = fault->prm.pasid;
	hwpt_fault->grpid = fault->prm.grpid;
	hwpt_fault->perm = fault->prm.perm;
	hwpt_fault->addr = fault->prm.addr;
	hwpt_fault->length = 0;
	hwpt_fault->cookie = cookie;
}

/* Fetch the first node out of the fault->deliver list */
static struct iopf_group *
iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
{
	struct list_head *list = &fault->common.deliver;
	struct iopf_group *group = NULL;

	spin_lock(&fault->common.lock);
	if (!list_empty(list)) {
		group = list_first_entry(list, struct iopf_group, node);
		list_del(&group->node);
	}
	spin_unlock(&fault->common.lock);
	return group;
}

/* Restore a node back to the head of the fault->deliver list */
static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
					  struct iopf_group *group)
{
	spin_lock(&fault->common.lock);
	list_add(&group->node, &fault->common.deliver);
	spin_unlock(&fault->common.lock);
}

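/*
 * Read handler for the fault queue fd. Userspace receives one struct
 * iommu_hwpt_pgfault record per fault in a group; a group is dequeued only
 * when all of its faults fit in the remaining buffer. Before the copy, the
 * group is stored in fault->response under a newly allocated cookie so that
 * a later write can post the page response.
 */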
static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
				       size_t count, loff_t *ppos)
{
	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iommu_hwpt_pgfault data = {};
	struct iommufd_device *idev;
	struct iopf_group *group;
	struct iopf_fault *iopf;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % fault_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while ((group = iommufd_fault_deliver_fetch(fault))) {
		if (done >= count ||
		    group->fault_count * fault_size > count - done) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		rc = xa_alloc(&fault->response, &group->cookie, group,
			      xa_limit_32b, GFP_KERNEL);
		if (rc) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		idev = to_iommufd_handle(group->attach_handle)->idev;
		list_for_each_entry(iopf, &group->faults, list) {
			iommufd_compose_fault_message(&iopf->fault,
						      &data, idev,
						      group->cookie);
			if (copy_to_user(buf + done, &data, fault_size)) {
				xa_erase(&fault->response, group->cookie);
				iommufd_fault_deliver_restore(fault, group);
				rc = -EFAULT;
				break;
			}
			done += fault_size;
		}
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}

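/*
 * Write handler for the fault queue fd. Userspace posts one struct
 * iommu_hwpt_page_response per fault group it has read; the cookie must
 * match one handed out by the read handler. The response code is forwarded
 * to the IOMMU and the group is freed.
 */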
static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
					size_t count, loff_t *ppos)
{
	size_t response_size = sizeof(struct iommu_hwpt_page_response);
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iommu_hwpt_page_response response;
	struct iopf_group *group;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % response_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while (count > done) {
		rc = copy_from_user(&response, buf + done, response_size);
		if (rc)
			break;

		static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
			      (int)IOMMU_PAGE_RESP_SUCCESS);
		static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
			      (int)IOMMU_PAGE_RESP_INVALID);
		if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
		    response.code != IOMMUFD_PAGE_RESP_INVALID) {
			rc = -EINVAL;
			break;
		}

		group = xa_erase(&fault->response, response.cookie);
		if (!group) {
			rc = -EINVAL;
			break;
		}

		iopf_group_response(group, response.code);
		iopf_free_group(group);
		done += response_size;
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}

/* IOMMUFD_OBJ_VEVENTQ Functions */

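/*
 * Tear down a vEVENTQ that is still on its vIOMMU's list: free every queued
 * event except the embedded lost_events_header, drop the vIOMMU reference,
 * and unlink the queue. Caller must hold veventqs_rwsem for write.
 */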
void iommufd_veventq_abort(struct iommufd_object *obj)
{
	struct iommufd_eventq *eventq =
		container_of(obj, struct iommufd_eventq, obj);
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct iommufd_viommu *viommu = veventq->viommu;
	struct iommufd_vevent *cur, *next;

	lockdep_assert_held_write(&viommu->veventqs_rwsem);

	list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
		list_del(&cur->node);
		if (cur != &veventq->lost_events_header)
			kfree(cur);
	}

	refcount_dec(&viommu->obj.users);
	list_del(&veventq->node);
}

void iommufd_veventq_destroy(struct iommufd_object *obj)
{
	struct iommufd_veventq *veventq = eventq_to_veventq(
		container_of(obj, struct iommufd_eventq, obj));

	down_write(&veventq->viommu->veventqs_rwsem);
	iommufd_veventq_abort(obj);
	up_write(&veventq->viommu->veventqs_rwsem);
}

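/*
 * Pop the first vEVENT off the deliver list. If the head is the embedded
 * lost_events_header, return a GFP_ATOMIC copy of it instead, so the
 * embedded node itself is never handed to the reader. Returns NULL if the
 * list is empty or the copy cannot be allocated.
 */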
static struct iommufd_vevent *
iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct list_head *list = &eventq->deliver;
	struct iommufd_vevent *vevent = NULL;

	spin_lock(&eventq->lock);
	if (!list_empty(list)) {
		struct iommufd_vevent *next;

		next = list_first_entry(list, struct iommufd_vevent, node);
		/* Make a copy of the lost_events_header for copy_to_user */
		if (next == &veventq->lost_events_header) {
			vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
			if (!vevent)
				goto out_unlock;
		}
		list_del(&next->node);
		if (vevent)
			memcpy(vevent, next, sizeof(*vevent));
		else
			vevent = next;
	}
out_unlock:
	spin_unlock(&eventq->lock);
	return vevent;
}

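/*
 * Put a fetched vEVENT back at the head of the deliver list. A copied
 * lost_events_header is freed; the embedded header is re-queued only when
 * the list has gone empty, so it is never queued twice.
 */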
static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
					    struct iommufd_vevent *vevent)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct list_head *list = &eventq->deliver;

	spin_lock(&eventq->lock);
	if (vevent_for_lost_events_header(vevent)) {
		/* Remove the copy of the lost_events_header */
		kfree(vevent);
		vevent = NULL;
		/* An empty list needs the lost_events_header back */
		if (list_empty(list))
			vevent = &veventq->lost_events_header;
	}
	if (vevent)
		list_add(&vevent->node, list);
	spin_unlock(&eventq->lock);
}

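/*
 * Read handler for the vEVENTQ fd. Each entry is copied out as a struct
 * iommufd_vevent_header optionally followed by data_len bytes of event
 * data; a lost-events entry carries the header only. Reading stops once
 * the remaining buffer cannot hold the next entry.
 */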
static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
					 size_t count, loff_t *ppos)
{
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct iommufd_vevent_header *hdr;
	struct iommufd_vevent *cur;
	size_t done = 0;
	int rc = 0;

	if (*ppos)
		return -ESPIPE;

	while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
		/* Validate the remaining bytes against the header size */
		if (done >= count || sizeof(*hdr) > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}
		hdr = &cur->header;

		/* For a normal vEVENT, validate against the full event size */
		if (!vevent_for_lost_events_header(cur) &&
		    sizeof(*hdr) + cur->data_len > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}

		if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}
		done += sizeof(*hdr);

		if (cur->data_len &&
		    copy_to_user(buf + done, cur->event_data, cur->data_len)) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}
		spin_lock(&eventq->lock);
		if (!vevent_for_lost_events_header(cur))
			veventq->num_events--;
		spin_unlock(&eventq->lock);
		done += cur->data_len;
		kfree(cur);
	}

	return done == 0 ? rc : done;
}

/* Common Event Queue Functions */

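/*
 * Poll handler shared by both queue types. A fault queue is always writable
 * (EPOLLOUT) since responses may be posted at any time; EPOLLIN is reported
 * whenever the deliver list is non-empty.
 */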
static __poll_t iommufd_eventq_fops_poll(struct file *filep,
					 struct poll_table_struct *wait)
{
	struct iommufd_eventq *eventq = filep->private_data;
	__poll_t pollflags = 0;

	if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
		pollflags |= EPOLLOUT;

	poll_wait(filep, &eventq->wait_queue, wait);
	spin_lock(&eventq->lock);
	if (!list_empty(&eventq->deliver))
		pollflags |= EPOLLIN | EPOLLRDNORM;
	spin_unlock(&eventq->lock);

	return pollflags;
}

static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
{
	struct iommufd_eventq *eventq = filep->private_data;

	refcount_dec(&eventq->obj.users);
	iommufd_ctx_put(eventq->ictx);
	return 0;
}

#define INIT_EVENTQ_FOPS(read_op, write_op)                                    \
	((const struct file_operations){                                       \
		.owner = THIS_MODULE,                                          \
		.open = nonseekable_open,                                      \
		.read = read_op,                                               \
		.write = write_op,                                             \
		.poll = iommufd_eventq_fops_poll,                              \
		.release = iommufd_eventq_fops_release,                        \
	})

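/*
 * Initialize the common eventq state, create its anonymous inode file bound
 * to @fops, and reserve a new fd. References on the ictx and the object are
 * taken here; the caller installs the fd with fd_install() once the object
 * has been finalized.
 */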
static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
			       struct iommufd_ctx *ictx,
			       const struct file_operations *fops)
{
	struct file *filep;
	int fdno;

	spin_lock_init(&eventq->lock);
	INIT_LIST_HEAD(&eventq->deliver);
	init_waitqueue_head(&eventq->wait_queue);

	filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
	if (IS_ERR(filep))
		return PTR_ERR(filep);

	eventq->ictx = ictx;
	iommufd_ctx_get(eventq->ictx);
	eventq->filep = filep;
	refcount_inc(&eventq->obj.users);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0)
		fput(filep);
	return fdno;
}

static const struct file_operations iommufd_fault_fops =
	INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);

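/*
 * Handler for the fault queue allocation ioctl: create an IOMMUFD_OBJ_FAULT
 * object together with its event queue file and return the object ID and fd
 * to userspace.
 */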
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_fault_alloc *cmd = ucmd->cmd;
	struct iommufd_fault *fault;
	int fdno;
	int rc;

	if (cmd->flags)
		return -EOPNOTSUPP;

	fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
				       common.obj);
	if (IS_ERR(fault))
		return PTR_ERR(fault);

	xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
	mutex_init(&fault->mutex);

	fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
				   ucmd->ictx, &iommufd_fault_fops);
	if (fdno < 0) {
		rc = fdno;
		goto out_abort;
	}

	cmd->out_fault_id = fault->common.obj.id;
	cmd->out_fault_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;
	iommufd_object_finalize(ucmd->ictx, &fault->common.obj);

	fd_install(fdno, fault->common.filep);

	return 0;
out_put_fdno:
	put_unused_fd(fdno);
	fput(fault->common.filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);

	return rc;
}

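/*
 * Queue an incoming IOPF group on the owning HWPT's fault object and wake
 * up any reader or poller waiting on the queue.
 */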
int iommufd_fault_iopf_handler(struct iopf_group *group)
{
	struct iommufd_hw_pagetable *hwpt;
	struct iommufd_fault *fault;

	hwpt = group->attach_handle->domain->iommufd_hwpt;
	fault = hwpt->fault;

	spin_lock(&fault->common.lock);
	list_add_tail(&group->node, &fault->common.deliver);
	spin_unlock(&fault->common.lock);

	wake_up_interruptible(&fault->common.wait_queue);

	return 0;
}

static const struct file_operations iommufd_veventq_fops =
	INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);

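/*
 * Handler for the vEVENTQ allocation ioctl: create a virtual event queue of
 * the requested type and depth on a vIOMMU (at most one queue per type) and
 * return its object ID and fd to userspace.
 */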
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_veventq_alloc *cmd = ucmd->cmd;
	struct iommufd_veventq *veventq;
	struct iommufd_viommu *viommu;
	int fdno;
	int rc;

	if (cmd->flags || cmd->__reserved ||
	    cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
		return -EOPNOTSUPP;
	if (!cmd->veventq_depth)
		return -EINVAL;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	down_write(&viommu->veventqs_rwsem);

	if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
		rc = -EEXIST;
		goto out_unlock_veventqs;
	}

	veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
					 IOMMUFD_OBJ_VEVENTQ, common.obj);
	if (IS_ERR(veventq)) {
		rc = PTR_ERR(veventq);
		goto out_unlock_veventqs;
	}

	veventq->type = cmd->type;
	veventq->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	veventq->depth = cmd->veventq_depth;
	list_add_tail(&veventq->node, &viommu->veventqs);
	veventq->lost_events_header.header.flags =
		IOMMU_VEVENTQ_FLAG_LOST_EVENTS;

	fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
				   ucmd->ictx, &iommufd_veventq_fops);
	if (fdno < 0) {
		rc = fdno;
		goto out_abort;
	}

	cmd->out_veventq_id = veventq->common.obj.id;
	cmd->out_veventq_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;

	iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
	fd_install(fdno, veventq->common.filep);
	goto out_unlock_veventqs;

out_put_fdno:
	put_unused_fd(fdno);
	fput(veventq->common.filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
out_unlock_veventqs:
	up_write(&viommu->veventqs_rwsem);
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}