// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE: Filesystem in Userspace
 * Copyright (c) 2023-2024 DataDirect Networks.
 */

#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"

#include <linux/fs.h>
#include <linux/io_uring/cmd.h>

static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */

bool fuse_uring_enabled(void)
{
	return enable_uring;
}

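/*
 * Private data per io_uring command, stored in the command's pdu area;
 * it links the command back to its fuse ring entry.
 */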
struct fuse_uring_pdu {
	struct fuse_ring_ent *ent;
};

static const struct fuse_iqueue_ops fuse_io_uring_ops;

static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
				   struct fuse_ring_ent *ring_ent)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	pdu->ent = ring_ent;
}

static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
{
	struct fuse_uring_pdu *pdu =
		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);

	return pdu->ent;
}

static void fuse_uring_flush_bg(struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_held(&queue->lock);
	lockdep_assert_held(&fc->bg_lock);

	/*
	 * Allow one bg request per queue, ignoring global fc limits.
	 * This prevents a single queue from consuming all resources and
	 * eliminates the need for remote queue wake-ups when global
	 * limits are met but this queue has no more waiting requests.
	 */
	while ((fc->active_background < fc->max_background ||
		!queue->active_background) &&
	       (!list_empty(&queue->fuse_req_bg_queue))) {
		struct fuse_req *req;

		req = list_first_entry(&queue->fuse_req_bg_queue,
				       struct fuse_req, list);
		fc->active_background++;
		queue->active_background++;

		list_move_tail(&req->list, &queue->fuse_req_queue);
	}
}

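/*
 * Finish a request that was handled through io-uring: detach it from its
 * ring entry, update background accounting and complete the request.
 */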
static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
			       int error)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;

	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
	}

	spin_unlock(&queue->lock);

	if (error)
		req->out.h.error = error;

	clear_bit(FR_SENT, &req->flags);
	fuse_request_end(req);
}

/* Abort all requests queued on the given ring queue */
static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue)
{
	struct fuse_req *req;
	LIST_HEAD(req_list);

	spin_lock(&queue->lock);
	list_for_each_entry(req, &queue->fuse_req_queue, list)
		clear_bit(FR_PENDING, &req->flags);
	list_splice_init(&queue->fuse_req_queue, &req_list);
	spin_unlock(&queue->lock);

	/* must not hold queue lock to avoid order issues with fi->lock */
	fuse_dev_end_requests(&req_list);
}

void fuse_uring_abort_end_requests(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_queue *queue;
	struct fuse_conn *fc = ring->fc;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		queue->stopped = true;

		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX);
		spin_lock(&queue->lock);
		spin_lock(&fc->bg_lock);
		fuse_uring_flush_bg(queue);
		spin_unlock(&fc->bg_lock);
		spin_unlock(&queue->lock);
		fuse_uring_abort_end_queue_requests(queue);
	}
}

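/*
 * Check whether any request on any ring queue (pending, background or
 * in processing) has expired.
 */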
bool fuse_uring_request_expired(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	int qid;

	if (!ring)
		return false;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		queue = READ_ONCE(ring->queues[qid]);
		if (!queue)
			continue;

		spin_lock(&queue->lock);
		if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
		    fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
		    fuse_fpq_processing_expired(fc, queue->fpq.processing)) {
			spin_unlock(&queue->lock);
			return true;
		}
		spin_unlock(&queue->lock);
	}

	return false;
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];
		struct fuse_ring_ent *ent, *next;

		if (!queue)
			continue;

		WARN_ON(!list_empty(&queue->ent_avail_queue));
		WARN_ON(!list_empty(&queue->ent_w_req_queue));
		WARN_ON(!list_empty(&queue->ent_commit_queue));
		WARN_ON(!list_empty(&queue->ent_in_userspace));

		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
			list_del_init(&ent->list);
			kfree(ent);
		}

		kfree(queue->fpq.processing);
		kfree(queue);
		ring->queues[qid] = NULL;
	}

	kfree(ring->queues);
	kfree(ring);
	fc->ring = NULL;
}

/*
 * Basic ring setup for this connection based on the provided configuration
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		spin_unlock(&fc->lock);
		res = fc->ring;
		goto out_err;
	}

	init_waitqueue_head(&ring->stop_waitq);

	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;
	smp_store_release(&fc->ring, ring);

	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}

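/*
 * Allocate and initialize the ring queue for the given qid; if another
 * thread created it concurrently, the existing queue is returned.
 */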
static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
						       int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct list_head *pq;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(queue);
		return NULL;
	}

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);

	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);
	INIT_LIST_HEAD(&queue->ent_w_req_queue);
	INIT_LIST_HEAD(&queue->ent_in_userspace);
	INIT_LIST_HEAD(&queue->fuse_req_queue);
	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
	INIT_LIST_HEAD(&queue->ent_released);

	queue->fpq.processing = pq;
	fuse_pqueue_init(&queue->fpq);

	spin_lock(&fc->lock);
	if (ring->queues[qid]) {
		spin_unlock(&fc->lock);
		kfree(queue->fpq.processing);
		kfree(queue);
		return ring->queues[qid];
	}

	/*
	 * WRITE_ONCE, as most readers access the queue pointer locklessly
	 * (via READ_ONCE) without taking fc->lock at all.
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}

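/* End a fuse request with -ECONNABORTED during connection/ring teardown */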
static void fuse_uring_stop_fuse_req_end(struct fuse_req *req)
{
	clear_bit(FR_SENT, &req->flags);
	req->out.h.error = -ECONNABORTED;
	fuse_request_end(req);
}

/*
 * Release a request/entry on connection tear down
 */
static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
	struct fuse_req *req;
	struct io_uring_cmd *cmd;

	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	req = ent->fuse_req;
	ent->fuse_req = NULL;
	if (req) {
		/* remove entry from queue->fpq->processing */
		list_del_init(&req->list);
	}

	/*
	 * The entry must not be freed immediately, as IO_URING_F_CANCEL
	 * handling accesses entries through direct pointers - there is a risk
	 * of a race with daemon termination (which triggers IO_URING_F_CANCEL)
	 * that accesses entries without checking the list state first.
	 */
	list_move(&ent->list, &queue->ent_released);
	ent->state = FRRS_RELEASED;
	spin_unlock(&queue->lock);

	if (cmd)
		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED);

	if (req)
		fuse_uring_stop_fuse_req_end(req);
}

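/*
 * Move all entries in the expected state from the given list onto a local
 * teardown list and release them without holding the queue lock.
 */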
static void fuse_uring_stop_list_entries(struct list_head *head,
					 struct fuse_ring_queue *queue,
					 enum fuse_ring_req_state exp_state)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent, *next;
	ssize_t queue_refs = SSIZE_MAX;
	LIST_HEAD(to_teardown);

	spin_lock(&queue->lock);
	list_for_each_entry_safe(ent, next, head, list) {
		if (ent->state != exp_state) {
			pr_warn("entry teardown qid=%d state=%d expected=%d",
				queue->qid, ent->state, exp_state);
			continue;
		}

		ent->state = FRRS_TEARDOWN;
		list_move(&ent->list, &to_teardown);
	}
	spin_unlock(&queue->lock);

	/* no queue lock to avoid lock order issues */
	list_for_each_entry_safe(ent, next, &to_teardown, list) {
		fuse_uring_entry_teardown(ent);
		queue_refs = atomic_dec_return(&ring->queue_refs);
		WARN_ON_ONCE(queue_refs < 0);
	}
}

static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue)
{
	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue,
				     FRRS_USERSPACE);
	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue,
				     FRRS_AVAILABLE);
}

/*
 * Log state debug info
 */
static void fuse_uring_log_ent_state(struct fuse_ring *ring)
{
	int qid;
	struct fuse_ring_ent *ent;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = ring->queues[qid];

		if (!queue)
			continue;

		spin_lock(&queue->lock);
		/*
		 * Log entries from the intermediate queues, the other queues
		 * should be empty
		 */
		list_for_each_entry(ent, &queue->ent_w_req_queue, list) {
			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		list_for_each_entry(ent, &queue->ent_commit_queue, list) {
			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n",
				ring, qid, ent, ent->state);
		}
		spin_unlock(&queue->lock);
	}
	ring->stop_debug_log = 1;
}

static void fuse_uring_async_stop_queues(struct work_struct *work)
{
	int qid;
	struct fuse_ring *ring =
		container_of(work, struct fuse_ring, async_teardown_work.work);

	/* XXX code dup */
	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	/*
	 * Some ring entries might be in the middle of IO operations, i.e.
	 * in the process of getting handled by file_operations::uring_cmd
	 * or on their way to userspace. We could handle that with conditions
	 * in the run-time code, but it is easier/cleaner to have an async
	 * teardown handler that reschedules itself while queue references
	 * are still left.
	 */
	if (atomic_read(&ring->queue_refs) > 0) {
		if (time_after(jiffies,
			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT))
			fuse_uring_log_ent_state(ring);

		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Stop the ring queues
 */
void fuse_uring_stop_queues(struct fuse_ring *ring)
{
	int qid;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);

		if (!queue)
			continue;

		fuse_uring_teardown_entries(queue);
	}

	if (atomic_read(&ring->queue_refs) > 0) {
		ring->teardown_time = jiffies;
		INIT_DELAYED_WORK(&ring->async_teardown_work,
				  fuse_uring_async_stop_queues);
		schedule_delayed_work(&ring->async_teardown_work,
				      FUSE_URING_TEARDOWN_INTERVAL);
	} else {
		wake_up_all(&ring->stop_waitq);
	}
}

/*
 * Handle IO_URING_F_CANCEL, which typically comes on daemon termination.
 *
 * Releasing the last entry should trigger fuse_dev_release() if
 * the daemon was terminated
 */
static void fuse_uring_cancel(struct io_uring_cmd *cmd,
			      unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue;
	bool need_cmd_done = false;

	/*
	 * direct access on ent - it must not be freed as long as
	 * IO_URING_F_CANCEL might come up
	 */
	queue = ent->queue;
	spin_lock(&queue->lock);
	if (ent->state == FRRS_AVAILABLE) {
		ent->state = FRRS_USERSPACE;
		list_move(&ent->list, &queue->ent_in_userspace);
		need_cmd_done = true;
		ent->cmd = NULL;
	}
	spin_unlock(&queue->lock);

	if (need_cmd_done) {
		/* no queue lock to avoid lock order issues */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}
}

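/* Link the command to its ring entry and allow io_uring to cancel it */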
static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
				      struct fuse_ring_ent *ring_ent)
{
	uring_cmd_set_ring_ent(cmd, ring_ent);
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

/*
 * Check for errors and store them in the request
 */
static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
					 struct fuse_req *req,
					 struct fuse_conn *fc)
{
	int err;

	err = -EINVAL;
	if (oh->unique == 0) {
		/* Not supported through io-uring yet */
		pr_warn_once("notify through fuse-io-uring not supported\n");
		goto err;
	}

	if (oh->error <= -ERESTARTSYS || oh->error > 0)
		goto err;

	if (oh->error) {
		err = oh->error;
		goto err;
	}

	err = -ENOENT;
	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n",
				    req->in.h.unique,
				    oh->unique & ~FUSE_INT_REQ_BIT);
		goto err;
	}

	/*
	 * Is it an interrupt reply ID?
	 * XXX: Not supported through fuse-io-uring yet, it should not even
	 *      find the request - should not happen.
	 */
	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT);

	err = 0;
err:
	return err;
}

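/*
 * Copy the userspace reply payload from the ring buffer into the fuse
 * request arguments.
 */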
static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
				     struct fuse_req *req,
				     struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct iov_iter iter;
	int err;
	struct fuse_uring_ent_in_out ring_in_out;

	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out,
			     sizeof(ring_in_out));
	if (err)
		return -EFAULT;

	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz,
			  &iter);
	if (err)
		return err;

	fuse_copy_init(&cs, 0, &iter);
	cs.is_uring = 1;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
}

/*
 * Copy data from the req to the ring buffer
 */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				   struct fuse_ring_ent *ent)
{
	struct fuse_copy_state cs;
	struct fuse_args *args = req->args;
	struct fuse_in_arg *in_args = args->in_args;
	int num_args = args->in_numargs;
	int err;
	struct iov_iter iter;
	struct fuse_uring_ent_in_out ent_in_out = {
		.flags = 0,
		.commit_id = req->in.h.unique,
	};

	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter);
	if (err) {
		pr_info_ratelimited("fuse: Import of user buffer failed\n");
		return err;
	}

	fuse_copy_init(&cs, 1, &iter);
	cs.is_uring = 1;
	cs.req = req;

	if (num_args > 0) {
		/*
		 * The expectation is that the first argument is the per-op
		 * header. Some opcodes have a zero-size header.
		 */
		if (args->in_args[0].size > 0) {
			err = copy_to_user(&ent->headers->op_in, in_args->value,
					   in_args->size);
			if (err) {
				pr_info_ratelimited(
					"Copying the header failed.\n");
				return -EFAULT;
			}
		}
		in_args++;
		num_args--;
	}

	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
	}

	ent_in_out.payload_sz = cs.ring.copied_sz;
	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out,
			   sizeof(ent_in_out));
	return err ? -EFAULT : 0;
}

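/*
 * Copy the fuse request (in header and arguments) into the ring entry
 * buffers that were registered by the fuse server.
 */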
static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	int err;

	err = -EIO;
	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
		pr_err("qid=%d ring-req=%p invalid state %d on send\n",
		       queue->qid, ent, ent->state);
		return err;
	}

	err = -EINVAL;
	if (WARN_ON(req->in.h.unique == 0))
		return err;

	/* copy the request */
	err = fuse_uring_args_to_ring(ring, req, ent);
	if (unlikely(err)) {
		pr_info_ratelimited("Copy to ring failed: %d\n", err);
		return err;
	}

	/* copy fuse_in_header */
	err = copy_to_user(&ent->headers->in_out, &req->in.h,
			   sizeof(req->in.h));
	if (err) {
		err = -EFAULT;
		return err;
	}

	return 0;
}

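/* Copy the request to the ring buffers and mark it as sent */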
static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
				   struct fuse_req *req)
{
	int err;

	err = fuse_uring_copy_to_ring(ent, req);
	if (!err)
		set_bit(FR_SENT, &req->flags);
	else
		fuse_uring_req_end(ent, req, err);

	return err;
}

/*
 * Write data to the ring buffer and send the request to userspace,
 * which will read it.
 * This is comparable to a classical read(/dev/fuse).
 */
static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
					struct fuse_req *req,
					unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	int err;
	struct io_uring_cmd *cmd;

	err = fuse_uring_prepare_send(ent, req);
	if (err)
		return err;

	spin_lock(&queue->lock);
	cmd = ent->cmd;
	ent->cmd = NULL;
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, 0, 0, issue_flags);
	return 0;
}

/*
 * Make a ring entry available for fuse_req assignment
 */
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
				 struct fuse_ring_queue *queue)
{
	WARN_ON_ONCE(!ent->cmd);
	list_move(&ent->list, &queue->ent_avail_queue);
	ent->state = FRRS_AVAILABLE;
}

/* Used to find the request on SQE commit */
static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent,
				 struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_pqueue *fpq = &queue->fpq;
	unsigned int hash;

	req->ring_entry = ent;
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
}

/*
 * Assign the given fuse request to the ring entry
 */
static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
					   struct fuse_req *req)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE &&
			 ent->state != FRRS_COMMIT)) {
		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid,
			ent->state);
	}

	clear_bit(FR_PENDING, &req->flags);
	ent->fuse_req = req;
	ent->state = FRRS_FUSE_REQ;
	list_move(&ent->list, &queue->ent_w_req_queue);
	fuse_uring_add_to_pq(ent, req);
}

/* Fetch the next fuse request if available */
static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	struct fuse_req *req;
	struct fuse_ring_queue *queue = ent->queue;
	struct list_head *req_queue = &queue->fuse_req_queue;

	lockdep_assert_held(&queue->lock);

	/* get and assign the next request while still holding the lock */
	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req)
		fuse_uring_add_req_to_ring_ent(ent, req);

	return req;
}

/*
 * Read data from the ring buffer, which user space has written to.
 * This is comparable to the handling of a classical write(/dev/fuse).
 * Also make the ring request available again for new fuse requests.
 */
static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req,
			      unsigned int issue_flags)
{
	struct fuse_ring *ring = ent->queue->ring;
	struct fuse_conn *fc = ring->fc;
	ssize_t err = 0;

	err = copy_from_user(&req->out.h, &ent->headers->in_out,
			     sizeof(req->out.h));
	if (err) {
		req->out.h.error = -EFAULT;
		goto out;
	}

	err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
	if (err) {
		/* req->out.h.error already set */
		goto out;
	}

	err = fuse_uring_copy_from_ring(ring, req, ent);
out:
	fuse_uring_req_end(ent, req, err);
}

/*
 * Get the next fuse req and send it
 */
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent,
				     struct fuse_ring_queue *queue,
				     unsigned int issue_flags)
{
	int err;
	struct fuse_req *req;

retry:
	spin_lock(&queue->lock);
	fuse_uring_ent_avail(ent, queue);
	req = fuse_uring_ent_assign_req(ent);
	spin_unlock(&queue->lock);

	if (req) {
		err = fuse_uring_send_next_to_ring(ent, req, issue_flags);
		if (err)
			goto retry;
	}
}

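/* Transition the entry from userspace state to the commit state */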
static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent)
{
	struct fuse_ring_queue *queue = ent->queue;

	lockdep_assert_held(&queue->lock);

	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
		return -EIO;

	ent->state = FRRS_COMMIT;
	list_move(&ent->list, &queue->ent_commit_queue);

	return 0;
}

/* FUSE_URING_CMD_COMMIT_AND_FETCH handler */
static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
				   struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring_ent *ent;
	int err;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	uint64_t commit_id = READ_ONCE(cmd_req->commit_id);
	unsigned int qid = READ_ONCE(cmd_req->qid);
	struct fuse_pqueue *fpq;
	struct fuse_req *req;

	err = -ENOTCONN;
	if (!ring)
		return err;

	if (qid >= ring->nr_queues)
		return -EINVAL;

	queue = ring->queues[qid];
	if (!queue)
		return err;
	fpq = &queue->fpq;

	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped))
		return err;

	spin_lock(&queue->lock);
	/*
	 * Find the request based on the unique ID of the fuse request.
	 * This should get revised, as it needs a hash calculation and list
	 * search. And a full struct fuse_pqueue is needed (memory overhead),
	 * as well as the link from req to ring_ent.
	 */
	req = fuse_request_find(fpq, commit_id);
	err = -ENOENT;
	if (!req) {
		pr_info("qid=%d commit_id %llu not found\n", queue->qid,
			commit_id);
		spin_unlock(&queue->lock);
		return err;
	}
	list_del_init(&req->list);
	ent = req->ring_entry;
	req->ring_entry = NULL;

	err = fuse_ring_ent_set_commit(ent);
	if (err != 0) {
		pr_info_ratelimited("qid=%d commit_id %llu state %d",
				    queue->qid, commit_id, ent->state);
		spin_unlock(&queue->lock);
		req->out.h.error = err;
		clear_bit(FR_SENT, &req->flags);
		fuse_request_end(req);
		return err;
	}

	ent->cmd = cmd;
	spin_unlock(&queue->lock);

	/* without the queue lock, as other locks are taken */
	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
	fuse_uring_commit(ent, req, issue_flags);

	/*
	 * Fetching the next request is absolutely required, as queued
	 * fuse requests would otherwise not get processed - committing
	 * and fetching is done in one step vs. legacy fuse, which has
	 * separate read (fetch request) and write (commit result) calls.
	 */
	fuse_uring_next_fuse_req(ent, queue, issue_flags);
	return 0;
}

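/*
 * The ring is considered ready when every queue has at least one entry
 * available; the queue currently being registered is skipped.
 */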
static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
{
	int qid;
	struct fuse_ring_queue *queue;
	bool ready = true;

	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
		if (current_qid == qid)
			continue;

		queue = ring->queues[qid];
		if (!queue) {
			ready = false;
			break;
		}

		spin_lock(&queue->lock);
		if (list_empty(&queue->ent_avail_queue))
			ready = false;
		spin_unlock(&queue->lock);
	}

	return ready;
}

/*
 * FUSE_IO_URING_CMD_REGISTER command handling
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
				   struct io_uring_cmd *cmd,
				   unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;
	struct fuse_ring *ring = queue->ring;
	struct fuse_conn *fc = ring->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	fuse_uring_prepare_cancel(cmd, issue_flags, ent);

	spin_lock(&queue->lock);
	ent->cmd = cmd;
	fuse_uring_ent_avail(ent, queue);
	spin_unlock(&queue->lock);

	if (!ring->ready) {
		bool ready = is_ring_ready(ring, queue->qid);

		if (ready) {
			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops);
			WRITE_ONCE(ring->ready, true);
			wake_up_all(&fc->blocked_waitq);
		}
	}
}

/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 */
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
					 struct iovec iov[FUSE_URING_IOV_SEGS])
{
	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
	struct iov_iter iter;
	ssize_t ret;

	if (sqe->len != FUSE_URING_IOV_SEGS)
		return -EINVAL;

	/*
	 * Direction for buffer access will actually be READ and WRITE,
	 * using WRITE for the import should include READ access as well.
	 */
	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
			   FUSE_URING_IOV_SEGS, &iov, &iter);
	if (ret < 0)
		return ret;

	return 0;
}

static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			   struct fuse_ring_queue *queue)
{
	struct fuse_ring *ring = queue->ring;
	struct fuse_ring_ent *ent;
	size_t payload_size;
	struct iovec iov[FUSE_URING_IOV_SEGS];
	int err;

	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
	if (err) {
		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
				    err);
		return ERR_PTR(err);
	}

	err = -EINVAL;
	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
		return ERR_PTR(err);
	}

	payload_size = iov[1].iov_len;
	if (payload_size < ring->max_payload_sz) {
		pr_info_ratelimited("Invalid req payload len %zu\n",
				    payload_size);
		return ERR_PTR(err);
	}

	err = -ENOMEM;
	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
	if (!ent)
		return ERR_PTR(err);

	INIT_LIST_HEAD(&ent->list);

	ent->queue = queue;
	ent->headers = iov[0].iov_base;
	ent->payload = iov[1].iov_base;

	atomic_inc(&ring->queue_refs);
	return ent;
}

/*
 * Register the header and payload buffers with the kernel and put the
 * entry on the queue as "ready to get fuse requests".
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	int err;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	err = -ENOMEM;
	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return err;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	queue = ring->queues[qid];
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return err;
	}

	/*
	 * The queue created above does not need to be destructed in case
	 * of entry errors below; that is done at ring destruction time.
	 */

	ent = fuse_uring_create_ring_ent(cmd, queue);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	fuse_uring_do_register(ent, cmd, issue_flags);

	return 0;
}

/*
 * Entry function from io_uring to handle the given passthrough command
 * (op code IORING_OP_URING_CMD)
 */
int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	u32 cmd_op = cmd->cmd_op;
	int err;

	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
		fuse_uring_cancel(cmd, issue_flags);
		return 0;
	}

	/* This extra SQE size holds struct fuse_uring_cmd_req */
	if (!(issue_flags & IO_URING_F_SQE128))
		return -EINVAL;

	fud = fuse_get_dev(cmd->file);
	if (!fud) {
		pr_info_ratelimited("No fuse device found\n");
		return -ENOTCONN;
	}
	fc = fud->fc;

	/* Once a connection has io-uring enabled on it, it can't be disabled */
	if (!enable_uring && !fc->io_uring) {
		pr_info_ratelimited("fuse-io-uring is disabled\n");
		return -EOPNOTSUPP;
	}

	if (fc->aborted)
		return -ECONNABORTED;
	if (!fc->connected)
		return -ENOTCONN;

	/*
	 * fuse_uring_register() needs the ring to be initialized,
	 * i.e. the max payload size needs to be known already.
	 */
	if (!fc->initialized)
		return -EAGAIN;

	switch (cmd_op) {
	case FUSE_IO_URING_CMD_REGISTER:
		err = fuse_uring_register(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
				     err);
			fc->io_uring = 0;
			wake_up_all(&fc->blocked_waitq);
			return err;
		}
		break;
	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH:
		err = fuse_uring_commit_fetch(cmd, issue_flags, fc);
		if (err) {
			pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n",
				     err);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	return -EIOCBQUEUED;
}

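/* Move the entry to userspace state and complete the io_uring command */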
static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
			    ssize_t ret, unsigned int issue_flags)
{
	struct fuse_ring_queue *queue = ent->queue;

	spin_lock(&queue->lock);
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	ent->cmd = NULL;
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, ret, 0, issue_flags);
}

/*
 * This prepares and sends the ring request in fuse-uring task context.
 * User buffers are not mapped yet - the application does not have permission
 * to write to them - so this has to be executed in ring task context.
 */
static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
				    unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue = ent->queue;
	int err;

	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
		err = fuse_uring_prepare_send(ent, ent->fuse_req);
		if (err) {
			fuse_uring_next_fuse_req(ent, queue, issue_flags);
			return;
		}
	} else {
		err = -ECANCELED;
	}

	fuse_uring_send(ent, cmd, err, issue_flags);
}

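/* Select the ring queue based on the CPU the submitting task runs on */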
static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
{
	unsigned int qid;
	struct fuse_ring_queue *queue;

	qid = task_cpu(current);

	if (WARN_ONCE(qid >= ring->nr_queues,
		      "Core number (%u) exceeds nr queues (%zu)\n", qid,
		      ring->nr_queues))
		qid = 0;

	queue = ring->queues[qid];
	WARN_ONCE(!queue, "Missing queue for qid %d\n", qid);

	return queue;
}

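/* Hand the entry over to the io_uring task to copy and send the request */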
static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent)
{
	struct io_uring_cmd *cmd = ent->cmd;

	uring_cmd_set_ring_ent(cmd, ent);
	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task);
}

/* queue a fuse request and send it if a ring entry is available */
void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent = NULL;
	int err;

	err = -EINVAL;
	queue = fuse_uring_task_to_queue(ring);
	if (!queue)
		goto err;

	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
		req->in.h.unique = fuse_get_unique(fiq);

	spin_lock(&queue->lock);
	err = -ENOTCONN;
	if (unlikely(queue->stopped))
		goto err_unlock;

	set_bit(FR_URING, &req->flags);
	req->ring_queue = queue;
	ent = list_first_entry_or_null(&queue->ent_avail_queue,
				       struct fuse_ring_ent, list);
	if (ent)
		fuse_uring_add_req_to_ring_ent(ent, req);
	else
		list_add_tail(&req->list, &queue->fuse_req_queue);
	spin_unlock(&queue->lock);

	if (ent)
		fuse_uring_dispatch_ent(ent);

	return;

err_unlock:
	spin_unlock(&queue->lock);
err:
	req->out.h.error = err;
	clear_bit(FR_PENDING, &req->flags);
	fuse_request_end(req);
}

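/*
 * Queue a background request through io-uring; returns false if it could
 * not be queued (e.g. missing queue or the queue is stopped).
 */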
bool fuse_uring_queue_bq_req(struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_ring *ring = fc->ring;
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent = NULL;

	queue = fuse_uring_task_to_queue(ring);
	if (!queue)
		return false;

	spin_lock(&queue->lock);
	if (unlikely(queue->stopped)) {
		spin_unlock(&queue->lock);
		return false;
	}

	set_bit(FR_URING, &req->flags);
	req->ring_queue = queue;
	list_add_tail(&req->list, &queue->fuse_req_bg_queue);

	ent = list_first_entry_or_null(&queue->ent_avail_queue,
				       struct fuse_ring_ent, list);
	spin_lock(&fc->bg_lock);
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	fuse_uring_flush_bg(queue);
	spin_unlock(&fc->bg_lock);

	/*
	 * Due to bg_queue flush limits there might be other bg requests
	 * in the queue that need to be handled first. Or no further request
	 * might be available at all.
	 */
	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req,
				       list);
	if (ent && req) {
		fuse_uring_add_req_to_ring_ent(ent, req);
		spin_unlock(&queue->lock);

		fuse_uring_dispatch_ent(ent);
	} else {
		spin_unlock(&queue->lock);
	}

	return true;
}

bool fuse_uring_remove_pending_req(struct fuse_req *req)
{
	struct fuse_ring_queue *queue = req->ring_queue;

	return fuse_remove_pending_req(req, &queue->lock);
}

static const struct fuse_iqueue_ops fuse_io_uring_ops = {
	/* should be sent over io-uring as an enhancement */
	.send_forget = fuse_dev_queue_forget,

	/*
	 * could be sent over io-uring, but interrupts should be rare;
	 * no need to make the code complex
	 */
	.send_interrupt = fuse_dev_queue_interrupt,
	.send_req = fuse_uring_queue_fuse_req,
};