xref: /src/sys/fs/fuse/fuse_ipc.c (revision 7755a406a6ae3801e885a79f714155f97c4d2bc6)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/param.h>
64 #include <sys/module.h>
65 #include <sys/systm.h>
66 #include <sys/counter.h>
67 #include <sys/errno.h>
68 #include <sys/kernel.h>
69 #include <sys/conf.h>
70 #include <sys/uio.h>
71 #include <sys/malloc.h>
72 #include <sys/queue.h>
73 #include <sys/lock.h>
74 #include <sys/sx.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/mount.h>
78 #include <sys/sdt.h>
79 #include <sys/vnode.h>
80 #include <sys/signalvar.h>
81 #include <sys/syscallsubr.h>
82 #include <sys/sysctl.h>
83 #include <vm/uma.h>
84 
85 #include "fuse.h"
86 #include "fuse_node.h"
87 #include "fuse_ipc.h"
88 #include "fuse_internal.h"
89 
90 SDT_PROVIDER_DECLARE(fusefs);
91 /*
92  * Fuse trace probe:
93  * arg0: verbosity.  Higher numbers give more verbose messages
94  * arg1: Textual message
95  */
96 SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");
97 
98 static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
99     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
100 static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
101 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
102 static void fticket_refresh(struct fuse_ticket *ftick);
103 static inline void fticket_reset(struct fuse_ticket *ftick);
104 static void fticket_destroy(struct fuse_ticket *ftick);
105 static int fticket_wait_answer(struct fuse_ticket *ftick);
106 static inline int
107 fticket_aw_pull_uio(struct fuse_ticket *ftick,
108     struct uio *uio);
109 
110 static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
111 
112 static fuse_handler_t fuse_standard_handler;
113 
114 static counter_u64_t fuse_ticket_count;
115 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
116     &fuse_ticket_count, "Number of allocated tickets");
117 
118 static long fuse_iov_permanent_bufsize = 1 << 19;
119 
120 SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
121     &fuse_iov_permanent_bufsize, 0,
122     "limit for permanently stored buffer size for fuse_iovs");
123 static int fuse_iov_credit = 16;
124 
125 SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
126     &fuse_iov_credit, 0,
127     "how many times is an oversized fuse_iov tolerated");
128 
129 MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
130 static uma_zone_t ticket_zone;
131 
132 /*
133  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
134  * leagally never respond
135  */
136 static int
fuse_interrupt_callback(struct fuse_ticket * tick,struct uio * uio)137 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
138 {
139 	struct fuse_ticket *otick, *x_tick;
140 	struct fuse_interrupt_in *fii;
141 	struct fuse_data *data = tick->tk_data;
142 	bool found = false;
143 
144 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
145 		sizeof(struct fuse_in_header));
146 
147 	fuse_lck_mtx_lock(data->aw_mtx);
148 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
149 		if (otick->tk_unique == fii->unique) {
150 			found = true;
151 			break;
152 		}
153 	}
154 	fuse_lck_mtx_unlock(data->aw_mtx);
155 
156 	if (!found) {
157 		/* Original is already complete.  Just return */
158 		return 0;
159 	}
160 
161 	/* Clear the original ticket's interrupt association */
162 	otick->irq_unique = 0;
163 
164 	if (tick->tk_aw_ohead.error == ENOSYS) {
165 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
166 		return 0;
167 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
168 		/*
169 		 * There are two reasons we might get this:
170 		 * 1) the daemon received the INTERRUPT request before the
171 		 *    original, or
172 		 * 2) the daemon received the INTERRUPT request after it
173 		 *    completed the original request.
174 		 * In the first case we should re-send the INTERRUPT.  In the
175 		 * second, we should ignore it.
176 		 */
177 		/* Resend */
178 		fuse_interrupt_send(otick, EINTR);
179 		return 0;
180 	} else {
181 		/* Illegal FUSE_INTERRUPT response */
182 		return EINVAL;
183 	}
184 }
185 
186 /* Interrupt the operation otick.  Return err as its error code */
187 void
fuse_interrupt_send(struct fuse_ticket * otick,int err)188 fuse_interrupt_send(struct fuse_ticket *otick, int err)
189 {
190 	struct fuse_dispatcher fdi;
191 	struct fuse_interrupt_in *fii;
192 	struct fuse_in_header *ftick_hdr;
193 	struct fuse_data *data = otick->tk_data;
194 	struct fuse_ticket *tick, *xtick;
195 	struct ucred reused_creds;
196 
197 	if (otick->irq_unique == 0) {
198 		/*
199 		 * If the daemon hasn't yet received otick, then we can answer
200 		 * it ourselves and return.
201 		 */
202 		fuse_lck_mtx_lock(data->ms_mtx);
203 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
204 			xtick) {
205 			if (tick == otick) {
206 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
207 					fuse_ticket, tk_ms_link);
208 				otick->tk_data->ms_count--;
209 				otick->tk_ms_link.stqe_next = NULL;
210 				fuse_lck_mtx_unlock(data->ms_mtx);
211 
212 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
213 				if (!fticket_answered(otick)) {
214 					fticket_set_answered(otick);
215 					otick->tk_aw_errno = err;
216 					wakeup(otick);
217 				}
218 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
219 
220 				fuse_ticket_drop(tick);
221 				return;
222 			}
223 		}
224 		fuse_lck_mtx_unlock(data->ms_mtx);
225 
226 		/*
227 		 * If the fuse daemon doesn't support interrupts, then there's
228 		 * nothing more that we can do
229 		 */
230 		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
231 			return;
232 
233 		/*
234 		 * If the fuse daemon has already received otick, then we must
235 		 * send FUSE_INTERRUPT.
236 		 */
237 		ftick_hdr = fticket_in_header(otick);
238 		reused_creds.cr_uid = ftick_hdr->uid;
239 		reused_creds.cr_gid = ftick_hdr->gid;
240 		fdisp_init(&fdi, sizeof(*fii));
241 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
242 			ftick_hdr->pid, &reused_creds);
243 
244 		fii = fdi.indata;
245 		fii->unique = otick->tk_unique;
246 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
247 
248 		otick->irq_unique = fdi.tick->tk_unique;
249 		/* Interrupt ops should be delivered ASAP */
250 		fuse_insert_message(fdi.tick, true);
251 		fdisp_destroy(&fdi);
252 	} else {
253 		/* This ticket has already been interrupted */
254 	}
255 }
256 
257 void
fiov_init(struct fuse_iov * fiov,size_t size)258 fiov_init(struct fuse_iov *fiov, size_t size)
259 {
260 	uint32_t msize = FU_AT_LEAST(size);
261 
262 	fiov->len = 0;
263 
264 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
265 
266 	fiov->allocated_size = msize;
267 	fiov->credit = fuse_iov_credit;
268 }
269 
270 void
fiov_teardown(struct fuse_iov * fiov)271 fiov_teardown(struct fuse_iov *fiov)
272 {
273 	MPASS(fiov->base != NULL);
274 	free(fiov->base, M_FUSEMSG);
275 }
276 
277 void
fiov_adjust(struct fuse_iov * fiov,size_t size)278 fiov_adjust(struct fuse_iov *fiov, size_t size)
279 {
280 	if (fiov->allocated_size < size ||
281 	    (fuse_iov_permanent_bufsize >= 0 &&
282 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
283 	    --fiov->credit < 0)) {
284 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
285 		    M_WAITOK | M_ZERO);
286 		if (!fiov->base) {
287 			panic("FUSE: realloc failed");
288 		}
289 		fiov->allocated_size = FU_AT_LEAST(size);
290 		fiov->credit = fuse_iov_credit;
291 		/* Clear data buffer after reallocation */
292 		bzero(fiov->base, size);
293 	} else if (size > fiov->len) {
294 		/* Clear newly extended portion of data buffer */
295 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
296 	}
297 	fiov->len = size;
298 }
299 
/*
 * Resize the fiov if needed, and reset its length to zero.  This is simply
 * fiov_adjust() with a requested size of 0.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{

	fiov_adjust(fiov, 0);
}
306 
307 static int
fticket_ctor(void * mem,int size,void * arg,int flags)308 fticket_ctor(void *mem, int size, void *arg, int flags)
309 {
310 	struct fuse_ticket *ftick = mem;
311 	struct fuse_data *data = arg;
312 
313 	FUSE_ASSERT_MS_DONE(ftick);
314 	FUSE_ASSERT_AW_DONE(ftick);
315 
316 	ftick->tk_data = data;
317 	ftick->irq_unique = 0;
318 	refcount_init(&ftick->tk_refcount, 1);
319 	counter_u64_add(fuse_ticket_count, 1);
320 
321 	fticket_refresh(ftick);
322 
323 	return 0;
324 }
325 
326 static void
fticket_dtor(void * mem,int size,void * arg)327 fticket_dtor(void *mem, int size, void *arg)
328 {
329 #ifdef INVARIANTS
330 	struct fuse_ticket *ftick = mem;
331 #endif
332 
333 	FUSE_ASSERT_MS_DONE(ftick);
334 	FUSE_ASSERT_AW_DONE(ftick);
335 
336 	counter_u64_add(fuse_ticket_count, -1);
337 }
338 
339 static int
fticket_init(void * mem,int size,int flags)340 fticket_init(void *mem, int size, int flags)
341 {
342 	struct fuse_ticket *ftick = mem;
343 
344 	bzero(ftick, sizeof(struct fuse_ticket));
345 
346 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
347 
348 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
349 	fiov_init(&ftick->tk_aw_fiov, 0);
350 
351 	return 0;
352 }
353 
354 static void
fticket_fini(void * mem,int size)355 fticket_fini(void *mem, int size)
356 {
357 	struct fuse_ticket *ftick = mem;
358 
359 	fiov_teardown(&ftick->tk_ms_fiov);
360 	fiov_teardown(&ftick->tk_aw_fiov);
361 	mtx_destroy(&ftick->tk_aw_mtx);
362 }
363 
364 static inline struct fuse_ticket *
fticket_alloc(struct fuse_data * data)365 fticket_alloc(struct fuse_data *data)
366 {
367 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
368 }
369 
370 static inline void
fticket_destroy(struct fuse_ticket * ftick)371 fticket_destroy(struct fuse_ticket *ftick)
372 {
373 	return uma_zfree(ticket_zone, ftick);
374 }
375 
376 /* Prepare the ticket to be reused and clear its data buffers */
377 static inline void
fticket_refresh(struct fuse_ticket * ftick)378 fticket_refresh(struct fuse_ticket *ftick)
379 {
380 	fticket_reset(ftick);
381 
382 	fiov_refresh(&ftick->tk_ms_fiov);
383 	fiov_refresh(&ftick->tk_aw_fiov);
384 }
385 
386 /* Prepare the ticket to be reused, but don't clear its data buffers */
387 static inline void
fticket_reset(struct fuse_ticket * ftick)388 fticket_reset(struct fuse_ticket *ftick)
389 {
390 	struct fuse_data *data = ftick->tk_data;
391 
392 	FUSE_ASSERT_MS_DONE(ftick);
393 	FUSE_ASSERT_AW_DONE(ftick);
394 
395 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
396 
397 	ftick->tk_aw_errno = 0;
398 	ftick->tk_flag = 0;
399 
400 	/* May be truncated to 32 bits on LP32 arches */
401 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
402 	if (ftick->tk_unique == 0)
403 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
404 }
405 
406 static int
fticket_wait_answer(struct fuse_ticket * ftick)407 fticket_wait_answer(struct fuse_ticket *ftick)
408 {
409 	struct thread *td = curthread;
410 	sigset_t blockedset, oldset;
411 	int err = 0, stops_deferred;
412 	struct fuse_data *data = ftick->tk_data;
413 	bool interrupted = false;
414 
415 	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
416 	    data->dataflags & FSESS_INTR) {
417 		SIGEMPTYSET(blockedset);
418 	} else {
419 		/* Block all signals except (implicitly) SIGKILL */
420 		SIGFILLSET(blockedset);
421 	}
422 	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
423 	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);
424 
425 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
426 
427 retry:
428 	if (fticket_answered(ftick)) {
429 		goto out;
430 	}
431 
432 	if (fdata_get_dead(data)) {
433 		err = ENOTCONN;
434 		fticket_set_answered(ftick);
435 		goto out;
436 	}
437 	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
438 	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
439 	    data->daemon_timeout * hz);
440 	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
441 	if (err == EWOULDBLOCK) {
442 		SDT_PROBE2(fusefs, , ipc, trace, 3,
443 			"fticket_wait_answer: EWOULDBLOCK");
444 		err = ETIMEDOUT;
445 		fticket_set_answered(ftick);
446 	} else if ((err == EINTR || err == ERESTART)) {
447 		/*
448 		 * Whether we get EINTR or ERESTART depends on whether
449 		 * SA_RESTART was set by sigaction(2).
450 		 *
451 		 * Try to interrupt the operation and wait for an EINTR response
452 		 * to the original operation.  If the file system does not
453 		 * support FUSE_INTERRUPT, then we'll just wait for it to
454 		 * complete like normal.  If it does support FUSE_INTERRUPT,
455 		 * then it will either respond EINTR to the original operation,
456 		 * or EAGAIN to the interrupt.
457 		 */
458 		sigset_t tmpset;
459 
460 		SDT_PROBE2(fusefs, , ipc, trace, 4,
461 			"fticket_wait_answer: interrupt");
462 		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
463 		fuse_interrupt_send(ftick, err);
464 
465 		PROC_LOCK(td->td_proc);
466 		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
467 		tmpset = td->td_proc->p_siglist;
468 		SIGSETOR(tmpset, td->td_siglist);
469 		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
470 		PROC_UNLOCK(td->td_proc);
471 
472 		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
473 		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
474 			/*
475 			 * Block all signals while we wait for an interrupt
476 			 * response.  The protocol doesn't discriminate between
477 			 * different signals.
478 			 */
479 			SIGFILLSET(blockedset);
480 			interrupted = true;
481 			goto retry;
482 		} else {
483 			/*
484 			 * Return immediately for fatal signals, or if this is
485 			 * the second interruption.  We should only be
486 			 * interrupted twice if the thread is stopped, for
487 			 * example during sigexit.
488 			 */
489 		}
490 	} else if (err) {
491 		SDT_PROBE2(fusefs, , ipc, trace, 6,
492 			"fticket_wait_answer: other error");
493 	} else {
494 		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
495 	}
496 out:
497 	if (!(err || fticket_answered(ftick))) {
498 		SDT_PROBE2(fusefs, , ipc, trace, 1,
499 			"FUSE: requester was woken up but still no answer");
500 		err = ENXIO;
501 	}
502 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
503 	sigallowstop(stops_deferred);
504 
505 	return err;
506 }
507 
508 static	inline
509 int
fticket_aw_pull_uio(struct fuse_ticket * ftick,struct uio * uio)510 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
511 {
512 	int err = 0;
513 	size_t len = uio_resid(uio);
514 
515 	if (len) {
516 		fiov_adjust(fticket_resp(ftick), len);
517 		err = uiomove(fticket_resp(ftick)->base, len, uio);
518 	}
519 	return err;
520 }
521 
522 int
fticket_pull(struct fuse_ticket * ftick,struct uio * uio)523 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
524 {
525 	int err = 0;
526 
527 	if (ftick->tk_aw_ohead.error) {
528 		return 0;
529 	}
530 	err = fuse_body_audit(ftick, uio_resid(uio));
531 	if (!err) {
532 		err = fticket_aw_pull_uio(ftick, uio);
533 	}
534 	return err;
535 }
536 
537 struct fuse_data *
fdata_alloc(struct cdev * fdev,struct ucred * cred)538 fdata_alloc(struct cdev *fdev, struct ucred *cred)
539 {
540 	struct fuse_data *data;
541 
542 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
543 
544 	data->fdev = fdev;
545 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
546 	STAILQ_INIT(&data->ms_head);
547 	data->ms_count = 0;
548 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
549 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
550 	TAILQ_INIT(&data->aw_head);
551 	data->daemoncred = crhold(cred);
552 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
553 	data->ref = 1;
554 
555 	return data;
556 }
557 
558 void
fdata_trydestroy(struct fuse_data * data)559 fdata_trydestroy(struct fuse_data *data)
560 {
561 	data->ref--;
562 	MPASS(data->ref >= 0);
563 	if (data->ref != 0)
564 		return;
565 
566 	/* Driving off stage all that stuff thrown at device... */
567 	crfree(data->daemoncred);
568 	mtx_destroy(&data->aw_mtx);
569 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
570 	knlist_destroy(&data->ks_rsel.si_note);
571 	mtx_destroy(&data->ms_mtx);
572 
573 	free(data, M_FUSEMSG);
574 }
575 
576 void
fdata_set_dead(struct fuse_data * data)577 fdata_set_dead(struct fuse_data *data)
578 {
579 	FUSE_LOCK();
580 	if (fdata_get_dead(data)) {
581 		FUSE_UNLOCK();
582 		return;
583 	}
584 	fuse_lck_mtx_lock(data->ms_mtx);
585 	data->dataflags |= FSESS_DEAD;
586 	wakeup_one(data);
587 	selwakeuppri(&data->ks_rsel, PZERO);
588 	wakeup(&data->ticketer);
589 	fuse_lck_mtx_unlock(data->ms_mtx);
590 	FUSE_UNLOCK();
591 }
592 
593 struct fuse_ticket *
fuse_ticket_fetch(struct fuse_data * data)594 fuse_ticket_fetch(struct fuse_data *data)
595 {
596 	int err = 0;
597 	struct fuse_ticket *ftick;
598 
599 	ftick = fticket_alloc(data);
600 
601 	if (!(data->dataflags & FSESS_INITED)) {
602 		/* Sleep until get answer for INIT message */
603 		FUSE_LOCK();
604 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
605 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
606 			    "fu_ini", 0);
607 			if (err)
608 				fdata_set_dead(data);
609 		} else
610 			FUSE_UNLOCK();
611 	}
612 	return ftick;
613 }
614 
615 int
fuse_ticket_drop(struct fuse_ticket * ftick)616 fuse_ticket_drop(struct fuse_ticket *ftick)
617 {
618 	int die;
619 
620 	die = refcount_release(&ftick->tk_refcount);
621 	if (die)
622 		fticket_destroy(ftick);
623 
624 	return die;
625 }
626 
627 void
fuse_insert_callback(struct fuse_ticket * ftick,fuse_handler_t * handler)628 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
629 {
630 	if (fdata_get_dead(ftick->tk_data)) {
631 		return;
632 	}
633 	ftick->tk_aw_handler = handler;
634 
635 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
636 	fuse_aw_push(ftick);
637 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
638 }
639 
640 /*
641  * Insert a new upgoing ticket into the message queue
642  *
643  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
644  * FIFO order.
645  */
646 void
fuse_insert_message(struct fuse_ticket * ftick,bool urgent)647 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
648 {
649 	if (ftick->tk_flag & FT_DIRTY) {
650 		panic("FUSE: ticket reused without being refreshed");
651 	}
652 	ftick->tk_flag |= FT_DIRTY;
653 
654 	if (fdata_get_dead(ftick->tk_data)) {
655 		return;
656 	}
657 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
658 	if (urgent)
659 		fuse_ms_push_head(ftick);
660 	else
661 		fuse_ms_push(ftick);
662 	wakeup_one(ftick->tk_data);
663 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO);
664 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
665 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
666 }
667 
668 static int
fuse_body_audit(struct fuse_ticket * ftick,size_t blen)669 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
670 {
671 	int err = 0;
672 	enum fuse_opcode opcode;
673 
674 	opcode = fticket_opcode(ftick);
675 
676 	switch (opcode) {
677 	case FUSE_BMAP:
678 		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
679 		break;
680 
681 	case FUSE_LINK:
682 	case FUSE_LOOKUP:
683 	case FUSE_MKDIR:
684 	case FUSE_MKNOD:
685 	case FUSE_SYMLINK:
686 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
687 			err = (blen == sizeof(struct fuse_entry_out)) ?
688 				0 : EINVAL;
689 		} else {
690 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
691 		}
692 		break;
693 
694 	case FUSE_FORGET:
695 		panic("FUSE: a handler has been installed for FUSE_FORGET");
696 		break;
697 
698 	case FUSE_GETATTR:
699 	case FUSE_SETATTR:
700 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
701 			err = (blen == sizeof(struct fuse_attr_out)) ?
702 			  0 : EINVAL;
703 		} else {
704 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
705 		}
706 		break;
707 
708 	case FUSE_READLINK:
709 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
710 		break;
711 
712 	case FUSE_UNLINK:
713 		err = (blen == 0) ? 0 : EINVAL;
714 		break;
715 
716 	case FUSE_RMDIR:
717 		err = (blen == 0) ? 0 : EINVAL;
718 		break;
719 
720 	case FUSE_RENAME:
721 		err = (blen == 0) ? 0 : EINVAL;
722 		break;
723 
724 	case FUSE_OPEN:
725 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
726 		break;
727 
728 	case FUSE_READ:
729 		err = (((struct fuse_read_in *)(
730 		    (char *)ftick->tk_ms_fiov.base +
731 		    sizeof(struct fuse_in_header)
732 		    ))->size >= blen) ? 0 : EINVAL;
733 		break;
734 
735 	case FUSE_WRITE:
736 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
737 		break;
738 
739 	case FUSE_STATFS:
740 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
741 			err = (blen == sizeof(struct fuse_statfs_out)) ?
742 			  0 : EINVAL;
743 		} else {
744 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
745 		}
746 		break;
747 
748 	case FUSE_RELEASE:
749 		err = (blen == 0) ? 0 : EINVAL;
750 		break;
751 
752 	case FUSE_FSYNC:
753 		err = (blen == 0) ? 0 : EINVAL;
754 		break;
755 
756 	case FUSE_SETXATTR:
757 		err = (blen == 0) ? 0 : EINVAL;
758 		break;
759 
760 	case FUSE_GETXATTR:
761 	case FUSE_LISTXATTR:
762 		/*
763 		 * These can have varying response lengths, and 0 length
764 		 * isn't necessarily invalid.
765 		 */
766 		err = 0;
767 		break;
768 
769 	case FUSE_REMOVEXATTR:
770 		err = (blen == 0) ? 0 : EINVAL;
771 		break;
772 
773 	case FUSE_FLUSH:
774 		err = (blen == 0) ? 0 : EINVAL;
775 		break;
776 
777 	case FUSE_INIT:
778 		if (blen == sizeof(struct fuse_init_out) ||
779 		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
780 		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
781 			err = 0;
782 		} else {
783 			err = EINVAL;
784 		}
785 		break;
786 
787 	case FUSE_OPENDIR:
788 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
789 		break;
790 
791 	case FUSE_READDIR:
792 		err = (((struct fuse_read_in *)(
793 		    (char *)ftick->tk_ms_fiov.base +
794 		    sizeof(struct fuse_in_header)
795 		    ))->size >= blen) ? 0 : EINVAL;
796 		break;
797 
798 	case FUSE_RELEASEDIR:
799 		err = (blen == 0) ? 0 : EINVAL;
800 		break;
801 
802 	case FUSE_FSYNCDIR:
803 		err = (blen == 0) ? 0 : EINVAL;
804 		break;
805 
806 	case FUSE_GETLK:
807 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
808 		break;
809 
810 	case FUSE_SETLK:
811 		err = (blen == 0) ? 0 : EINVAL;
812 		break;
813 
814 	case FUSE_SETLKW:
815 		err = (blen == 0) ? 0 : EINVAL;
816 		break;
817 
818 	case FUSE_ACCESS:
819 		err = (blen == 0) ? 0 : EINVAL;
820 		break;
821 
822 	case FUSE_CREATE:
823 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
824 			err = (blen == sizeof(struct fuse_entry_out) +
825 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
826 		} else {
827 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
828 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
829 		}
830 		break;
831 
832 	case FUSE_DESTROY:
833 		err = (blen == 0) ? 0 : EINVAL;
834 		break;
835 
836 	case FUSE_IOCTL:
837 		err = (blen >= sizeof(struct fuse_ioctl_out)) ? 0 : EINVAL;
838 		break;
839 
840 	case FUSE_FALLOCATE:
841 		err = (blen == 0) ? 0 : EINVAL;
842 		break;
843 
844 	case FUSE_LSEEK:
845 		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
846 		break;
847 
848 	case FUSE_COPY_FILE_RANGE:
849 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
850 		break;
851 
852 	default:
853 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
854 	}
855 
856 	return err;
857 }
858 
859 static inline void
fuse_setup_ihead(struct fuse_in_header * ihead,struct fuse_ticket * ftick,uint64_t nid,enum fuse_opcode op,size_t blen,pid_t pid,struct ucred * cred)860 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
861     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
862     struct ucred *cred)
863 {
864 	ihead->len = sizeof(*ihead) + blen;
865 	ihead->unique = ftick->tk_unique;
866 	ihead->nodeid = nid;
867 	ihead->opcode = op;
868 
869 	ihead->pid = pid;
870 	ihead->uid = cred->cr_uid;
871 	ihead->gid = cred->cr_gid;
872 }
873 
874 /*
875  * fuse_standard_handler just pulls indata and wakes up pretender.
876  * Doesn't try to interpret data, that's left for the pretender.
877  * Though might do a basic size verification before the pull-in takes place
878  */
879 
880 static int
fuse_standard_handler(struct fuse_ticket * ftick,struct uio * uio)881 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
882 {
883 	int err = 0;
884 
885 	err = fticket_pull(ftick, uio);
886 
887 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
888 
889 	if (!fticket_answered(ftick)) {
890 		fticket_set_answered(ftick);
891 		ftick->tk_aw_errno = err;
892 		wakeup(ftick);
893 	}
894 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
895 
896 	return err;
897 }
898 
899 /*
900  * Reinitialize a dispatcher from a pid and node id, without resizing or
901  * clearing its data buffers
902  */
903 static void
fdisp_refresh_pid(struct fuse_dispatcher * fdip,enum fuse_opcode op,struct mount * mp,uint64_t nid,pid_t pid,struct ucred * cred)904 fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
905     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
906 {
907 	MPASS(fdip->tick);
908 	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
909 		"Must use fdisp_make_pid to increase the size of the fiov");
910 	fticket_reset(fdip->tick);
911 
912 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
913 	    fdip->indata, fdip->iosize);
914 
915 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
916 		cred);
917 }
918 
919 /* Initialize a dispatcher from a pid and node id */
920 static void
fdisp_make_pid(struct fuse_dispatcher * fdip,enum fuse_opcode op,struct fuse_data * data,uint64_t nid,pid_t pid,struct ucred * cred)921 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
922     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
923 {
924 	if (fdip->tick) {
925 		fticket_refresh(fdip->tick);
926 	} else {
927 		fdip->tick = fuse_ticket_fetch(data);
928 	}
929 
930 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
931 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
932 	    fdip->indata, fdip->iosize);
933 
934 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
935 }
936 
937 void
fdisp_make(struct fuse_dispatcher * fdip,enum fuse_opcode op,struct mount * mp,uint64_t nid,struct thread * td,struct ucred * cred)938 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
939     uint64_t nid, struct thread *td, struct ucred *cred)
940 {
941 	struct fuse_data *data = fuse_get_mpdata(mp);
942 	RECTIFY_TDCR(td, cred);
943 
944 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
945 }
946 
947 void
fdisp_make_vp(struct fuse_dispatcher * fdip,enum fuse_opcode op,struct vnode * vp,struct thread * td,struct ucred * cred)948 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
949     struct vnode *vp, struct thread *td, struct ucred *cred)
950 {
951 	struct mount *mp = vnode_mount(vp);
952 	struct fuse_data *data = fuse_get_mpdata(mp);
953 
954 	RECTIFY_TDCR(td, cred);
955 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
956 	    td->td_proc->p_pid, cred);
957 }
958 
959 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
960 void
fdisp_refresh_vp(struct fuse_dispatcher * fdip,enum fuse_opcode op,struct vnode * vp,struct thread * td,struct ucred * cred)961 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
962     struct vnode *vp, struct thread *td, struct ucred *cred)
963 {
964 	RECTIFY_TDCR(td, cred);
965 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
966 	    td->td_proc->p_pid, cred);
967 }
968 
969 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
970 
971 int
fdisp_wait_answ(struct fuse_dispatcher * fdip)972 fdisp_wait_answ(struct fuse_dispatcher *fdip)
973 {
974 	int err = 0;
975 
976 	fdip->answ_stat = 0;
977 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
978 	fuse_insert_message(fdip->tick, false);
979 
980 	if ((err = fticket_wait_answer(fdip->tick))) {
981 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
982 
983 		if (fticket_answered(fdip->tick)) {
984 			/*
985 	                 * Just between noticing the interrupt and getting here,
986 	                 * the standard handler has completed his job.
987 	                 * So we drop the ticket and exit as usual.
988 	                 */
989 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
990 				"IPC: interrupted, already answered", err);
991 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
992 			goto out;
993 		} else {
994 			/*
995 	                 * So we were faster than the standard handler.
996 	                 * Then by setting the answered flag we get *him*
997 	                 * to drop the ticket.
998 	                 */
999 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1000 				"IPC: interrupted, setting to answered", err);
1001 			fticket_set_answered(fdip->tick);
1002 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1003 			return err;
1004 		}
1005 	}
1006 
1007 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1008 		/* The daemon died while we were waiting for a response */
1009 		err = ENOTCONN;
1010 		goto out;
1011 	} else if (fdip->tick->tk_aw_errno) {
1012 		/*
1013 		 * There was some sort of communication error with the daemon
1014 		 * that the client wouldn't understand.
1015 		 */
1016 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1017 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1018 		err = EIO;
1019 		goto out;
1020 	}
1021 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1022 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1023 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1024 		/*
1025 	         * This means a "proper" fuse syscall error.
1026 	         * We record this value so the caller will
1027 	         * be able to know it's not a boring messaging
1028 	         * failure, if she wishes so (and if not, she can
1029 	         * just simply propagate the return value of this routine).
1030 	         * [XXX Maybe a bitflag would do the job too,
1031 	         * if other flags needed, this will be converted thusly.]
1032 	         */
1033 		fdip->answ_stat = err;
1034 		goto out;
1035 	}
1036 	fdip->answ = fticket_resp(fdip->tick)->base;
1037 	fdip->iosize = fticket_resp(fdip->tick)->len;
1038 
1039 	return 0;
1040 
1041 out:
1042 	return err;
1043 }
1044 
1045 void
fuse_ipc_init(void)1046 fuse_ipc_init(void)
1047 {
1048 	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
1049 	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
1050 	    UMA_ALIGN_PTR, 0);
1051 	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
1052 }
1053 
1054 void
fuse_ipc_destroy(void)1055 fuse_ipc_destroy(void)
1056 {
1057 	counter_u64_free(fuse_ticket_count);
1058 	uma_zdestroy(ticket_zone);
1059 }
1060 
1061 SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*");
1062 void
fuse_warn(struct fuse_data * data,unsigned flag,const char * msg)1063 fuse_warn(struct fuse_data *data, unsigned flag, const char *msg)
1064 {
1065 	SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg);
1066 	if (!(data->dataflags & flag)) {
1067 		printf("WARNING: FUSE protocol violation for server mounted at "
1068 		    "%s: %s  "
1069 		    "This warning will not be repeated.\n",
1070 		    data->mp->mnt_stat.f_mntonname, msg);
1071 		data->dataflags |= flag;
1072 	}
1073 }
1074