/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"
#include "dev_uring_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>
#include <uapi/linux/magic.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

static int set_global_limit(const char *val, const struct kernel_param *kp);

unsigned int fuse_max_pages_limit = 256;
/* default is no timeout */
unsigned int fuse_default_req_timeout;
unsigned int fuse_max_req_timeout;

unsigned int max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

unsigned int max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_forget_link *fuse_alloc_forget(void)
{
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}

static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
{
	struct fuse_submount_lookup *sl;

	sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
	if (!sl)
		return NULL;
	sl->forget = fuse_alloc_forget();
	if (!sl->forget)
		goto out_free;

	return sl;

out_free:
	kfree(sl);
	return NULL;
}

static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

	fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
		return NULL;

	fi->i_time = 0;
	fi->inval_mask = ~0;
	fi->nodeid = 0;
	fi->nlookup = 0;
	fi->attr_version = 0;
	fi->orig_ino = 0;
	fi->state = 0;
	fi->submount_lookup = NULL;
	mutex_init(&fi->mutex);
	spin_lock_init(&fi->lock);
	fi->forget = fuse_alloc_forget();
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_inode_backing_set(fi, NULL);

	return &fi->inode;

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
}

static void fuse_free_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	mutex_destroy(&fi->mutex);
	kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_put(fuse_inode_backing(fi));

	kmem_cache_free(fuse_inode_cachep, fi);
}

static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
					 struct fuse_submount_lookup *sl)
{
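	/*
	 * Drop one reference on the shared submount lookup; the last user
	 * queues the final FORGET (nlookup 1) for the submount root.
	 */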
	if (!refcount_dec_and_test(&sl->count))
		return;

	fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
	sl->forget = NULL;
	kfree(sl);
}

static void fuse_evict_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* Will write inode on close/munmap and in all other dirtiers */
	WARN_ON(inode->i_state & I_DIRTY_INODE);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	if (inode->i_sb->s_flags & SB_ACTIVE) {
		struct fuse_conn *fc = get_fuse_conn(inode);

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
		if (fi->nlookup) {
			fuse_queue_forget(fc, fi->forget, fi->nodeid,
					  fi->nlookup);
			fi->forget = NULL;
		}

		if (fi->submount_lookup) {
			fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
			fi->submount_lookup = NULL;
		}
		/*
		 * Evict of non-deleted inode may race with outstanding
		 * LOOKUP/READDIRPLUS requests and result in inconsistency when
		 * the request finishes.  Deal with that here by bumping a
		 * counter that can be compared to the starting value.
		 */
		if (inode->i_nlink > 0)
			atomic64_inc(&fc->evict_ctr);
	}
	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
		WARN_ON(fi->iocachectr != 0);
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
}

static int fuse_reconfigure(struct fs_context *fsc)
{
	struct super_block *sb = fsc->root->d_sb;

	sync_filesystem(sb);
	if (fsc->sb_flags & SB_MANDLOCK)
		return -EINVAL;

	return 0;
}

/*
 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
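 *
 * Illustrative example (assuming a 32-bit ino_t): ino64 0x123456789abcdef0
 * truncates to 0x9abcdef0 and is XOR-folded with the high word 0x12345678,
 * giving 0x88888888.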
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t ino = (ino_t) ino64;
	if (sizeof(ino_t) < sizeof(u64))
		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
	return ino;
}

void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   struct fuse_statx *sx,
				   u64 attr_valid, u32 cache_mask,
				   u64 evict_ctr)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	/*
	 * Clear basic stats from invalid mask.
	 *
	 * Don't do this if the attributes come from a fuse_iget() call that
	 * might have raced with an evict, which would have invalidated the
	 * result had the attr_version been preserved.
	 *
	 * !evict_ctr -> this is create
	 * fi->attr_version != 0 -> this is not a new inode
	 * evict_ctr == fuse_get_evict_ctr() -> no evicts during the request
	 */
	if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
		set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);

	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;

	inode->i_ino     = fuse_squash_ino(attr->ino);
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks  = attr->blocks;

	/* Sanitize nsecs */
	attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
	attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
	attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);

	inode_set_atime(inode, attr->atime, attr->atimensec);
	/* mtime from server may be stale due to local buffered write */
	if (!(cache_mask & STATX_MTIME)) {
		inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	}
	if (!(cache_mask & STATX_CTIME)) {
		inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	}
	if (sx) {
		/* Sanitize nsecs */
		sx->btime.tv_nsec =
			min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);

		/*
		 * Btime has been queried, so the cache is valid (whether or
		 * not btime itself is available); clear STATX_BTIME from
		 * inval_mask.
		 *
		 * Availability of the btime attribute is indicated in
		 * FUSE_I_BTIME
		 */
		set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
		if (sx->mask & STATX_BTIME) {
			set_bit(FUSE_I_BTIME, &fi->state);
			fi->i_btime.tv_sec = sx->btime.tv_sec;
			fi->i_btime.tv_nsec = sx->btime.tv_nsec;
		}
	}

	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;

	/*
	 * We are refreshing inode data and it is possible that another
	 * client set suid/sgid or security.capability xattr. So clear
	 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
	 * was set or if security.capability xattr was set. But we don't
	 * know if security.capability has been set or not. So clear it
	 * anyway. It's less efficient but should be safe.
	 */
	inode->i_flags &= ~S_NOSEC;
}

u32 fuse_get_cache_mask(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
		return 0;

	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}

static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
				     struct fuse_statx *sx, u64 attr_valid,
				     u64 attr_version, u64 evict_ctr)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	u32 cache_mask;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	/*
	 * In case of writeback_cache enabled, writes update mtime, ctime and
	 * may update i_size.  In these cases trust the cached value in the
	 * inode.
	 */
	cache_mask = fuse_get_cache_mask(inode);
	if (cache_mask & STATX_SIZE)
		attr->size = i_size_read(inode);

	if (cache_mask & STATX_MTIME) {
		attr->mtime = inode_get_mtime_sec(inode);
		attr->mtimensec = inode_get_mtime_nsec(inode);
	}
	if (cache_mask & STATX_CTIME) {
		attr->ctime = inode_get_ctime_sec(inode);
		attr->ctimensec = inode_get_ctime_nsec(inode);
	}

	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	old_mtime = inode_get_mtime(inode);
	fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
				      evict_ctr);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!(cache_mask & STATX_SIZE))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!cache_mask && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}

	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_dontcache(inode, attr->flags);
}

void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_statx *sx, u64 attr_valid,
			    u64 attr_version)
{
	fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}

static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
				      u64 nodeid)
{
	sl->nodeid = nodeid;
	refcount_set(&sl->count, 1);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
			    struct fuse_conn *fc)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode_set_mtime(inode, attr->mtime, attr->mtimensec);
	inode_set_ctime(inode, attr->ctime, attr->ctimensec);
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode, attr->flags);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
	/*
	 * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
	 * so they see the exact same behavior as before.
	 */
	if (!fc->posix_acl)
		inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}

static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
	u64 nodeid = *(u64 *) _nodeidp;
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
	u64 nodeid = *(u64 *) _nodeidp;
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version,
			u64 evict_ctr)
{
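	/*
	 * Look up or create the inode for @nodeid: submount roots always get
	 * a fresh, unhashed inode, everything else goes through the inode
	 * hash, retrying if a stale inode (reused nodeid) is found there.
	 */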
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		struct fuse_inode *fi;

		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr, fc);
		fi = get_fuse_inode(inode);
		fi->nodeid = nodeid;
		fi->submount_lookup = fuse_alloc_submount_lookup();
		if (!fi->submount_lookup) {
			iput(inode);
			return NULL;
		}
		/* Sets nlookup = 1 on fi->submount_lookup->nlookup */
		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
		inode->i_flags |= S_AUTOMOUNT;
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr, fc);
		unlock_new_inode(inode);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		if (inode != d_inode(sb->s_root)) {
			remove_inode_hash(inode);
			iput(inode);
			goto retry;
		}
	}
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
done:
	fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
				 evict_ctr);
	return inode;
}

struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *fm_iter;
	struct inode *inode;

	WARN_ON(!rwsem_is_locked(&fc->killsb));
	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
		if (!fm_iter->sb)
			continue;

		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			if (fm)
				*fm = fm_iter;
			return inode;
		}
	}

	return NULL;
}

int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		return -ENOENT;

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
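	/*
	 * A negative offset means "attributes only"; len <= 0 means
	 * invalidate through EOF.  Illustrative example with 4K pages:
	 * offset 5000, len 3000 invalidates pages 1..1, since bytes
	 * 5000..7999 all fall within page 1.
	 */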
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}

bool fuse_lock_inode(struct inode *inode)
{
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
		mutex_lock(&get_fuse_inode(inode)->mutex);
		locked = true;
	}

	return locked;
}

void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->no_force_umount)
		return;

	fuse_abort_conn(fc);

	/* Only retire block-device-based superblocks. */
	if (sb->s_bdev != NULL)
		retire_super(sb);
}

static void fuse_send_destroy(struct fuse_mount *fm)
{
	if (fm->fc->conn_init) {
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
		fuse_simple_request(fm, &args);
	}
}

static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_statfs_out outarg;
	int err;

	if (!fuse_allow_current_process(fm->fc)) {
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

	memset(&outarg, 0, sizeof(outarg));
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
	struct fuse_sync_bucket *bucket;

	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
	if (bucket) {
		init_waitqueue_head(&bucket->waitq);
		/* Initial active count */
		atomic_set(&bucket->count, 1);
	}
	return bucket;
}

static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
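	/*
	 * Wait for all currently outstanding writes to complete: swap in a
	 * fresh bucket so that new writes are counted separately, then sleep
	 * until the old bucket's count drops to zero.
	 */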
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of new bucket depends on completion of this bucket, so add
	 * one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int err;

	/*
	 * Userspace cannot handle the wait == 0 case.  Avoid a
	 * gratuitous roundtrip.
	 */
	if (!wait)
		return 0;

	/* The filesystem is being unmounted.  Nothing to do. */
	if (!sb->s_root)
		return 0;

	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.out_numargs = 0;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		fc->sync_fs = 0;
		err = 0;
	}

	return err;
}

enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};

static const struct fs_parameter_spec fuse_fs_parameters[] = {
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_uid	("user_id",		OPT_USER_ID),
	fsparam_gid	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
	fsparam_string	("subtype",		OPT_SUBTYPE),
	{}
};
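
/*
 * Illustrative example (option values are hypothetical): a libfuse-style
 * server typically ends up mounting with something like
 *
 *	mount -t fuse -o fd=3,rootmode=40000,user_id=1000,group_id=1000 \
 *		myfs /mnt/point
 *
 * fd, rootmode, user_id and group_id are required (see fuse_fill_super()).
 */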

static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;
	kuid_t kuid;
	kgid_t kgid;

	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fsc->oldapi)
			return 0;

		return invalfc(fsc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		fsc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
			return invalfc(fsc, "Multiple subtypes specified");
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
		ctx->fd_present = true;
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
			return invalfc(fsc, "Invalid rootmode");
		ctx->rootmode = result.uint_32;
		ctx->rootmode_present = true;
		break;

	case OPT_USER_ID:
		kuid = result.uid;
		/*
		 * The requested uid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kuid_has_mapping(fsc->user_ns, kuid))
			return invalfc(fsc, "Invalid user_id");
		ctx->user_id = kuid;
		ctx->user_id_present = true;
		break;

	case OPT_GROUP_ID:
		kgid = result.gid;
		/*
		 * The requested gid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kgid_has_mapping(fsc->user_ns, kgid))
			return invalfc(fsc, "Invalid group_id");
		ctx->group_id = kgid;
		ctx->group_id_present = true;
		break;

	case OPT_DEFAULT_PERMISSIONS:
		ctx->default_permissions = true;
		break;

	case OPT_ALLOW_OTHER:
		ctx->allow_other = true;
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
			return invalfc(fsc, "blksize only supported for fuseblk");
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static void fuse_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
#ifdef CONFIG_FUSE_DAX
	if (fc->dax_mode == FUSE_DAX_ALWAYS)
		seq_puts(m, ",dax=always");
	else if (fc->dax_mode == FUSE_DAX_NEVER)
		seq_puts(m, ",dax=never");
	else if (fc->dax_mode == FUSE_DAX_INODE_USER)
		seq_puts(m, ",dax=inode");
#endif

	return 0;
}

static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
	spin_lock_init(&fiq->lock);
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
	fiq->connected = 1;
	fiq->ops = ops;
	fiq->priv = priv;
}

void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
	unsigned int i;

	spin_lock_init(&fpq->lock);
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
	INIT_LIST_HEAD(&fpq->io);
	fpq->connected = 1;
}

void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	spin_lock_init(&fc->bg_lock);
	init_rwsem(&fc->killsb);
	refcount_set(&fc->count, 1);
	atomic_set(&fc->dev_count, 1);
	atomic_set(&fc->epoch, 1);
	init_waitqueue_head(&fc->blocked_waitq);
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
	INIT_LIST_HEAD(&fc->devices);
	atomic_set(&fc->num_waiting, 0);
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
	atomic64_set(&fc->khctr, 0);
	fc->polled_files = RB_ROOT;
	fc->blocked = 0;
	fc->initialized = 0;
	fc->connected = 1;
	atomic64_set(&fc->attr_version, 1);
	atomic64_set(&fc->evict_ctr, 1);
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
	fc->user_ns = get_user_ns(user_ns);
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
	fc->max_pages_limit = fuse_max_pages_limit;
	fc->name_max = FUSE_NAME_LOW_MAX;
	fc->timeout.req_timeout = 0;

	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		fuse_backing_files_init(fc);

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

static void delayed_release(struct rcu_head *p)
{
	struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);

	fuse_uring_destruct(fc);

	put_user_ns(fc->user_ns);
	fc->release(fc);
}

void fuse_conn_put(struct fuse_conn *fc)
{
	if (refcount_dec_and_test(&fc->count)) {
		struct fuse_iqueue *fiq = &fc->iq;
		struct fuse_sync_bucket *bucket;

		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
		if (fc->timeout.req_timeout)
			cancel_delayed_work_sync(&fc->timeout.work);
		if (fiq->ops->release)
			fiq->ops->release(fiq);
		put_pid_ns(fc->pid_ns);
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
		if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
			fuse_backing_files_free(fc);
		call_rcu(&fc->rcu, delayed_release);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
	refcount_inc(&fc->count);
	return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);

static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode)
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
	attr.nlink = 1;
	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}

struct fuse_inode_handle {
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
	if (!inode) {
		struct fuse_entry_out outarg;
		const struct qstr name = QSTR_INIT(".", 1);

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

	entry = d_obtain_alias(inode);
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}
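
	/*
	 * Handle layout: fh[0..1] hold the nodeid (high then low 32 bits)
	 * and fh[2] the generation; when the parent is encoded as well
	 * (FILEID_INO64_GEN_PARENT), it follows in fh[3..5] in the same
	 * layout.
	 */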
	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != FILEID_INO64_GEN &&
	     fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

static struct dentry *fuse_get_parent(struct dentry *child)
{
	struct inode *child_inode = d_inode(child);
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &dotdot_name, &outarg, &inode);
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
		return ERR_PTR(err);
	}

	parent = d_obtain_alias(inode);
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(parent);

	return parent;
}

/* only for fid encoding; no support for file handle */
static const struct export_operations fuse_export_fid_operations = {
	.encode_fh	= fuse_encode_fh,
};

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
	.get_parent	= fuse_get_parent,
};

static const struct super_operations fuse_super_operations = {
	.alloc_inode    = fuse_alloc_inode,
	.free_inode     = fuse_free_inode,
	.evict_inode	= fuse_evict_inode,
	.write_inode	= fuse_write_inode,
	.drop_inode	= generic_delete_inode,
	.umount_begin	= fuse_umount_begin,
	.statfs		= fuse_statfs,
	.sync_fs	= fuse_sync_fs,
	.show_options	= fuse_show_options,
};

static void sanitize_global_limit(unsigned int *limit)
{
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
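	 *
	 * Illustrative: on a machine with 4 GiB of RAM this works out to
	 * (2^32 >> 13) / 392 = 524288 / 392 = 1337 requests; the clamp
	 * below then caps any value at 65535.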
	 */
	if (*limit == 0)
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

static int set_global_limit(const char *val, const struct kernel_param *kp)
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned int *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	spin_lock(&fc->bg_lock);
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
	spin_unlock(&fc->bg_lock);
}

static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout)
{
	fc->timeout.req_timeout = secs_to_jiffies(timeout);
	INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
	queue_delayed_work(system_wq, &fc->timeout.work,
			   fuse_timeout_timer_freq);
}

static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout)
{
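	/*
	 * Precedence, as implemented below: a timeout requested by the server
	 * in FUSE_INIT wins, else the fuse_default_req_timeout module default
	 * applies; either is capped by fuse_max_req_timeout and never drops
	 * below the timeout-checker frequency.
	 */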
	if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout)
		return;

	if (!timeout)
		timeout = fuse_default_req_timeout;

	if (fuse_max_req_timeout) {
		if (timeout)
			timeout = min(fuse_max_req_timeout, timeout);
		else
			timeout = fuse_max_req_timeout;
	}

	timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout);

	set_request_timeout(fc, timeout);
}

struct fuse_init_args {
	struct fuse_args args;
	struct fuse_init_in in;
	struct fuse_init_out out;
};

static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
	bool ok = true;

	if (error || arg->major != FUSE_KERNEL_VERSION)
		ok = false;
	else {
		unsigned long ra_pages;
		unsigned int timeout = 0;

		process_init_limits(fc, arg);

		if (arg->minor >= 6) {
			u64 flags = arg->flags;

			if (flags & FUSE_INIT_EXT)
				flags |= (u64) arg->flags2 << 32;

			ra_pages = arg->max_readahead / PAGE_SIZE;
			if (flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
			if (!(flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
			if (arg->minor >= 17) {
				if (!(flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
			} else {
				if (!(flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
			}
			if (flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
			if (flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
			if (flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
			if (flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
			else if (flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
			if (flags & FUSE_DO_READDIRPLUS) {
				fc->do_readdirplus = 1;
				if (flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
			if (flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
			if (flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
			if (flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
			if (flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fm->sb->s_time_gran = arg->time_gran;
			if ((flags & FUSE_POSIX_ACL)) {
				fc->default_permissions = 1;
				fc->posix_acl = 1;
			}
			if (flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
			if (flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
			if (flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, fc->max_pages_limit,
					max_t(unsigned int, arg->max_pages, 1));

				/*
				 * PATH_MAX file names might need two pages for
				 * ops like rename
				 */
				if (fc->max_pages > 1)
					fc->name_max = FUSE_NAME_MAX;
			}
			if (IS_ENABLED(CONFIG_FUSE_DAX)) {
				if (flags & FUSE_MAP_ALIGNMENT &&
				    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
					ok = false;
				}
				if (flags & FUSE_HAS_INODE_DAX)
					fc->inode_dax = 1;
			}
			if (flags & FUSE_HANDLE_KILLPRIV_V2) {
				fc->handle_killpriv_v2 = 1;
				fm->sb->s_flags |= SB_NOSEC;
			}
			if (flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
			if (flags & FUSE_SECURITY_CTX)
				fc->init_security = 1;
			if (flags & FUSE_CREATE_SUPP_GROUP)
				fc->create_supp_group = 1;
			if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
				fc->direct_io_allow_mmap = 1;
			/*
			 * max_stack_depth is the max stack depth of FUSE fs,
			 * so it has to be at least 1 to support passthrough
			 * to backing files.
			 *
			 * With max_stack_depth > 1, the backing files can be
			 * on a stacked fs (e.g. overlayfs) themselves and with
			 * max_stack_depth == 1, FUSE fs can be stacked as the
			 * underlying fs of a stacked fs (e.g. overlayfs).
			 *
			 * Also don't allow the combination of FUSE_PASSTHROUGH
			 * and FUSE_WRITEBACK_CACHE; the current design doesn't
			 * handle them together.
			 */
			if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
			    (flags & FUSE_PASSTHROUGH) &&
			    arg->max_stack_depth > 0 &&
			    arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
			    !(flags & FUSE_WRITEBACK_CACHE)) {
				fc->passthrough = 1;
				fc->max_stack_depth = arg->max_stack_depth;
				fm->sb->s_stack_depth = arg->max_stack_depth;
			}
			if (flags & FUSE_NO_EXPORT_SUPPORT)
				fm->sb->s_export_op = &fuse_export_fid_operations;
			if (flags & FUSE_ALLOW_IDMAP) {
				if (fc->default_permissions)
					fm->sb->s_iflags &= ~SB_I_NOIDMAP;
				else
					ok = false;
			}
			if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled())
				fc->io_uring = 1;

			if (flags & FUSE_REQUEST_TIMEOUT)
				timeout = arg->request_timeout;
		} else {
			ra_pages = fc->max_read / PAGE_SIZE;
			fc->no_lock = 1;
			fc->no_flock = 1;
		}

		init_server_timeout(fc, timeout);

		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
		fc->conn_init = 1;
	}
	kfree(ia);

	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

	fuse_set_initialized(fc);
	wake_up_all(&fc->blocked_waitq);
}

void fuse_send_init(struct fuse_mount *fm)
{
	struct fuse_init_args *ia;
	u64 flags;

	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
	flags =
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP |
		FUSE_REQUEST_TIMEOUT;
#ifdef CONFIG_FUSE_DAX
	if (fm->fc->dax)
		flags |= FUSE_MAP_ALIGNMENT;
	if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
		flags |= FUSE_HAS_INODE_DAX;
#endif
	if (fm->fc->auto_submounts)
		flags |= FUSE_SUBMOUNTS;
	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		flags |= FUSE_PASSTHROUGH;

	/*
	 * This is just an information flag for the fuse server. No need to
	 * check the reply - the server is either sending IORING_OP_URING_CMD
	 * or not.
	 */
	if (fuse_uring_enabled())
		flags |= FUSE_OVER_IO_URING;

	ia->in.flags = flags;
	ia->in.flags2 = flags >> 32;

	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
	/*
	 * Variable length argument used for backward compatibility
	 * with interface version < 7.5.  Rest of init_out is zeroed
	 * by do_get_request(), so a short reply is not a problem.
	 */
	ia->args.out_argvar = true;
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);

void fuse_free_conn(struct fuse_conn *fc)
{
	WARN_ON(!list_empty(&fc->devices));
	kfree(fc);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
	char *suffix = "";

	if (sb->s_bdev) {
		suffix = "-fuseblk";
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
	if (err)
		return err;

	/* fuse does its own writeback accounting */
1561 	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
1562 	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
1563 
1564 	/*
1565 	 * For a single fuse filesystem use max 1% of dirty +
1566 	 * writeback threshold.
1567 	 *
1568 	 * This gives about 1M of write buffer for memory maps on a
1569 	 * machine with 1G and 10% dirty_ratio, which should be more
1570 	 * than enough.
1571 	 *
1572 	 * Privileged users can raise it by writing to
1573 	 *
1574 	 *    /sys/class/bdi/<bdi>/max_ratio
1575 	 */
1576 	bdi_set_max_ratio(sb->s_bdi, 1);
1577 
1578 	return 0;
1579 }
1580 
1581 struct fuse_dev *fuse_dev_alloc(void)
1582 {
1583 	struct fuse_dev *fud;
1584 	struct list_head *pq;
1585 
1586 	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1587 	if (!fud)
1588 		return NULL;
1589 
1590 	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
1591 	if (!pq) {
1592 		kfree(fud);
1593 		return NULL;
1594 	}
1595 
1596 	fud->pq.processing = pq;
1597 	fuse_pqueue_init(&fud->pq);
1598 
1599 	return fud;
1600 }
1601 EXPORT_SYMBOL_GPL(fuse_dev_alloc);
1602 
1603 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
1604 {
1605 	fud->fc = fuse_conn_get(fc);
1606 	spin_lock(&fc->lock);
1607 	list_add_tail(&fud->entry, &fc->devices);
1608 	spin_unlock(&fc->lock);
1609 }
1610 EXPORT_SYMBOL_GPL(fuse_dev_install);
1611 
1612 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
1613 {
1614 	struct fuse_dev *fud;
1615 
1616 	fud = fuse_dev_alloc();
1617 	if (!fud)
1618 		return NULL;
1619 
1620 	fuse_dev_install(fud, fc);
1621 	return fud;
1622 }
1623 EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1624 
1625 void fuse_dev_free(struct fuse_dev *fud)
1626 {
1627 	struct fuse_conn *fc = fud->fc;
1628 
1629 	if (fc) {
1630 		spin_lock(&fc->lock);
1631 		list_del(&fud->entry);
1632 		spin_unlock(&fc->lock);
1633 
1634 		fuse_conn_put(fc);
1635 	}
1636 	kfree(fud->pq.processing);
1637 	kfree(fud);
1638 }
1639 EXPORT_SYMBOL_GPL(fuse_dev_free);
1640 
1641 static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
1642 				      const struct fuse_inode *fi)
1643 {
1644 	struct timespec64 atime = inode_get_atime(&fi->inode);
1645 	struct timespec64 mtime = inode_get_mtime(&fi->inode);
1646 	struct timespec64 ctime = inode_get_ctime(&fi->inode);
1647 
1648 	*attr = (struct fuse_attr){
1649 		.ino		= fi->inode.i_ino,
1650 		.size		= fi->inode.i_size,
1651 		.blocks		= fi->inode.i_blocks,
1652 		.atime		= atime.tv_sec,
1653 		.mtime		= mtime.tv_sec,
1654 		.ctime		= ctime.tv_sec,
1655 		.atimensec	= atime.tv_nsec,
1656 		.mtimensec	= mtime.tv_nsec,
1657 		.ctimensec	= ctime.tv_nsec,
1658 		.mode		= fi->inode.i_mode,
1659 		.nlink		= fi->inode.i_nlink,
1660 		.uid		= __kuid_val(fi->inode.i_uid),
1661 		.gid		= __kgid_val(fi->inode.i_gid),
1662 		.rdev		= fi->inode.i_rdev,
1663 		.blksize	= 1u << fi->inode.i_blkbits,
1664 	};
1665 }
1666 
1667 static void fuse_sb_defaults(struct super_block *sb)
1668 {
1669 	sb->s_magic = FUSE_SUPER_MAGIC;
1670 	sb->s_op = &fuse_super_operations;
1671 	sb->s_xattr = fuse_xattr_handlers;
1672 	sb->s_maxbytes = MAX_LFS_FILESIZE;
1673 	sb->s_time_gran = 1;
1674 	sb->s_export_op = &fuse_export_operations;
1675 	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
1676 	sb->s_iflags |= SB_I_NOIDMAP;
1677 	if (sb->s_user_ns != &init_user_ns)
1678 		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
1679 	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
1680 }
1681 
1682 static int fuse_fill_super_submount(struct super_block *sb,
1683 				    struct fuse_inode *parent_fi)
1684 {
1685 	struct fuse_mount *fm = get_fuse_mount_super(sb);
1686 	struct super_block *parent_sb = parent_fi->inode.i_sb;
1687 	struct fuse_attr root_attr;
1688 	struct inode *root;
1689 	struct fuse_submount_lookup *sl;
1690 	struct fuse_inode *fi;
1691 
1692 	fuse_sb_defaults(sb);
1693 	fm->sb = sb;
1694 
1695 	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1696 	sb->s_bdi = bdi_get(parent_sb->s_bdi);
1697 
1698 	sb->s_xattr = parent_sb->s_xattr;
1699 	sb->s_export_op = parent_sb->s_export_op;
1700 	sb->s_time_gran = parent_sb->s_time_gran;
1701 	sb->s_blocksize = parent_sb->s_blocksize;
1702 	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
1703 	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
1704 	if (parent_sb->s_subtype && !sb->s_subtype)
1705 		return -ENOMEM;
1706 
1707 	fuse_fill_attr_from_inode(&root_attr, parent_fi);
1708 	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
1709 			 fuse_get_evict_ctr(fm->fc));
1710 	/*
1711 	 * This inode is just a duplicate, so it is not looked up and
1712 	 * its nlookup should not be incremented.  fuse_iget() does
1713 	 * that, though, so undo it here.
1714 	 */
1715 	fi = get_fuse_inode(root);
1716 	fi->nlookup--;
1717 
1718 	sb->s_d_op = &fuse_dentry_operations;
1719 	sb->s_root = d_make_root(root);
1720 	if (!sb->s_root)
1721 		return -ENOMEM;
1722 
1723 	/*
1724 	 * Grab the parent's submount_lookup pointer and take a
1725 	 * reference on the shared nlookup from the parent.  This is to
1726 	 * prevent the last forget for this nodeid from getting
1727 	 * triggered until all users have finished with it.
1728 	 */
1729 	sl = parent_fi->submount_lookup;
1730 	WARN_ON(!sl);
1731 	if (sl) {
1732 		refcount_inc(&sl->count);
1733 		fi->submount_lookup = sl;
1734 	}
1735 
1736 	return 0;
1737 }
1738 
1739 /* Filesystem context private data holds the FUSE inode of the mount point */
1740 static int fuse_get_tree_submount(struct fs_context *fsc)
1741 {
1742 	struct fuse_mount *fm;
1743 	struct fuse_inode *mp_fi = fsc->fs_private;
1744 	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
1745 	struct super_block *sb;
1746 	int err;
1747 
1748 	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1749 	if (!fm)
1750 		return -ENOMEM;
1751 
1752 	fm->fc = fuse_conn_get(fc);
1753 	fsc->s_fs_info = fm;
1754 	sb = sget_fc(fsc, NULL, set_anon_super_fc);
1755 	if (fsc->s_fs_info)
1756 		fuse_mount_destroy(fm);
1757 	if (IS_ERR(sb))
1758 		return PTR_ERR(sb);
1759 
1760 	/* Initialize superblock, making @mp_fi its root */
1761 	err = fuse_fill_super_submount(sb, mp_fi);
1762 	if (err) {
1763 		deactivate_locked_super(sb);
1764 		return err;
1765 	}
1766 
1767 	down_write(&fc->killsb);
1768 	list_add_tail(&fm->fc_entry, &fc->mounts);
1769 	up_write(&fc->killsb);
1770 
1771 	sb->s_flags |= SB_ACTIVE;
1772 	fsc->root = dget(sb->s_root);
1773 
1774 	return 0;
1775 }
1776 
1777 static const struct fs_context_operations fuse_context_submount_ops = {
1778 	.get_tree	= fuse_get_tree_submount,
1779 };
1780 
1781 int fuse_init_fs_context_submount(struct fs_context *fsc)
1782 {
1783 	fsc->ops = &fuse_context_submount_ops;
1784 	return 0;
1785 }
1786 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
1787 
1788 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
1789 {
1790 	struct fuse_dev *fud = NULL;
1791 	struct fuse_mount *fm = get_fuse_mount_super(sb);
1792 	struct fuse_conn *fc = fm->fc;
1793 	struct inode *root;
1794 	struct dentry *root_dentry;
1795 	int err;
1796 
1797 	err = -EINVAL;
1798 	if (sb->s_flags & SB_MANDLOCK)
1799 		goto err;
1800 
1801 	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
1802 	fuse_sb_defaults(sb);
1803 
1804 	if (ctx->is_bdev) {
1805 #ifdef CONFIG_BLOCK
1806 		err = -EINVAL;
1807 		if (!sb_set_blocksize(sb, ctx->blksize))
1808 			goto err;
1809 #endif
1810 	} else {
1811 		sb->s_blocksize = PAGE_SIZE;
1812 		sb->s_blocksize_bits = PAGE_SHIFT;
1813 	}
1814 
1815 	sb->s_subtype = ctx->subtype;
1816 	ctx->subtype = NULL;
1817 	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1818 		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
1819 		if (err)
1820 			goto err;
1821 	}
1822 
1823 	if (ctx->fudptr) {
1824 		err = -ENOMEM;
1825 		fud = fuse_dev_alloc_install(fc);
1826 		if (!fud)
1827 			goto err_free_dax;
1828 	}
1829 
1830 	fc->dev = sb->s_dev;
1831 	fm->sb = sb;
1832 	err = fuse_bdi_init(fc, sb);
1833 	if (err)
1834 		goto err_dev_free;
1835 
1836 	/* Handle umasking inside the fuse code */
1837 	if (sb->s_flags & SB_POSIXACL)
1838 		fc->dont_mask = 1;
1839 	sb->s_flags |= SB_POSIXACL;
1840 
1841 	fc->default_permissions = ctx->default_permissions;
1842 	fc->allow_other = ctx->allow_other;
1843 	fc->user_id = ctx->user_id;
1844 	fc->group_id = ctx->group_id;
1845 	fc->legacy_opts_show = ctx->legacy_opts_show;
1846 	fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1847 	fc->destroy = ctx->destroy;
1848 	fc->no_control = ctx->no_control;
1849 	fc->no_force_umount = ctx->no_force_umount;
1850 
1851 	err = -ENOMEM;
1852 	root = fuse_get_root_inode(sb, ctx->rootmode);
1853 	sb->s_d_op = &fuse_root_dentry_operations;
1854 	root_dentry = d_make_root(root);
1855 	if (!root_dentry)
1856 		goto err_dev_free;
1857 	/* Root dentry doesn't have .d_revalidate */
1858 	sb->s_d_op = &fuse_dentry_operations;
1859 
1860 	mutex_lock(&fuse_mutex);
1861 	err = -EINVAL;
1862 	if (ctx->fudptr && *ctx->fudptr)
1863 		goto err_unlock;
1864 
1865 	err = fuse_ctl_add_conn(fc);
1866 	if (err)
1867 		goto err_unlock;
1868 
1869 	list_add_tail(&fc->entry, &fuse_conn_list);
1870 	sb->s_root = root_dentry;
1871 	if (ctx->fudptr)
1872 		*ctx->fudptr = fud;
1873 	mutex_unlock(&fuse_mutex);
1874 	return 0;
1875 
1876  err_unlock:
1877 	mutex_unlock(&fuse_mutex);
1878 	dput(root_dentry);
1879  err_dev_free:
1880 	if (fud)
1881 		fuse_dev_free(fud);
1882  err_free_dax:
1883 	if (IS_ENABLED(CONFIG_FUSE_DAX))
1884 		fuse_dax_conn_free(fc);
1885  err:
1886 	return err;
1887 }
1888 EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1889 
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	int err;

	if (!ctx->file || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

	/*
	 * Require the mount to happen from the same user namespace that
	 * opened /dev/fuse, to prevent potential attacks.
	 */
	if ((ctx->file->f_op != &fuse_dev_operations) ||
	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
		return -EINVAL;
	ctx->fudptr = &ctx->file->private_data;

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		return err;
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
	fuse_send_init(get_fuse_mount_super(sb));
	return 0;
}

/*
 * This is the path where the user supplied an already initialized fuse
 * dev.  In this case never create a new super block if the old one is
 * gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}

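/*
 * sget_fc() test callback: match the existing super block belonging to
 * the connection behind the supplied fuse device.
 */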
static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
	return fsc->sget_key == get_fuse_conn_super(sb);
}

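/*
 * Create the super block for a new mount.  If the supplied fuse device
 * file is already bound to a connection, attach to that connection's
 * existing super block instead of creating a fresh one.
 */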
static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	struct super_block *sb;
	int err;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		return -ENOMEM;
	}

	fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
	fc->release = fuse_free_conn;

	fsc->s_fs_info = fm;

	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out;
	}
	/*
	 * While a block dev mount can be initialized with a dummy device fd
	 * (the block device is looked up by name), a normal fuse mount can't.
	 */
	err = -EINVAL;
	if (!ctx->file)
		goto out;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out:
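	/*
	 * On success the new super block took ownership of fm and cleared
	 * fsc->s_fs_info; otherwise drop the unused mount here.
	 */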
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (ctx->file)
		fput(ctx->file);
	return err;
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
	ctx->legacy_opts_show = true;

#ifdef CONFIG_BLOCK
	if (fsc->fs_type == &fuseblk_fs_type) {
		ctx->is_bdev = true;
		ctx->destroy = true;
	}
#endif

	fsc->fs_private = ctx;
	fsc->ops = &fuse_context_ops;
	return 0;
}

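/*
 * Unhook a mount from its connection.  Returns true when this was the
 * last mount, in which case the caller must tear the connection down.
 */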
bool fuse_mount_remove(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	bool last = false;

	down_write(&fc->killsb);
	list_del_init(&fm->fc_entry);
	if (list_empty(&fc->mounts))
		last = true;
	up_write(&fc->killsb);

	return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);

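/*
 * Shut the connection down: send FUSE_DESTROY if it was requested at
 * mount time, abort and drain all requests, then remove the connection
 * from the control filesystem.
 */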
void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;

	if (fc->destroy)
		fuse_send_destroy(fm);

	fuse_abort_conn(fc);
	fuse_wait_aborted(fc);

	if (!list_empty(&fc->entry)) {
		mutex_lock(&fuse_mutex);
		list_del(&fc->entry);
		fuse_ctl_remove_conn(fc);
		mutex_unlock(&fuse_mutex);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_destroy);

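/* Tear down the connection when the last mount on it goes away. */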
static void fuse_sb_destroy(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			fuse_conn_destroy(fm);
	}
}

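/* Drop the connection reference and free the mount after an RCU grace period. */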
void fuse_mount_destroy(struct fuse_mount *fm)
{
	fuse_conn_put(fm->fc);
	kfree_rcu(fm, rcu);
}
EXPORT_SYMBOL(fuse_mount_destroy);

static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");

#ifdef CONFIG_BLOCK
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("fuseblk");

static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
static inline int register_fuseblk(void)
{
	return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif

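/*
 * Slab constructor: runs when an object is first added to the cache,
 * not on every allocation, so only one-time initialization belongs here.
 */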
static void fuse_inode_init_once(void *foo)
{
	struct inode *inode = foo;

	inode_init_once(inode);
}

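/* Register the inode cache, both filesystem types and the sysctl table. */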
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	err = fuse_sysctl_register();
	if (err)
		goto out4;

	return 0;

 out4:
	unregister_filesystem(&fuse_fs_type);
 out3:
	unregister_fuseblk();
 out2:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	fuse_sysctl_unregister();
	unregister_filesystem(&fuse_fs_type);
	unregister_fuseblk();

	/*
	 * Make sure all delayed RCU-freed inodes are flushed before we
	 * destroy the cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(fuse_inode_cachep);
}

static struct kobject *fuse_kobj;

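/* Create /sys/fs/fuse/connections as the mount point for the control fs. */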
static int fuse_sysfs_init(void)
{
	int err;

	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
	if (!fuse_kobj) {
		err = -ENOMEM;
		goto out_err;
	}

	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
	kobject_put(fuse_kobj);
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
	sysfs_remove_mount_point(fuse_kobj, "connections");
	kobject_put(fuse_kobj);
}

static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return res;
}

static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);