1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 
/*
 * Module parameter, exposed with mode 0644 and false unless set.  See the
 * MODULE_PARM_DESC() text below for what enabling it permits.
 */
static bool __read_mostly allow_sys_admin_access;
module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29 
fuse_advise_use_readdirplus(struct inode * dir)30 static void fuse_advise_use_readdirplus(struct inode *dir)
31 {
32 	struct fuse_inode *fi = get_fuse_inode(dir);
33 
34 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
35 }
36 
37 #if BITS_PER_LONG >= 64
/* Stash the timeout value directly in the d_fsdata pointer slot. */
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	void *packed = (void *) time;

	entry->d_fsdata = packed;
}
42 
fuse_dentry_time(const struct dentry * entry)43 static inline u64 fuse_dentry_time(const struct dentry *entry)
44 {
45 	return (u64)entry->d_fsdata;
46 }
47 
48 #else
/*
 * Object that dentry->d_fsdata points at in this configuration: it holds
 * the dentry timeout, and the same storage doubles as the rcu_head used
 * when the object is freed.
 */
union fuse_dentry {
	u64 time;		/* timeout value, see fuse_dentry_time() */
	struct rcu_head rcu;	/* passed to kfree_rcu() on release */
};
53 
/* Store the timeout in the union fuse_dentry that d_fsdata points at. */
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	union fuse_dentry *fd = dentry->d_fsdata;

	fd->time = time;
}
58 
fuse_dentry_time(const struct dentry * entry)59 static inline u64 fuse_dentry_time(const struct dentry *entry)
60 {
61 	return ((union fuse_dentry *) entry->d_fsdata)->time;
62 }
63 #endif
64 
/*
 * Record a new timeout on @dentry.
 *
 * DCACHE_OP_DELETE is wanted only when the timeout is zero and the
 * connection has delete_stale set; the flag is toggled under d_lock when
 * its current state disagrees with that.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool want_delete = !time && fc->delete_stale;
	bool has_delete = dentry->d_flags & DCACHE_OP_DELETE;

	/*
	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
	 * The unlocked test above may race, but either way it's just an
	 * optimization.
	 */
	if (want_delete != has_delete) {
		spin_lock(&dentry->d_lock);
		if (want_delete)
			dentry->d_flags |= DCACHE_OP_DELETE;
		else
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}
85 
86 /*
87  * FUSE caches dentries and attributes with separate timeout.  The
88  * time in jiffies until the dentry/attributes are valid is stored in
89  * dentry->d_fsdata and fuse_inode->i_time respectively.
90  */
91 
92 /*
93  * Calculate the time in jiffies until a dentry/attributes are valid
94  */
fuse_time_to_jiffies(u64 sec,u32 nsec)95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
96 {
97 	if (sec || nsec) {
98 		struct timespec64 ts = {
99 			sec,
100 			min_t(u32, nsec, NSEC_PER_SEC - 1)
101 		};
102 
103 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
104 	} else
105 		return 0;
106 }
107 
108 /*
109  * Set dentry and possibly attribute timeouts from the lookup/mk*
110  * replies
111  */
fuse_change_entry_timeout(struct dentry * entry,struct fuse_entry_out * o)112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
113 {
114 	fuse_dentry_settime(entry,
115 		fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
116 }
117 
/*
 * Add the bits in @mask to the inval_mask of the fuse inode behind @inode.
 */
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	set_mask_bits(&fi->inval_mask, 0, mask);
}
122 
123 /*
124  * Mark the attributes as stale, so that at the next call to
125  * ->getattr() they will be fetched from userspace
126  */
fuse_invalidate_attr(struct inode * inode)127 void fuse_invalidate_attr(struct inode *inode)
128 {
129 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 }
131 
fuse_dir_changed(struct inode * dir)132 static void fuse_dir_changed(struct inode *dir)
133 {
134 	fuse_invalidate_attr(dir);
135 	inode_maybe_inc_iversion(dir, false);
136 }
137 
138 /*
139  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
140  * atime is not used.
141  */
fuse_invalidate_atime(struct inode * inode)142 void fuse_invalidate_atime(struct inode *inode)
143 {
144 	if (!IS_RDONLY(inode))
145 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 }
147 
148 /*
149  * Just mark the entry as stale, so that a next attempt to look it up
150  * will result in a new lookup call to userspace
151  *
152  * This is called when a dentry is about to become negative and the
153  * timeout is unknown (unlink, rmdir, rename and in some cases
154  * lookup)
155  */
fuse_invalidate_entry_cache(struct dentry * entry)156 void fuse_invalidate_entry_cache(struct dentry *entry)
157 {
158 	fuse_dentry_settime(entry, 0);
159 }
160 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the dcache to
 * invalidate the dentry via d_invalidate().
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	/* zero timeout == stale, same as fuse_invalidate_entry_cache() */
	fuse_dentry_settime(entry, 0);
}
170 
/*
 * Fill @args for a FUSE_LOOKUP of @name under directory @nodeid.
 *
 * Three input arguments are set up: the zero arg0, the name bytes
 * (name->len of them) and a separate one-byte "" string.  The reply is
 * received into @outarg, which is cleared first.
 */
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
			     u64 nodeid, const struct qstr *name,
			     struct fuse_entry_out *outarg)
{
	memset(outarg, 0, sizeof(*outarg));

	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;

	args->in_numargs = 3;
	fuse_set_zero_arg0(args);
	args->in_args[1].value = name->name;
	args->in_args[1].size = name->len;
	args->in_args[2].value = "";
	args->in_args[2].size = 1;

	args->out_numargs = 1;
	args->out_args[0].value = outarg;
	args->out_args[0].size = sizeof(*outarg);
}
188 
189 /*
190  * Check whether the dentry is still valid
191  *
192  * If the entry validity timeout has expired and the dentry is
193  * positive, try to redo the lookup.  If the lookup results in a
194  * different inode, then let the VFS invalidate the dentry and redo
195  * the lookup once more.  If the lookup results in the same inode,
196  * then refresh the attributes, timeouts and mark the dentry valid.
197  */
fuse_dentry_revalidate(struct inode * dir,const struct qstr * name,struct dentry * entry,unsigned int flags)198 static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
199 				  struct dentry *entry, unsigned int flags)
200 {
201 	struct inode *inode;
202 	struct fuse_mount *fm;
203 	struct fuse_inode *fi;
204 	int ret;
205 
206 	inode = d_inode_rcu(entry);
207 	if (inode && fuse_is_bad(inode))
208 		goto invalid;
209 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
210 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
211 		struct fuse_entry_out outarg;
212 		FUSE_ARGS(args);
213 		struct fuse_forget_link *forget;
214 		u64 attr_version;
215 
216 		/* For negative dentries, always do a fresh lookup */
217 		if (!inode)
218 			goto invalid;
219 
220 		ret = -ECHILD;
221 		if (flags & LOOKUP_RCU)
222 			goto out;
223 
224 		fm = get_fuse_mount(inode);
225 
226 		forget = fuse_alloc_forget();
227 		ret = -ENOMEM;
228 		if (!forget)
229 			goto out;
230 
231 		attr_version = fuse_get_attr_version(fm->fc);
232 
233 		fuse_lookup_init(fm->fc, &args, get_node_id(dir),
234 				 name, &outarg);
235 		ret = fuse_simple_request(fm, &args);
236 		/* Zero nodeid is same as -ENOENT */
237 		if (!ret && !outarg.nodeid)
238 			ret = -ENOENT;
239 		if (!ret) {
240 			fi = get_fuse_inode(inode);
241 			if (outarg.nodeid != get_node_id(inode) ||
242 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
243 				fuse_queue_forget(fm->fc, forget,
244 						  outarg.nodeid, 1);
245 				goto invalid;
246 			}
247 			spin_lock(&fi->lock);
248 			fi->nlookup++;
249 			spin_unlock(&fi->lock);
250 		}
251 		kfree(forget);
252 		if (ret == -ENOMEM || ret == -EINTR)
253 			goto out;
254 		if (ret || fuse_invalid_attr(&outarg.attr) ||
255 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
256 			goto invalid;
257 
258 		forget_all_cached_acls(inode);
259 		fuse_change_attributes(inode, &outarg.attr, NULL,
260 				       ATTR_TIMEOUT(&outarg),
261 				       attr_version);
262 		fuse_change_entry_timeout(entry, &outarg);
263 	} else if (inode) {
264 		fi = get_fuse_inode(inode);
265 		if (flags & LOOKUP_RCU) {
266 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 				return -ECHILD;
268 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
269 			fuse_advise_use_readdirplus(dir);
270 		}
271 	}
272 	ret = 1;
273 out:
274 	return ret;
275 
276 invalid:
277 	ret = 0;
278 	goto out;
279 }
280 
281 #if BITS_PER_LONG < 64
fuse_dentry_init(struct dentry * dentry)282 static int fuse_dentry_init(struct dentry *dentry)
283 {
284 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
285 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
286 
287 	return dentry->d_fsdata ? 0 : -ENOMEM;
288 }
fuse_dentry_release(struct dentry * dentry)289 static void fuse_dentry_release(struct dentry *dentry)
290 {
291 	union fuse_dentry *fd = dentry->d_fsdata;
292 
293 	kfree_rcu(fd, rcu);
294 }
295 #endif
296 
fuse_dentry_delete(const struct dentry * dentry)297 static int fuse_dentry_delete(const struct dentry *dentry)
298 {
299 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
300 }
301 
302 /*
303  * Create a fuse_mount object with a new superblock (with path->dentry
304  * as the root), and return that mount so it can be auto-mounted on
305  * @path.
306  */
fuse_dentry_automount(struct path * path)307 static struct vfsmount *fuse_dentry_automount(struct path *path)
308 {
309 	struct fs_context *fsc;
310 	struct vfsmount *mnt;
311 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
312 
313 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
314 	if (IS_ERR(fsc))
315 		return ERR_CAST(fsc);
316 
317 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
318 	fsc->fs_private = mp_fi;
319 
320 	/* Create the submount */
321 	mnt = fc_mount(fsc);
322 	if (!IS_ERR(mnt))
323 		mntget(mnt);
324 
325 	put_fs_context(fsc);
326 	return mnt;
327 }
328 
/*
 * Dentry operations table with revalidate, delete and automount hooks.
 * The init/release hooks are only present when BITS_PER_LONG < 64.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};
338 
/*
 * Dentry operations table without revalidate/delete/automount hooks; it
 * only carries the init/release hooks, and only when BITS_PER_LONG < 64.
 */
const struct dentry_operations fuse_root_dentry_operations = {
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
#endif
};
345 
/*
 * Returns 1 if the file type bits of mode @m name a regular file,
 * directory, symlink, character or block device, FIFO or socket;
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
351 
fuse_valid_size(u64 size)352 static bool fuse_valid_size(u64 size)
353 {
354 	return size <= LLONG_MAX;
355 }
356 
fuse_invalid_attr(struct fuse_attr * attr)357 bool fuse_invalid_attr(struct fuse_attr *attr)
358 {
359 	return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
360 }
361 
/*
 * Send FUSE_LOOKUP for @name under directory @nodeid and instantiate the
 * resulting inode.
 *
 * On return *@inode is the inode obtained from fuse_iget(), or NULL.  A
 * reply with a zero nodeid returns 0 with *@inode left NULL; @outarg
 * then still holds the reply (including its timeout).
 *
 * Returns 0, -ENAMETOOLONG, -ENOMEM, -EIO for unusable attributes, or
 * the error from the request.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_forget_link *forget;
	u64 attr_version, evict_ctr;
	struct inode *found;
	int err;
	FUSE_ARGS(args);

	*inode = NULL;
	if (name->len > fm->fc->name_max)
		return -ENAMETOOLONG;

	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	attr_version = fuse_get_attr_version(fm->fc);
	evict_ctr = fuse_get_evict_ctr(fm->fc);

	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto free_forget;

	if (fuse_invalid_attr(&outarg->attr)) {
		err = -EIO;
		goto free_forget;
	}
	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
		pr_warn_once("root generation should be zero\n");
		outarg->generation = 0;
	}

	found = fuse_iget(sb, outarg->nodeid, outarg->generation,
			  &outarg->attr, ATTR_TIMEOUT(outarg),
			  attr_version, evict_ctr);
	*inode = found;
	if (!found) {
		/* The forget link is consumed by the queued forget */
		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
		return -ENOMEM;
	}
	/* err is still 0 from the successful request */

free_forget:
	kfree(forget);
	return err;
}
414 
fuse_lookup(struct inode * dir,struct dentry * entry,unsigned int flags)415 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
416 				  unsigned int flags)
417 {
418 	int err;
419 	struct fuse_entry_out outarg;
420 	struct inode *inode;
421 	struct dentry *newent;
422 	bool outarg_valid = true;
423 	bool locked;
424 
425 	if (fuse_is_bad(dir))
426 		return ERR_PTR(-EIO);
427 
428 	locked = fuse_lock_inode(dir);
429 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
430 			       &outarg, &inode);
431 	fuse_unlock_inode(dir, locked);
432 	if (err == -ENOENT) {
433 		outarg_valid = false;
434 		err = 0;
435 	}
436 	if (err)
437 		goto out_err;
438 
439 	err = -EIO;
440 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
441 		goto out_iput;
442 
443 	newent = d_splice_alias(inode, entry);
444 	err = PTR_ERR(newent);
445 	if (IS_ERR(newent))
446 		goto out_err;
447 
448 	entry = newent ? newent : entry;
449 	if (outarg_valid)
450 		fuse_change_entry_timeout(entry, &outarg);
451 	else
452 		fuse_invalidate_entry_cache(entry);
453 
454 	if (inode)
455 		fuse_advise_use_readdirplus(dir);
456 	return newent;
457 
458  out_iput:
459 	iput(inode);
460  out_err:
461 	return ERR_PTR(err);
462 }
463 
/*
 * Build the security-context extension for a create-type request.
 *
 * Asks security_dentry_init_security() for an initial context for @entry
 * and packs the result into a freshly allocated, zeroed buffer laid out as
 *
 *	struct fuse_secctx_header
 *	[ struct fuse_secctx, NUL-terminated name, context bytes ]
 *
 * The record part is present only when the hook produced a context.
 * -EOPNOTSUPP from the hook is ignored; in that case, when no context was
 * returned, just the header (nr_secctx == 0) is emitted.
 *
 * On success the buffer and its length are stored in @ext; the caller is
 * responsible for freeing ext->value.
 *
 * Returns 0, -EIO if the name or context length is out of range, -ENOMEM
 * on allocation failure, or the error from the security hook.
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				struct fuse_in_arg *ext)
{
	struct fuse_secctx *fctx;
	struct fuse_secctx_header *header;
	struct lsm_context lsmctx = { };
	void *ptr;
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namelen;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);

	/* If no LSM is supporting this security hook ignore error */
	if (err && err != -EOPNOTSUPP)
		goto out_err;

	if (lsmctx.len) {
		nr_ctx = 1;
		/* namelen includes the terminating NUL */
		namelen = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
		    lsmctx.len > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
					    lsmctx.len);
	}

	err = -ENOMEM;
	header = ptr = kzalloc(total_len, GFP_KERNEL);
	if (!ptr)
		goto out_err;

	header->nr_secctx = nr_ctx;
	header->size = total_len;
	ptr += sizeof(*header);
	if (nr_ctx) {
		fctx = ptr;
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		/*
		 * The length is already known and was accounted for in
		 * total_len, so copy exactly namelen bytes (NUL included)
		 * instead of using the deprecated, unbounded strcpy().
		 */
		memcpy(ptr, name, namelen);
		ptr += namelen;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
	ext->size = total_len;
	ext->value = header;
	err = 0;
out_err:
	/* Only release the LSM context if the hook actually returned one */
	if (nr_ctx)
		security_release_secctx(&lsmctx);
	return err;
}
520 
/*
 * Grow the buffer described by @buf by @bytes zeroed bytes.
 *
 * Returns a pointer to the start of the newly added region.  If the
 * reallocation fails the old buffer is freed, @buf is reset to empty and
 * NULL is returned.
 */
static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
{
	u32 newlen = buf->size + bytes;
	void *grown = krealloc(buf->value, newlen, GFP_KERNEL);

	if (!grown) {
		kfree(buf->value);
		buf->value = NULL;
		buf->size = 0;
		return NULL;
	}

	/* buf->size is still the old length here: clear only the new tail */
	memset(grown + buf->size, 0, bytes);
	buf->size = newlen;
	buf->value = grown;

	return grown + newlen - bytes;
}
540 
fuse_ext_size(size_t size)541 static u32 fuse_ext_size(size_t size)
542 {
543 	return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
544 }
545 
546 /*
547  * This adds just a single supplementary group that matches the parent's group.
548  */
get_create_supp_group(struct mnt_idmap * idmap,struct inode * dir,struct fuse_in_arg * ext)549 static int get_create_supp_group(struct mnt_idmap *idmap,
550 				 struct inode *dir,
551 				 struct fuse_in_arg *ext)
552 {
553 	struct fuse_conn *fc = get_fuse_conn(dir);
554 	struct fuse_ext_header *xh;
555 	struct fuse_supp_groups *sg;
556 	kgid_t kgid = dir->i_gid;
557 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
558 	gid_t parent_gid = from_kgid(fc->user_ns, kgid);
559 
560 	u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
561 
562 	if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
563 	    !vfsgid_in_group_p(vfsgid))
564 		return 0;
565 
566 	xh = extend_arg(ext, sg_len);
567 	if (!xh)
568 		return -ENOMEM;
569 
570 	xh->size = sg_len;
571 	xh->type = FUSE_EXT_GROUPS;
572 
573 	sg = (struct fuse_supp_groups *) &xh[1];
574 	sg->nr_groups = 1;
575 	sg->groups[0] = parent_gid;
576 
577 	return 0;
578 }
579 
/*
 * Collect the optional extensions for a create-type request (security
 * context and/or supplementary group, depending on what the connection
 * enabled) and, if any were produced, attach them to @args as one extra
 * input argument.
 *
 * On error, or when no extension data was generated, the temporary
 * buffer is freed and @args is left untouched.  Returns 0 or a negative
 * errno.
 */
static int get_create_ext(struct mnt_idmap *idmap,
			  struct fuse_args *args,
			  struct inode *dir, struct dentry *dentry,
			  umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	struct fuse_in_arg ext = { .size = 0, .value = NULL };
	int err = 0;

	if (fc->init_security)
		err = get_security_context(dentry, mode, &ext);
	if (!err && fc->create_supp_group)
		err = get_create_supp_group(idmap, dir, &ext);

	if (err || !ext.size) {
		kfree(ext.value);
		return err;
	}

	WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
	args->is_ext = true;
	args->ext_idx = args->in_numargs++;
	args->in_args[args->ext_idx] = ext;

	return 0;
}
605 
free_ext_value(struct fuse_args * args)606 static void free_ext_value(struct fuse_args *args)
607 {
608 	if (args->is_ext)
609 		kfree(args->in_args[args->ext_idx].value);
610 }
611 
612 /*
613  * Atomic create+open operation
614  *
615  * If the filesystem doesn't support this, then fall back to separate
616  * 'mknod' + 'open' requests.
617  */
fuse_create_open(struct mnt_idmap * idmap,struct inode * dir,struct dentry * entry,struct file * file,unsigned int flags,umode_t mode,u32 opcode)618 static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
619 			    struct dentry *entry, struct file *file,
620 			    unsigned int flags, umode_t mode, u32 opcode)
621 {
622 	int err;
623 	struct inode *inode;
624 	struct fuse_mount *fm = get_fuse_mount(dir);
625 	FUSE_ARGS(args);
626 	struct fuse_forget_link *forget;
627 	struct fuse_create_in inarg;
628 	struct fuse_open_out *outopenp;
629 	struct fuse_entry_out outentry;
630 	struct fuse_inode *fi;
631 	struct fuse_file *ff;
632 	bool trunc = flags & O_TRUNC;
633 
634 	/* Userspace expects S_IFREG in create mode */
635 	BUG_ON((mode & S_IFMT) != S_IFREG);
636 
637 	forget = fuse_alloc_forget();
638 	err = -ENOMEM;
639 	if (!forget)
640 		goto out_err;
641 
642 	err = -ENOMEM;
643 	ff = fuse_file_alloc(fm, true);
644 	if (!ff)
645 		goto out_put_forget_req;
646 
647 	if (!fm->fc->dont_mask)
648 		mode &= ~current_umask();
649 
650 	flags &= ~O_NOCTTY;
651 	memset(&inarg, 0, sizeof(inarg));
652 	memset(&outentry, 0, sizeof(outentry));
653 	inarg.flags = flags;
654 	inarg.mode = mode;
655 	inarg.umask = current_umask();
656 
657 	if (fm->fc->handle_killpriv_v2 && trunc &&
658 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
659 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
660 	}
661 
662 	args.opcode = opcode;
663 	args.nodeid = get_node_id(dir);
664 	args.in_numargs = 2;
665 	args.in_args[0].size = sizeof(inarg);
666 	args.in_args[0].value = &inarg;
667 	args.in_args[1].size = entry->d_name.len + 1;
668 	args.in_args[1].value = entry->d_name.name;
669 	args.out_numargs = 2;
670 	args.out_args[0].size = sizeof(outentry);
671 	args.out_args[0].value = &outentry;
672 	/* Store outarg for fuse_finish_open() */
673 	outopenp = &ff->args->open_outarg;
674 	args.out_args[1].size = sizeof(*outopenp);
675 	args.out_args[1].value = outopenp;
676 
677 	err = get_create_ext(idmap, &args, dir, entry, mode);
678 	if (err)
679 		goto out_free_ff;
680 
681 	err = fuse_simple_idmap_request(idmap, fm, &args);
682 	free_ext_value(&args);
683 	if (err)
684 		goto out_free_ff;
685 
686 	err = -EIO;
687 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
688 	    fuse_invalid_attr(&outentry.attr))
689 		goto out_free_ff;
690 
691 	ff->fh = outopenp->fh;
692 	ff->nodeid = outentry.nodeid;
693 	ff->open_flags = outopenp->open_flags;
694 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
695 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
696 	if (!inode) {
697 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
698 		fuse_sync_release(NULL, ff, flags);
699 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
700 		err = -ENOMEM;
701 		goto out_err;
702 	}
703 	kfree(forget);
704 	d_instantiate(entry, inode);
705 	fuse_change_entry_timeout(entry, &outentry);
706 	fuse_dir_changed(dir);
707 	err = generic_file_open(inode, file);
708 	if (!err) {
709 		file->private_data = ff;
710 		err = finish_open(file, entry, fuse_finish_open);
711 	}
712 	if (err) {
713 		fi = get_fuse_inode(inode);
714 		fuse_sync_release(fi, ff, flags);
715 	} else {
716 		if (fm->fc->atomic_o_trunc && trunc)
717 			truncate_pagecache(inode, 0);
718 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
719 			invalidate_inode_pages2(inode->i_mapping);
720 	}
721 	return err;
722 
723 out_free_ff:
724 	fuse_file_free(ff);
725 out_put_forget_req:
726 	kfree(forget);
727 out_err:
728 	return err;
729 }
730 
731 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
732 		      umode_t, dev_t);
fuse_atomic_open(struct inode * dir,struct dentry * entry,struct file * file,unsigned flags,umode_t mode)733 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
734 			    struct file *file, unsigned flags,
735 			    umode_t mode)
736 {
737 	int err;
738 	struct mnt_idmap *idmap = file_mnt_idmap(file);
739 	struct fuse_conn *fc = get_fuse_conn(dir);
740 	struct dentry *res = NULL;
741 
742 	if (fuse_is_bad(dir))
743 		return -EIO;
744 
745 	if (d_in_lookup(entry)) {
746 		res = fuse_lookup(dir, entry, 0);
747 		if (IS_ERR(res))
748 			return PTR_ERR(res);
749 
750 		if (res)
751 			entry = res;
752 	}
753 
754 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
755 		goto no_open;
756 
757 	/* Only creates */
758 	file->f_mode |= FMODE_CREATED;
759 
760 	if (fc->no_create)
761 		goto mknod;
762 
763 	err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
764 	if (err == -ENOSYS) {
765 		fc->no_create = 1;
766 		goto mknod;
767 	} else if (err == -EEXIST)
768 		fuse_invalidate_entry(entry);
769 out_dput:
770 	dput(res);
771 	return err;
772 
773 mknod:
774 	err = fuse_mknod(idmap, dir, entry, mode, 0);
775 	if (err)
776 		goto out_dput;
777 no_open:
778 	return finish_no_open(file, res);
779 }
780 
781 /*
782  * Code shared between mknod, mkdir, symlink and link
783  */
create_new_entry(struct mnt_idmap * idmap,struct fuse_mount * fm,struct fuse_args * args,struct inode * dir,struct dentry * entry,umode_t mode)784 static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
785 				       struct fuse_args *args, struct inode *dir,
786 				       struct dentry *entry, umode_t mode)
787 {
788 	struct fuse_entry_out outarg;
789 	struct inode *inode;
790 	struct dentry *d;
791 	int err;
792 	struct fuse_forget_link *forget;
793 
794 	if (fuse_is_bad(dir))
795 		return ERR_PTR(-EIO);
796 
797 	forget = fuse_alloc_forget();
798 	if (!forget)
799 		return ERR_PTR(-ENOMEM);
800 
801 	memset(&outarg, 0, sizeof(outarg));
802 	args->nodeid = get_node_id(dir);
803 	args->out_numargs = 1;
804 	args->out_args[0].size = sizeof(outarg);
805 	args->out_args[0].value = &outarg;
806 
807 	if (args->opcode != FUSE_LINK) {
808 		err = get_create_ext(idmap, args, dir, entry, mode);
809 		if (err)
810 			goto out_put_forget_req;
811 	}
812 
813 	err = fuse_simple_idmap_request(idmap, fm, args);
814 	free_ext_value(args);
815 	if (err)
816 		goto out_put_forget_req;
817 
818 	err = -EIO;
819 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
820 		goto out_put_forget_req;
821 
822 	if ((outarg.attr.mode ^ mode) & S_IFMT)
823 		goto out_put_forget_req;
824 
825 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
826 			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
827 	if (!inode) {
828 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
829 		return ERR_PTR(-ENOMEM);
830 	}
831 	kfree(forget);
832 
833 	d_drop(entry);
834 	d = d_splice_alias(inode, entry);
835 	if (IS_ERR(d))
836 		return d;
837 
838 	if (d)
839 		fuse_change_entry_timeout(d, &outarg);
840 	else
841 		fuse_change_entry_timeout(entry, &outarg);
842 	fuse_dir_changed(dir);
843 	return d;
844 
845  out_put_forget_req:
846 	if (err == -EEXIST)
847 		fuse_invalidate_entry(entry);
848 	kfree(forget);
849 	return ERR_PTR(err);
850 }
851 
/*
 * create_new_entry() for callers that create something other than a
 * directory and want a plain errno back.
 */
static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
			     struct fuse_args *args, struct inode *dir,
			     struct dentry *entry, umode_t mode)
{
	struct dentry *res;

	/*
	 * d_splice_alias() only hands back an alternate dentry for
	 * directories, so for a non-directory create_new_entry() yields
	 * either NULL or an ERR_PTR and the result can be collapsed into
	 * an errno without looking for an alternate dentry.
	 */
	WARN_ON_ONCE(S_ISDIR(mode));

	res = create_new_entry(idmap, fm, args, dir, entry, mode);
	return PTR_ERR(res);
}
867 
/*
 * ->mknod(): send FUSE_MKNOD for @entry in @dir.  The umask is applied to
 * @mode here unless the connection has dont_mask set; the raw umask is
 * always passed along in the request.
 */
static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mknod_in inarg;
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.rdev = new_encode_dev(rdev);
	inarg.umask = current_umask();

	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	/* name is sent including its terminating NUL */
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	return create_new_nondir(idmap, fm, &args, dir, entry, mode);
}
890 
/*
 * ->create(): implemented as fuse_mknod() with a zero device number.
 * @excl is not used.
 */
static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	const dev_t no_device = 0;

	return fuse_mknod(idmap, dir, entry, mode, no_device);
}
896 
/*
 * ->tmpfile(): create and open an unnamed file with FUSE_TMPFILE.
 *
 * -ENOSYS from the request is remembered in fc->no_tmpfile and reported
 * as -EOPNOTSUPP, as are all later calls on the same connection.
 */
static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
			struct file *file, umode_t mode)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	int err;

	if (fc->no_tmpfile)
		return -EOPNOTSUPP;

	err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
			       file->f_flags, mode, FUSE_TMPFILE);
	if (err != -ENOSYS)
		return err;

	fc->no_tmpfile = 1;
	return -EOPNOTSUPP;
}
914 
/*
 * ->mkdir(): send FUSE_MKDIR for @entry in @dir.  The umask is applied to
 * @mode here unless the connection has dont_mask set.  Returns whatever
 * create_new_entry() returns.
 */
static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
				 struct dentry *entry, umode_t mode)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mkdir_in inarg;
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.umask = current_umask();

	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	/* name is sent including its terminating NUL */
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	/* The reply is type-checked against S_IFDIR, not the full mode */
	return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
}
936 
fuse_symlink(struct mnt_idmap * idmap,struct inode * dir,struct dentry * entry,const char * link)937 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
938 			struct dentry *entry, const char *link)
939 {
940 	struct fuse_mount *fm = get_fuse_mount(dir);
941 	unsigned len = strlen(link) + 1;
942 	FUSE_ARGS(args);
943 
944 	args.opcode = FUSE_SYMLINK;
945 	args.in_numargs = 3;
946 	fuse_set_zero_arg0(&args);
947 	args.in_args[1].size = entry->d_name.len + 1;
948 	args.in_args[1].value = entry->d_name.name;
949 	args.in_args[2].size = len;
950 	args.in_args[2].value = link;
951 	return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
952 }
953 
fuse_flush_time_update(struct inode * inode)954 void fuse_flush_time_update(struct inode *inode)
955 {
956 	int err = sync_inode_metadata(inode, 1);
957 
958 	mapping_set_error(inode->i_mapping, err);
959 }
960 
/*
 * Set the cached ctime of @inode to the current time and flush it.
 * Does nothing when IS_NOCMTIME() holds for the inode.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
969 
fuse_update_ctime(struct inode * inode)970 void fuse_update_ctime(struct inode *inode)
971 {
972 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
973 	fuse_update_ctime_in_cache(inode);
974 }
975 
fuse_entry_unlinked(struct dentry * entry)976 static void fuse_entry_unlinked(struct dentry *entry)
977 {
978 	struct inode *inode = d_inode(entry);
979 	struct fuse_conn *fc = get_fuse_conn(inode);
980 	struct fuse_inode *fi = get_fuse_inode(inode);
981 
982 	spin_lock(&fi->lock);
983 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
984 	/*
985 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
986 	 * happen if userspace filesystem is careless.  It would be
987 	 * difficult to enforce correct nlink usage so just ignore this
988 	 * condition here
989 	 */
990 	if (S_ISDIR(inode->i_mode))
991 		clear_nlink(inode);
992 	else if (inode->i_nlink > 0)
993 		drop_nlink(inode);
994 	spin_unlock(&fi->lock);
995 	fuse_invalidate_entry_cache(entry);
996 	fuse_update_ctime(inode);
997 }
998 
fuse_unlink(struct inode * dir,struct dentry * entry)999 static int fuse_unlink(struct inode *dir, struct dentry *entry)
1000 {
1001 	int err;
1002 	struct fuse_mount *fm = get_fuse_mount(dir);
1003 	FUSE_ARGS(args);
1004 
1005 	if (fuse_is_bad(dir))
1006 		return -EIO;
1007 
1008 	args.opcode = FUSE_UNLINK;
1009 	args.nodeid = get_node_id(dir);
1010 	args.in_numargs = 2;
1011 	fuse_set_zero_arg0(&args);
1012 	args.in_args[1].size = entry->d_name.len + 1;
1013 	args.in_args[1].value = entry->d_name.name;
1014 	err = fuse_simple_request(fm, &args);
1015 	if (!err) {
1016 		fuse_dir_changed(dir);
1017 		fuse_entry_unlinked(entry);
1018 	} else if (err == -EINTR || err == -ENOENT)
1019 		fuse_invalidate_entry(entry);
1020 	return err;
1021 }
1022 
fuse_rmdir(struct inode * dir,struct dentry * entry)1023 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1024 {
1025 	int err;
1026 	struct fuse_mount *fm = get_fuse_mount(dir);
1027 	FUSE_ARGS(args);
1028 
1029 	if (fuse_is_bad(dir))
1030 		return -EIO;
1031 
1032 	args.opcode = FUSE_RMDIR;
1033 	args.nodeid = get_node_id(dir);
1034 	args.in_numargs = 2;
1035 	fuse_set_zero_arg0(&args);
1036 	args.in_args[1].size = entry->d_name.len + 1;
1037 	args.in_args[1].value = entry->d_name.name;
1038 	err = fuse_simple_request(fm, &args);
1039 	if (!err) {
1040 		fuse_dir_changed(dir);
1041 		fuse_entry_unlinked(entry);
1042 	} else if (err == -EINTR || err == -ENOENT)
1043 		fuse_invalidate_entry(entry);
1044 	return err;
1045 }
1046 
/*
 * Issue a rename request with the given @opcode and update local state
 * according to the outcome.
 *
 * @argsize is the size of the fixed input argument that is sent; only that
 * many bytes of the local fuse_rename2_in are zeroed and transmitted.  The
 * old and new names follow as the second and third input arguments.
 *
 * Returns the result of the request.
 */
static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	struct fuse_mount *fm = get_fuse_mount(olddir);
	struct fuse_rename2_in inarg;
	bool exchange = flags & RENAME_EXCHANGE;
	int err;
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;

	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;

	err = fuse_simple_idmap_request(idmap, fm, &args);
	if (err) {
		if (err == -EINTR || err == -ENOENT) {
			/* If request was interrupted, DEITY only knows if the
			   rename actually took place.  If the invalidation
			   fails (e.g. some process has CWD under the renamed
			   directory), then there can be inconsistency between
			   the dcache and the real filesystem.  Tough luck. */
			fuse_invalidate_entry(oldent);
			if (d_really_is_positive(newent))
				fuse_invalidate_entry(newent);
		}
		return err;
	}

	/* ctime changes */
	fuse_update_ctime(d_inode(oldent));
	if (exchange)
		fuse_update_ctime(d_inode(newent));

	fuse_dir_changed(olddir);
	if (newdir != olddir)
		fuse_dir_changed(newdir);

	/* newent will end up negative */
	if (!exchange && d_really_is_positive(newent))
		fuse_entry_unlinked(newent);

	return 0;
}
1096 
fuse_rename2(struct mnt_idmap * idmap,struct inode * olddir,struct dentry * oldent,struct inode * newdir,struct dentry * newent,unsigned int flags)1097 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1098 			struct dentry *oldent, struct inode *newdir,
1099 			struct dentry *newent, unsigned int flags)
1100 {
1101 	struct fuse_conn *fc = get_fuse_conn(olddir);
1102 	int err;
1103 
1104 	if (fuse_is_bad(olddir))
1105 		return -EIO;
1106 
1107 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1108 		return -EINVAL;
1109 
1110 	if (flags) {
1111 		if (fc->no_rename2 || fc->minor < 23)
1112 			return -EINVAL;
1113 
1114 		err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
1115 					 olddir, oldent, newdir, newent, flags,
1116 					 FUSE_RENAME2,
1117 					 sizeof(struct fuse_rename2_in));
1118 		if (err == -ENOSYS) {
1119 			fc->no_rename2 = 1;
1120 			err = -EINVAL;
1121 		}
1122 	} else {
1123 		err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
1124 					 FUSE_RENAME,
1125 					 sizeof(struct fuse_rename_in));
1126 	}
1127 
1128 	return err;
1129 }
1130 
fuse_link(struct dentry * entry,struct inode * newdir,struct dentry * newent)1131 static int fuse_link(struct dentry *entry, struct inode *newdir,
1132 		     struct dentry *newent)
1133 {
1134 	int err;
1135 	struct fuse_link_in inarg;
1136 	struct inode *inode = d_inode(entry);
1137 	struct fuse_mount *fm = get_fuse_mount(inode);
1138 	FUSE_ARGS(args);
1139 
1140 	if (fm->fc->no_link)
1141 		goto out;
1142 
1143 	memset(&inarg, 0, sizeof(inarg));
1144 	inarg.oldnodeid = get_node_id(inode);
1145 	args.opcode = FUSE_LINK;
1146 	args.in_numargs = 2;
1147 	args.in_args[0].size = sizeof(inarg);
1148 	args.in_args[0].value = &inarg;
1149 	args.in_args[1].size = newent->d_name.len + 1;
1150 	args.in_args[1].value = newent->d_name.name;
1151 	err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
1152 	if (!err)
1153 		fuse_update_ctime_in_cache(inode);
1154 	else if (err == -EINTR)
1155 		fuse_invalidate_attr(inode);
1156 
1157 	if (err == -ENOSYS)
1158 		fm->fc->no_link = 1;
1159 out:
1160 	if (fm->fc->no_link)
1161 		return -EPERM;
1162 
1163 	return err;
1164 }
1165 
fuse_fillattr(struct mnt_idmap * idmap,struct inode * inode,struct fuse_attr * attr,struct kstat * stat)1166 static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
1167 			  struct fuse_attr *attr, struct kstat *stat)
1168 {
1169 	unsigned int blkbits;
1170 	struct fuse_conn *fc = get_fuse_conn(inode);
1171 	vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
1172 				      make_kuid(fc->user_ns, attr->uid));
1173 	vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
1174 				      make_kgid(fc->user_ns, attr->gid));
1175 
1176 	stat->dev = inode->i_sb->s_dev;
1177 	stat->ino = attr->ino;
1178 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1179 	stat->nlink = attr->nlink;
1180 	stat->uid = vfsuid_into_kuid(vfsuid);
1181 	stat->gid = vfsgid_into_kgid(vfsgid);
1182 	stat->rdev = inode->i_rdev;
1183 	stat->atime.tv_sec = attr->atime;
1184 	stat->atime.tv_nsec = attr->atimensec;
1185 	stat->mtime.tv_sec = attr->mtime;
1186 	stat->mtime.tv_nsec = attr->mtimensec;
1187 	stat->ctime.tv_sec = attr->ctime;
1188 	stat->ctime.tv_nsec = attr->ctimensec;
1189 	stat->size = attr->size;
1190 	stat->blocks = attr->blocks;
1191 
1192 	if (attr->blksize != 0)
1193 		blkbits = ilog2(attr->blksize);
1194 	else
1195 		blkbits = inode->i_sb->s_blocksize_bits;
1196 
1197 	stat->blksize = 1 << blkbits;
1198 }
1199 
fuse_statx_to_attr(struct fuse_statx * sx,struct fuse_attr * attr)1200 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1201 {
1202 	memset(attr, 0, sizeof(*attr));
1203 	attr->ino = sx->ino;
1204 	attr->size = sx->size;
1205 	attr->blocks = sx->blocks;
1206 	attr->atime = sx->atime.tv_sec;
1207 	attr->mtime = sx->mtime.tv_sec;
1208 	attr->ctime = sx->ctime.tv_sec;
1209 	attr->atimensec = sx->atime.tv_nsec;
1210 	attr->mtimensec = sx->mtime.tv_nsec;
1211 	attr->ctimensec = sx->ctime.tv_nsec;
1212 	attr->mode = sx->mode;
1213 	attr->nlink = sx->nlink;
1214 	attr->uid = sx->uid;
1215 	attr->gid = sx->gid;
1216 	attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1217 	attr->blksize = sx->blksize;
1218 }
1219 
fuse_do_statx(struct mnt_idmap * idmap,struct inode * inode,struct file * file,struct kstat * stat)1220 static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
1221 			 struct file *file, struct kstat *stat)
1222 {
1223 	int err;
1224 	struct fuse_attr attr;
1225 	struct fuse_statx *sx;
1226 	struct fuse_statx_in inarg;
1227 	struct fuse_statx_out outarg;
1228 	struct fuse_mount *fm = get_fuse_mount(inode);
1229 	u64 attr_version = fuse_get_attr_version(fm->fc);
1230 	FUSE_ARGS(args);
1231 
1232 	memset(&inarg, 0, sizeof(inarg));
1233 	memset(&outarg, 0, sizeof(outarg));
1234 	/* Directories have separate file-handle space */
1235 	if (file && S_ISREG(inode->i_mode)) {
1236 		struct fuse_file *ff = file->private_data;
1237 
1238 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1239 		inarg.fh = ff->fh;
1240 	}
1241 	/* For now leave sync hints as the default, request all stats. */
1242 	inarg.sx_flags = 0;
1243 	inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1244 	args.opcode = FUSE_STATX;
1245 	args.nodeid = get_node_id(inode);
1246 	args.in_numargs = 1;
1247 	args.in_args[0].size = sizeof(inarg);
1248 	args.in_args[0].value = &inarg;
1249 	args.out_numargs = 1;
1250 	args.out_args[0].size = sizeof(outarg);
1251 	args.out_args[0].value = &outarg;
1252 	err = fuse_simple_request(fm, &args);
1253 	if (err)
1254 		return err;
1255 
1256 	sx = &outarg.stat;
1257 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1258 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1259 					 inode_wrong_type(inode, sx->mode)))) {
1260 		fuse_make_bad(inode);
1261 		return -EIO;
1262 	}
1263 
1264 	fuse_statx_to_attr(&outarg.stat, &attr);
1265 	if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1266 		fuse_change_attributes(inode, &attr, &outarg.stat,
1267 				       ATTR_TIMEOUT(&outarg), attr_version);
1268 	}
1269 
1270 	if (stat) {
1271 		stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1272 		stat->btime.tv_sec = sx->btime.tv_sec;
1273 		stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1274 		fuse_fillattr(idmap, inode, &attr, stat);
1275 		stat->result_mask |= STATX_TYPE;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
fuse_do_getattr(struct mnt_idmap * idmap,struct inode * inode,struct kstat * stat,struct file * file)1281 static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
1282 			   struct kstat *stat, struct file *file)
1283 {
1284 	int err;
1285 	struct fuse_getattr_in inarg;
1286 	struct fuse_attr_out outarg;
1287 	struct fuse_mount *fm = get_fuse_mount(inode);
1288 	FUSE_ARGS(args);
1289 	u64 attr_version;
1290 
1291 	attr_version = fuse_get_attr_version(fm->fc);
1292 
1293 	memset(&inarg, 0, sizeof(inarg));
1294 	memset(&outarg, 0, sizeof(outarg));
1295 	/* Directories have separate file-handle space */
1296 	if (file && S_ISREG(inode->i_mode)) {
1297 		struct fuse_file *ff = file->private_data;
1298 
1299 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1300 		inarg.fh = ff->fh;
1301 	}
1302 	args.opcode = FUSE_GETATTR;
1303 	args.nodeid = get_node_id(inode);
1304 	args.in_numargs = 1;
1305 	args.in_args[0].size = sizeof(inarg);
1306 	args.in_args[0].value = &inarg;
1307 	args.out_numargs = 1;
1308 	args.out_args[0].size = sizeof(outarg);
1309 	args.out_args[0].value = &outarg;
1310 	err = fuse_simple_request(fm, &args);
1311 	if (!err) {
1312 		if (fuse_invalid_attr(&outarg.attr) ||
1313 		    inode_wrong_type(inode, outarg.attr.mode)) {
1314 			fuse_make_bad(inode);
1315 			err = -EIO;
1316 		} else {
1317 			fuse_change_attributes(inode, &outarg.attr, NULL,
1318 					       ATTR_TIMEOUT(&outarg),
1319 					       attr_version);
1320 			if (stat)
1321 				fuse_fillattr(idmap, inode, &outarg.attr, stat);
1322 		}
1323 	}
1324 	return err;
1325 }
1326 
/*
 * Make sure the attributes selected by @request_mask are up to date and,
 * if @stat is not NULL, fill it in.
 *
 * A request to the server is made ("sync") according to the first rule
 * that matches:
 *   - nothing is requested:                        no
 *   - AT_STATX_FORCE_SYNC in @flags:               yes
 *   - AT_STATX_DONT_SYNC in @flags:                no
 *   - a requested attribute is invalidated and not
 *     covered by the cache mask:                   yes
 *   - otherwise:                                   only if fi->i_time is
 *                                                  before the current jiffies
 *
 * Returns 0 or the error from the attribute request.
 */
static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
				struct file *file, struct kstat *stat,
				u32 request_mask, unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err = 0;
	bool sync;
	u32 inval_mask = READ_ONCE(fi->inval_mask);
	u32 cache_mask = fuse_get_cache_mask(inode);


	/* FUSE only supports basic stats and possibly btime */
	request_mask &= STATX_BASIC_STATS | STATX_BTIME;
retry:
	/* Without statx support only the basic stats can be requested. */
	if (fc->no_statx)
		request_mask &= STATX_BASIC_STATS;

	if (!request_mask)
		sync = false;
	else if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & inval_mask & ~cache_mask)
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		/* Try statx if BTIME is requested */
		if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
			err = fuse_do_statx(idmap, inode, file, stat);
			if (err == -ENOSYS) {
				/*
				 * Remember that statx is unsupported and
				 * start over with the narrowed request mask.
				 */
				fc->no_statx = 1;
				err = 0;
				goto retry;
			}
		} else {
			err = fuse_do_getattr(idmap, inode, stat, file);
		}
	} else if (stat) {
		/*
		 * Serve from the inode, then override mode and ino with the
		 * values stored in the fuse inode.
		 */
		generic_fillattr(idmap, request_mask, inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
		/* Report the cached btime when the FUSE_I_BTIME bit is set. */
		if (test_bit(FUSE_I_BTIME, &fi->state)) {
			stat->btime = fi->i_btime;
			stat->result_mask |= STATX_BTIME;
		}
	}

	return err;
}
1381 
/*
 * Refresh the attributes selected by @mask if needed, without returning
 * them: calls fuse_update_get_attr() with nop_mnt_idmap, a NULL stat
 * buffer and no flags.
 */
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
	return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
}
1386 
/*
 * Invalidate (or, with FUSE_EXPIRE_ONLY in @flags, only expire) the cached
 * dentry @name below the directory with node id @parent_nodeid.
 *
 * If @child_nodeid is non-zero and the dentry is positive, the dentry is
 * additionally deleted from the dcache, provided it refers to
 * @child_nodeid, is not a mountpoint and, for a directory, is empty.
 *
 * Returns 0 on success, or
 *   -ENOENT    parent inode, an alias of it, or the dentry was not found,
 *              or the dentry does not refer to @child_nodeid
 *   -ENOTDIR   the parent is not a directory
 *   -EBUSY     the dentry is a mountpoint
 *   -ENOTEMPTY the dentry is a non-empty directory
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name, u32 flags)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock_nested(parent, I_MUTEX_PARENT);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	/* The caller supplies name and length; the hash is computed here. */
	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	if (!(flags & FUSE_EXPIRE_ONLY))
		d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		/* d_delete() is called after the child inode lock is dropped. */
		inode_unlock(d_inode(entry));
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1454 
fuse_permissible_uidgid(struct fuse_conn * fc)1455 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1456 {
1457 	const struct cred *cred = current_cred();
1458 
1459 	return (uid_eq(cred->euid, fc->user_id) &&
1460 		uid_eq(cred->suid, fc->user_id) &&
1461 		uid_eq(cred->uid,  fc->user_id) &&
1462 		gid_eq(cred->egid, fc->group_id) &&
1463 		gid_eq(cred->sgid, fc->group_id) &&
1464 		gid_eq(cred->gid,  fc->group_id));
1465 }
1466 
1467 /*
1468  * Calling into a user-controlled filesystem gives the filesystem
1469  * daemon ptrace-like capabilities over the current process.  This
1470  * means, that the filesystem daemon is able to record the exact
1471  * filesystem operations performed, and can also control the behavior
1472  * of the requester process in otherwise impossible ways.  For example
1473  * it can delay the operation for arbitrary length of time allowing
1474  * DoS against the requester.
1475  *
1476  * For this reason only those processes can call into the filesystem,
1477  * for which the owner of the mount has ptrace privilege.  This
1478  * excludes processes started by other users, suid or sgid processes.
1479  */
fuse_allow_current_process(struct fuse_conn * fc)1480 bool fuse_allow_current_process(struct fuse_conn *fc)
1481 {
1482 	bool allow;
1483 
1484 	if (fc->allow_other)
1485 		allow = current_in_userns(fc->user_ns);
1486 	else
1487 		allow = fuse_permissible_uidgid(fc);
1488 
1489 	if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1490 		allow = true;
1491 
1492 	return allow;
1493 }
1494 
fuse_access(struct inode * inode,int mask)1495 static int fuse_access(struct inode *inode, int mask)
1496 {
1497 	struct fuse_mount *fm = get_fuse_mount(inode);
1498 	FUSE_ARGS(args);
1499 	struct fuse_access_in inarg;
1500 	int err;
1501 
1502 	BUG_ON(mask & MAY_NOT_BLOCK);
1503 
1504 	/*
1505 	 * We should not send FUSE_ACCESS to the userspace
1506 	 * when idmapped mounts are enabled as for this case
1507 	 * we have fc->default_permissions = 1 and access
1508 	 * permission checks are done on the kernel side.
1509 	 */
1510 	WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
1511 
1512 	if (fm->fc->no_access)
1513 		return 0;
1514 
1515 	memset(&inarg, 0, sizeof(inarg));
1516 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1517 	args.opcode = FUSE_ACCESS;
1518 	args.nodeid = get_node_id(inode);
1519 	args.in_numargs = 1;
1520 	args.in_args[0].size = sizeof(inarg);
1521 	args.in_args[0].value = &inarg;
1522 	err = fuse_simple_request(fm, &args);
1523 	if (err == -ENOSYS) {
1524 		fm->fc->no_access = 1;
1525 		err = 0;
1526 	}
1527 	return err;
1528 }
1529 
/*
 * Refresh the attributes of @inode for a permission check.
 *
 * Returns -ECHILD when @mask contains MAY_NOT_BLOCK, without sending a
 * request.  Otherwise cached ACLs are dropped and the result of
 * fuse_do_getattr() is returned.
 */
static int fuse_perm_getattr(struct inode *inode, int mask)
{
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	forget_all_cached_acls(inode);
	return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
}
1538 
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 *
 * Returns 0 if access is granted, -EIO for a bad inode, -EACCES if the
 * current process may not use this mount or access is denied, or an error
 * from refreshing the attributes or from the access request.
 */
static int fuse_permission(struct mnt_idmap *idmap,
			   struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	/* Set once the attributes have been re-fetched during this call. */
	bool refreshed = false;
	int err = 0;

	if (fuse_is_bad(inode))
		return -EIO;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if (fc->default_permissions ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);
		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;

		/* Refresh if mode/uid/gid are invalidated or the cache timed out. */
		if (perm_mask & READ_ONCE(fi->inval_mask) ||
		    time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->default_permissions) {
		err = generic_permission(idmap, inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(idmap,
							 inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Execute on a regular file needs at least one x bit. */
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

			/* The cached mode may be stale: re-fetch once and re-check. */
			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1614 
fuse_readlink_page(struct inode * inode,struct folio * folio)1615 static int fuse_readlink_page(struct inode *inode, struct folio *folio)
1616 {
1617 	struct fuse_mount *fm = get_fuse_mount(inode);
1618 	struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
1619 	struct fuse_args_pages ap = {
1620 		.num_folios = 1,
1621 		.folios = &folio,
1622 		.descs = &desc,
1623 	};
1624 	char *link;
1625 	ssize_t res;
1626 
1627 	ap.args.opcode = FUSE_READLINK;
1628 	ap.args.nodeid = get_node_id(inode);
1629 	ap.args.out_pages = true;
1630 	ap.args.out_argvar = true;
1631 	ap.args.page_zeroing = true;
1632 	ap.args.out_numargs = 1;
1633 	ap.args.out_args[0].size = desc.length;
1634 	res = fuse_simple_request(fm, &ap.args);
1635 
1636 	fuse_invalidate_atime(inode);
1637 
1638 	if (res < 0)
1639 		return res;
1640 
1641 	if (WARN_ON(res >= PAGE_SIZE))
1642 		return -EIO;
1643 
1644 	link = folio_address(folio);
1645 	link[res] = '\0';
1646 
1647 	return 0;
1648 }
1649 
fuse_get_link(struct dentry * dentry,struct inode * inode,struct delayed_call * callback)1650 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1651 				 struct delayed_call *callback)
1652 {
1653 	struct fuse_conn *fc = get_fuse_conn(inode);
1654 	struct folio *folio;
1655 	int err;
1656 
1657 	err = -EIO;
1658 	if (fuse_is_bad(inode))
1659 		goto out_err;
1660 
1661 	if (fc->cache_symlinks)
1662 		return page_get_link_raw(dentry, inode, callback);
1663 
1664 	err = -ECHILD;
1665 	if (!dentry)
1666 		goto out_err;
1667 
1668 	folio = folio_alloc(GFP_KERNEL, 0);
1669 	err = -ENOMEM;
1670 	if (!folio)
1671 		goto out_err;
1672 
1673 	err = fuse_readlink_page(inode, folio);
1674 	if (err) {
1675 		folio_put(folio);
1676 		goto out_err;
1677 	}
1678 
1679 	set_delayed_call(callback, page_put_link, &folio->page);
1680 
1681 	return folio_address(folio);
1682 
1683 out_err:
1684 	return ERR_PTR(err);
1685 }
1686 
fuse_dir_open(struct inode * inode,struct file * file)1687 static int fuse_dir_open(struct inode *inode, struct file *file)
1688 {
1689 	struct fuse_mount *fm = get_fuse_mount(inode);
1690 	int err;
1691 
1692 	if (fuse_is_bad(inode))
1693 		return -EIO;
1694 
1695 	err = generic_file_open(inode, file);
1696 	if (err)
1697 		return err;
1698 
1699 	err = fuse_do_open(fm, get_node_id(inode), file, true);
1700 	if (!err) {
1701 		struct fuse_file *ff = file->private_data;
1702 
1703 		/*
1704 		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1705 		 * directories for backward compatibility, though it's unlikely
1706 		 * to be useful.
1707 		 */
1708 		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1709 			nonseekable_open(inode, file);
1710 		if (!(ff->open_flags & FOPEN_KEEP_CACHE))
1711 			invalidate_inode_pages2(inode->i_mapping);
1712 	}
1713 
1714 	return err;
1715 }
1716 
/*
 * Release an open directory file: delegates to fuse_release_common() with
 * its second argument set to true (the same value fuse_dir_open() passes
 * to fuse_do_open()) and always returns 0.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, true);

	return 0;
}
1723 
/*
 * fsync for directories, sent as FUSE_FSYNCDIR with the inode lock held.
 *
 * Once the server has answered -ENOSYS the connection is flagged
 * no_fsyncdir and every later call succeeds without a request.  Returns
 * -EIO for a bad inode.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	if (fuse_is_bad(dir))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(dir);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		/* Not implemented by the server: treat as success from now on. */
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(dir);

	return ret;
}
1747 
fuse_dir_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1748 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1749 			    unsigned long arg)
1750 {
1751 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1752 
1753 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1754 	if (fc->minor < 18)
1755 		return -ENOTTY;
1756 
1757 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1758 }
1759 
fuse_dir_compat_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1760 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1761 				   unsigned long arg)
1762 {
1763 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1764 
1765 	if (fc->minor < 18)
1766 		return -ENOTTY;
1767 
1768 	return fuse_ioctl_common(file, cmd, arg,
1769 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1770 }
1771 
update_mtime(unsigned ivalid,bool trust_local_mtime)1772 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1773 {
1774 	/* Always update if mtime is explicitly set  */
1775 	if (ivalid & ATTR_MTIME_SET)
1776 		return true;
1777 
1778 	/* Or if kernel i_mtime is the official one */
1779 	if (trust_local_mtime)
1780 		return true;
1781 
1782 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1783 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1784 		return false;
1785 
1786 	/* In all other cases update */
1787 	return true;
1788 }
1789 
/*
 * Translate the valid fields of @iattr into the SETATTR request @arg.
 *
 * Only fields flagged in iattr->ia_valid are copied, and the matching
 * FATTR_* bit is set in arg->valid for each.  uid and gid are mapped
 * from @idmap into the connection's user namespace.  atime and mtime
 * additionally get the *_NOW flag when they were not set explicitly
 * (for mtime only if local times are not trusted).  mtime is subject to
 * update_mtime(); ctime is only sent when @trust_local_cmtime is set.
 */
static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
			   struct iattr *iattr, struct fuse_setattr_in *arg,
			   bool trust_local_cmtime)
{
	unsigned ia_valid = iattr->ia_valid;

	if (ia_valid & ATTR_MODE) {
		arg->valid |= FATTR_MODE;
		arg->mode = iattr->ia_mode;
	}

	if (ia_valid & ATTR_UID) {
		kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);

		arg->valid |= FATTR_UID;
		arg->uid = from_kuid(fc->user_ns, fsuid);
	}

	if (ia_valid & ATTR_GID) {
		kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);

		arg->valid |= FATTR_GID;
		arg->gid = from_kgid(fc->user_ns, fsgid);
	}

	if (ia_valid & ATTR_SIZE) {
		arg->valid |= FATTR_SIZE;
		arg->size = iattr->ia_size;
	}

	if (ia_valid & ATTR_ATIME) {
		arg->valid |= FATTR_ATIME;
		arg->atime = iattr->ia_atime.tv_sec;
		arg->atimensec = iattr->ia_atime.tv_nsec;
		if (!(ia_valid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
	}

	if ((ia_valid & ATTR_MTIME) &&
	    update_mtime(ia_valid, trust_local_cmtime)) {
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		if (!(ia_valid & ATTR_MTIME_SET) && !trust_local_cmtime)
			arg->valid |= FATTR_MTIME_NOW;
	}

	if ((ia_valid & ATTR_CTIME) && trust_local_cmtime) {
		arg->valid |= FATTR_CTIME;
		arg->ctime = iattr->ia_ctime.tv_sec;
		arg->ctimensec = iattr->ia_ctime.tv_nsec;
	}
}
1835 
/*
 * Prevent concurrent writepages on inode
 *
 * This is done by adding a negative bias to the inode write counter
 * and waiting for all pending writes to finish.
 *
 * The caller must hold the inode lock, and the bias must not already be
 * in place (writectr must not be negative); both are enforced with
 * BUG_ON().
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&fi->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fi->lock);
	/* Sleep until the counter is down to the bias alone. */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
1854 
/*
 * Allow writepages on inode
 *
 * Remove the bias from the writecounter and send any queued
 * writepages.
 *
 * The counter must be exactly FUSE_NOWRITE on entry (enforced with
 * BUG_ON()).  fuse_release_nowrite() calls this with fi->lock held.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
}
1869 
/*
 * Locked wrapper around __fuse_release_nowrite(): takes fi->lock, removes
 * the write counter bias and lets queued writepages go.
 */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	__fuse_release_nowrite(inode);
	spin_unlock(&fi->lock);
}
1878 
fuse_setattr_fill(struct fuse_conn * fc,struct fuse_args * args,struct inode * inode,struct fuse_setattr_in * inarg_p,struct fuse_attr_out * outarg_p)1879 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1880 			      struct inode *inode,
1881 			      struct fuse_setattr_in *inarg_p,
1882 			      struct fuse_attr_out *outarg_p)
1883 {
1884 	args->opcode = FUSE_SETATTR;
1885 	args->nodeid = get_node_id(inode);
1886 	args->in_numargs = 1;
1887 	args->in_args[0].size = sizeof(*inarg_p);
1888 	args->in_args[0].value = inarg_p;
1889 	args->out_numargs = 1;
1890 	args->out_args[0].size = sizeof(*outarg_p);
1891 	args->out_args[0].value = outarg_p;
1892 }
1893 
1894 /*
1895  * Flush inode->i_mtime to the server
1896  */
fuse_flush_times(struct inode * inode,struct fuse_file * ff)1897 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1898 {
1899 	struct fuse_mount *fm = get_fuse_mount(inode);
1900 	FUSE_ARGS(args);
1901 	struct fuse_setattr_in inarg;
1902 	struct fuse_attr_out outarg;
1903 
1904 	memset(&inarg, 0, sizeof(inarg));
1905 	memset(&outarg, 0, sizeof(outarg));
1906 
1907 	inarg.valid = FATTR_MTIME;
1908 	inarg.mtime = inode_get_mtime_sec(inode);
1909 	inarg.mtimensec = inode_get_mtime_nsec(inode);
1910 	if (fm->fc->minor >= 23) {
1911 		inarg.valid |= FATTR_CTIME;
1912 		inarg.ctime = inode_get_ctime_sec(inode);
1913 		inarg.ctimensec = inode_get_ctime_nsec(inode);
1914 	}
1915 	if (ff) {
1916 		inarg.valid |= FATTR_FH;
1917 		inarg.fh = ff->fh;
1918 	}
1919 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1920 
1921 	return fuse_simple_request(fm, &args);
1922 }
1923 
1924 /*
1925  * Set attributes, and at the same time refresh them.
1926  *
1927  * Truncation is slightly complicated, because the 'truncate' request
1928  * may fail, in which case we don't want to touch the mapping.
1929  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1930  * and the actual truncation by hand.
1931  */
fuse_do_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * attr,struct file * file)1932 int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1933 		    struct iattr *attr, struct file *file)
1934 {
1935 	struct inode *inode = d_inode(dentry);
1936 	struct fuse_mount *fm = get_fuse_mount(inode);
1937 	struct fuse_conn *fc = fm->fc;
1938 	struct fuse_inode *fi = get_fuse_inode(inode);
1939 	struct address_space *mapping = inode->i_mapping;
1940 	FUSE_ARGS(args);
1941 	struct fuse_setattr_in inarg;
1942 	struct fuse_attr_out outarg;
1943 	bool is_truncate = false;
1944 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1945 	loff_t oldsize;
1946 	int err;
1947 	bool trust_local_cmtime = is_wb;
1948 	bool fault_blocked = false;
1949 
1950 	if (!fc->default_permissions)
1951 		attr->ia_valid |= ATTR_FORCE;
1952 
1953 	err = setattr_prepare(idmap, dentry, attr);
1954 	if (err)
1955 		return err;
1956 
1957 	if (attr->ia_valid & ATTR_SIZE) {
1958 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1959 			return -EIO;
1960 		is_truncate = true;
1961 	}
1962 
1963 	if (FUSE_IS_DAX(inode) && is_truncate) {
1964 		filemap_invalidate_lock(mapping);
1965 		fault_blocked = true;
1966 		err = fuse_dax_break_layouts(inode, 0, -1);
1967 		if (err) {
1968 			filemap_invalidate_unlock(mapping);
1969 			return err;
1970 		}
1971 	}
1972 
1973 	if (attr->ia_valid & ATTR_OPEN) {
1974 		/* This is coming from open(..., ... | O_TRUNC); */
1975 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1976 		WARN_ON(attr->ia_size != 0);
1977 		if (fc->atomic_o_trunc) {
1978 			/*
1979 			 * No need to send request to userspace, since actual
1980 			 * truncation has already been done by OPEN.  But still
1981 			 * need to truncate page cache.
1982 			 */
1983 			i_size_write(inode, 0);
1984 			truncate_pagecache(inode, 0);
1985 			goto out;
1986 		}
1987 		file = NULL;
1988 	}
1989 
1990 	/* Flush dirty data/metadata before non-truncate SETATTR */
1991 	if (is_wb &&
1992 	    attr->ia_valid &
1993 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1994 			 ATTR_TIMES_SET)) {
1995 		err = write_inode_now(inode, true);
1996 		if (err)
1997 			return err;
1998 
1999 		fuse_set_nowrite(inode);
2000 		fuse_release_nowrite(inode);
2001 	}
2002 
2003 	if (is_truncate) {
2004 		fuse_set_nowrite(inode);
2005 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2006 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
2007 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
2008 	}
2009 
2010 	memset(&inarg, 0, sizeof(inarg));
2011 	memset(&outarg, 0, sizeof(outarg));
2012 	iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
2013 	if (file) {
2014 		struct fuse_file *ff = file->private_data;
2015 		inarg.valid |= FATTR_FH;
2016 		inarg.fh = ff->fh;
2017 	}
2018 
2019 	/* Kill suid/sgid for non-directory chown unconditionally */
2020 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
2021 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
2022 		inarg.valid |= FATTR_KILL_SUIDGID;
2023 
2024 	if (attr->ia_valid & ATTR_SIZE) {
2025 		/* For mandatory locking in truncate */
2026 		inarg.valid |= FATTR_LOCKOWNER;
2027 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
2028 
2029 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
2030 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
2031 			inarg.valid |= FATTR_KILL_SUIDGID;
2032 	}
2033 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
2034 	err = fuse_simple_request(fm, &args);
2035 	if (err) {
2036 		if (err == -EINTR)
2037 			fuse_invalidate_attr(inode);
2038 		goto error;
2039 	}
2040 
2041 	if (fuse_invalid_attr(&outarg.attr) ||
2042 	    inode_wrong_type(inode, outarg.attr.mode)) {
2043 		fuse_make_bad(inode);
2044 		err = -EIO;
2045 		goto error;
2046 	}
2047 
2048 	spin_lock(&fi->lock);
2049 	/* the kernel maintains i_mtime locally */
2050 	if (trust_local_cmtime) {
2051 		if (attr->ia_valid & ATTR_MTIME)
2052 			inode_set_mtime_to_ts(inode, attr->ia_mtime);
2053 		if (attr->ia_valid & ATTR_CTIME)
2054 			inode_set_ctime_to_ts(inode, attr->ia_ctime);
2055 		/* FIXME: clear I_DIRTY_SYNC? */
2056 	}
2057 
2058 	fuse_change_attributes_common(inode, &outarg.attr, NULL,
2059 				      ATTR_TIMEOUT(&outarg),
2060 				      fuse_get_cache_mask(inode), 0);
2061 	oldsize = inode->i_size;
2062 	/* see the comment in fuse_change_attributes() */
2063 	if (!is_wb || is_truncate)
2064 		i_size_write(inode, outarg.attr.size);
2065 
2066 	if (is_truncate) {
2067 		/* NOTE: this may release/reacquire fi->lock */
2068 		__fuse_release_nowrite(inode);
2069 	}
2070 	spin_unlock(&fi->lock);
2071 
2072 	/*
2073 	 * Only call invalidate_inode_pages2() after removing
2074 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2075 	 */
2076 	if ((is_truncate || !is_wb) &&
2077 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2078 		truncate_pagecache(inode, outarg.attr.size);
2079 		invalidate_inode_pages2(mapping);
2080 	}
2081 
2082 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2083 out:
2084 	if (fault_blocked)
2085 		filemap_invalidate_unlock(mapping);
2086 
2087 	return 0;
2088 
2089 error:
2090 	if (is_truncate)
2091 		fuse_release_nowrite(inode);
2092 
2093 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2094 
2095 	if (fault_blocked)
2096 		filemap_invalidate_unlock(mapping);
2097 	return err;
2098 }
2099 
fuse_setattr(struct mnt_idmap * idmap,struct dentry * entry,struct iattr * attr)2100 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2101 			struct iattr *attr)
2102 {
2103 	struct inode *inode = d_inode(entry);
2104 	struct fuse_conn *fc = get_fuse_conn(inode);
2105 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2106 	int ret;
2107 
2108 	if (fuse_is_bad(inode))
2109 		return -EIO;
2110 
2111 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
2112 		return -EACCES;
2113 
2114 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2115 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2116 				    ATTR_MODE);
2117 
2118 		/*
2119 		 * The only sane way to reliably kill suid/sgid is to do it in
2120 		 * the userspace filesystem
2121 		 *
2122 		 * This should be done on write(), truncate() and chown().
2123 		 */
2124 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2125 			/*
2126 			 * ia_mode calculation may have used stale i_mode.
2127 			 * Refresh and recalculate.
2128 			 */
2129 			ret = fuse_do_getattr(idmap, inode, NULL, file);
2130 			if (ret)
2131 				return ret;
2132 
2133 			attr->ia_mode = inode->i_mode;
2134 			if (inode->i_mode & S_ISUID) {
2135 				attr->ia_valid |= ATTR_MODE;
2136 				attr->ia_mode &= ~S_ISUID;
2137 			}
2138 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2139 				attr->ia_valid |= ATTR_MODE;
2140 				attr->ia_mode &= ~S_ISGID;
2141 			}
2142 		}
2143 	}
2144 	if (!attr->ia_valid)
2145 		return 0;
2146 
2147 	ret = fuse_do_setattr(idmap, entry, attr, file);
2148 	if (!ret) {
2149 		/*
2150 		 * If filesystem supports acls it may have updated acl xattrs in
2151 		 * the filesystem, so forget cached acls for the inode.
2152 		 */
2153 		if (fc->posix_acl)
2154 			forget_all_cached_acls(inode);
2155 
2156 		/* Directory mode changed, may need to revalidate access */
2157 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2158 			fuse_invalidate_entry_cache(entry);
2159 	}
2160 	return ret;
2161 }
2162 
/*
 * ->getattr() handler installed in the fuse inode operation tables.
 *
 * Returns -EIO for a bad inode.  A process that is not allowed to use the
 * connection gets -EACCES, unless it asked for no attributes at all
 * (request_mask == 0): then only stat->dev is filled in and 0 is returned.
 * Otherwise the result of fuse_update_get_attr() is returned.
 */
static int fuse_getattr(struct mnt_idmap *idmap,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	if (fuse_allow_current_process(fc))
		return fuse_update_get_attr(idmap, inode, NULL, stat,
					    request_mask, flags);

	if (request_mask)
		return -EACCES;

	/*
	 * If user explicitly requested *nothing* then don't
	 * error out, but return st_dev only.
	 */
	stat->result_mask = 0;
	stat->dev = inode->i_sb->s_dev;
	return 0;
}
2188 
2189 static const struct inode_operations fuse_dir_inode_operations = {
2190 	.lookup		= fuse_lookup,
2191 	.mkdir		= fuse_mkdir,
2192 	.symlink	= fuse_symlink,
2193 	.unlink		= fuse_unlink,
2194 	.rmdir		= fuse_rmdir,
2195 	.rename		= fuse_rename2,
2196 	.link		= fuse_link,
2197 	.setattr	= fuse_setattr,
2198 	.create		= fuse_create,
2199 	.atomic_open	= fuse_atomic_open,
2200 	.tmpfile	= fuse_tmpfile,
2201 	.mknod		= fuse_mknod,
2202 	.permission	= fuse_permission,
2203 	.getattr	= fuse_getattr,
2204 	.listxattr	= fuse_listxattr,
2205 	.get_inode_acl	= fuse_get_inode_acl,
2206 	.get_acl	= fuse_get_acl,
2207 	.set_acl	= fuse_set_acl,
2208 	.fileattr_get	= fuse_fileattr_get,
2209 	.fileattr_set	= fuse_fileattr_set,
2210 };
2211 
2212 static const struct file_operations fuse_dir_operations = {
2213 	.llseek		= generic_file_llseek,
2214 	.read		= generic_read_dir,
2215 	.iterate_shared	= fuse_readdir,
2216 	.open		= fuse_dir_open,
2217 	.release	= fuse_dir_release,
2218 	.fsync		= fuse_dir_fsync,
2219 	.unlocked_ioctl	= fuse_dir_ioctl,
2220 	.compat_ioctl	= fuse_dir_compat_ioctl,
2221 };
2222 
2223 static const struct inode_operations fuse_common_inode_operations = {
2224 	.setattr	= fuse_setattr,
2225 	.permission	= fuse_permission,
2226 	.getattr	= fuse_getattr,
2227 	.listxattr	= fuse_listxattr,
2228 	.get_inode_acl	= fuse_get_inode_acl,
2229 	.get_acl	= fuse_get_acl,
2230 	.set_acl	= fuse_set_acl,
2231 	.fileattr_get	= fuse_fileattr_get,
2232 	.fileattr_set	= fuse_fileattr_set,
2233 };
2234 
2235 static const struct inode_operations fuse_symlink_inode_operations = {
2236 	.setattr	= fuse_setattr,
2237 	.get_link	= fuse_get_link,
2238 	.getattr	= fuse_getattr,
2239 	.listxattr	= fuse_listxattr,
2240 };
2241 
/* Point the inode's i_op at fuse_common_inode_operations. */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
2246 
fuse_init_dir(struct inode * inode)2247 void fuse_init_dir(struct inode *inode)
2248 {
2249 	struct fuse_inode *fi = get_fuse_inode(inode);
2250 
2251 	inode->i_op = &fuse_dir_inode_operations;
2252 	inode->i_fop = &fuse_dir_operations;
2253 
2254 	spin_lock_init(&fi->rdc.lock);
2255 	fi->rdc.cached = false;
2256 	fi->rdc.size = 0;
2257 	fi->rdc.pos = 0;
2258 	fi->rdc.version = 0;
2259 }
2260 
fuse_symlink_read_folio(struct file * null,struct folio * folio)2261 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2262 {
2263 	int err = fuse_readlink_page(folio->mapping->host, folio);
2264 
2265 	if (!err)
2266 		folio_mark_uptodate(folio);
2267 
2268 	folio_unlock(folio);
2269 
2270 	return err;
2271 }
2272 
/*
 * Address space operations installed on symlink inodes by
 * fuse_init_symlink(); only read_folio is provided.
 */
static const struct address_space_operations fuse_symlink_aops = {
	.read_folio	= fuse_symlink_read_folio,
};
2276 
/*
 * Set up a symlink inode: install the symlink inode operations and the
 * symlink address space operations on the inode's embedded i_data mapping.
 */
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
	inode->i_data.a_ops = &fuse_symlink_aops;
	/*
	 * NOTE(review): presumably restricts the mapping's page allocations to
	 * non-highmem so the link body is directly addressable -- confirm
	 * against inode_nohighmem().
	 */
	inode_nohighmem(inode);
}
2283