1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "acl.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "inode.h"
8 #include "namei.h"
9 #include "subvolume.h"
10 #include "xattr.h"
11 
12 #include <linux/posix_acl.h>
13 
14 static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode)
15 {
16 	return (subvol_inum) {
17 		.subvol	= inode->bi_parent_subvol ?: inum.subvol,
18 		.inum	= inode->bi_dir,
19 	};
20 }
21 
22 static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
23 {
24 	return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
25 }
26 
27 int bch2_create_trans(struct btree_trans *trans,
28 		      subvol_inum dir,
29 		      struct bch_inode_unpacked *dir_u,
30 		      struct bch_inode_unpacked *new_inode,
31 		      const struct qstr *name,
32 		      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
33 		      struct posix_acl *default_acl,
34 		      struct posix_acl *acl,
35 		      subvol_inum snapshot_src,
36 		      unsigned flags)
37 {
38 	struct bch_fs *c = trans->c;
39 	struct btree_iter dir_iter = {};
40 	struct btree_iter inode_iter = {};
41 	subvol_inum new_inum = dir;
42 	u64 now = bch2_current_time(c);
43 	u64 cpu = raw_smp_processor_id();
44 	u64 dir_target;
45 	u32 snapshot;
46 	unsigned dir_type = mode_to_type(mode);
47 	int ret;
48 
49 	ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
50 	if (ret)
51 		goto err;
52 
53 	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir,
54 			      BTREE_ITER_intent|BTREE_ITER_with_updates);
55 	if (ret)
56 		goto err;
57 
58 	if (!(flags & BCH_CREATE_SNAPSHOT)) {
59 		/* Normal create path - allocate a new inode: */
60 		bch2_inode_init_late(c, new_inode, now, uid, gid, mode, rdev, dir_u);
61 
62 		if (flags & BCH_CREATE_TMPFILE)
63 			new_inode->bi_flags |= BCH_INODE_unlinked;
64 
65 		ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
66 		if (ret)
67 			goto err;
68 
69 		snapshot_src = (subvol_inum) { 0 };
70 	} else {
71 		/*
72 		 * Creating a snapshot - we're not allocating a new inode, but
73 		 * we do have to lookup the root inode of the subvolume we're
74 		 * snapshotting and update it (in the new snapshot):
75 		 */
76 
77 		if (!snapshot_src.inum) {
78 			/* Inode wasn't specified, just snapshot: */
79 			struct bch_subvolume s;
80 			ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s);
81 			if (ret)
82 				goto err;
83 
84 			snapshot_src.inum = le64_to_cpu(s.inode);
85 		}
86 
87 		ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
88 				      BTREE_ITER_intent);
89 		if (ret)
90 			goto err;
91 
92 		if (new_inode->bi_subvol != snapshot_src.subvol) {
93 			/* Not a subvolume root: */
94 			ret = -EINVAL;
95 			goto err;
96 		}
97 
98 		/*
99 		 * If we're not root, we have to own the subvolume being
100 		 * snapshotted:
101 		 */
102 		if (uid && new_inode->bi_uid != uid) {
103 			ret = -EPERM;
104 			goto err;
105 		}
106 
107 		flags |= BCH_CREATE_SUBVOL;
108 	}
109 
110 	new_inum.inum	= new_inode->bi_inum;
111 	dir_target	= new_inode->bi_inum;
112 
113 	if (flags & BCH_CREATE_SUBVOL) {
114 		u32 new_subvol, dir_snapshot;
115 
116 		ret = bch2_subvolume_create(trans, new_inode->bi_inum,
117 					    dir.subvol,
118 					    snapshot_src.subvol,
119 					    &new_subvol, &snapshot,
120 					    (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
121 		if (ret)
122 			goto err;
123 
124 		new_inode->bi_parent_subvol	= dir.subvol;
125 		new_inode->bi_subvol		= new_subvol;
126 		new_inum.subvol			= new_subvol;
127 		dir_target			= new_subvol;
128 		dir_type			= DT_SUBVOL;
129 
130 		ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
131 		if (ret)
132 			goto err;
133 
134 		bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot);
135 		ret = bch2_btree_iter_traverse(trans, &dir_iter);
136 		if (ret)
137 			goto err;
138 	}
139 
140 	if (!(flags & BCH_CREATE_SNAPSHOT)) {
141 		if (default_acl) {
142 			ret = bch2_set_acl_trans(trans, new_inum, new_inode,
143 						 default_acl, ACL_TYPE_DEFAULT);
144 			if (ret)
145 				goto err;
146 		}
147 
148 		if (acl) {
149 			ret = bch2_set_acl_trans(trans, new_inum, new_inode,
150 						 acl, ACL_TYPE_ACCESS);
151 			if (ret)
152 				goto err;
153 		}
154 	}
155 
156 	if (!(flags & BCH_CREATE_TMPFILE)) {
157 		struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
158 		u64 dir_offset;
159 
160 		if (is_subdir_for_nlink(new_inode))
161 			dir_u->bi_nlink++;
162 		dir_u->bi_mtime = dir_u->bi_ctime = now;
163 
164 		ret =   bch2_dirent_create(trans, dir, &dir_hash,
165 					   dir_type,
166 					   name,
167 					   dir_target,
168 					   &dir_offset,
169 					   STR_HASH_must_create|BTREE_ITER_with_updates) ?:
170 			bch2_inode_write(trans, &dir_iter, dir_u);
171 		if (ret)
172 			goto err;
173 
174 		new_inode->bi_dir		= dir_u->bi_inum;
175 		new_inode->bi_dir_offset	= dir_offset;
176 	}
177 
178 	if (S_ISDIR(mode) &&
179 	    !new_inode->bi_subvol)
180 		new_inode->bi_depth = dir_u->bi_depth + 1;
181 
182 	inode_iter.flags &= ~BTREE_ITER_all_snapshots;
183 	bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot);
184 
185 	ret   = bch2_btree_iter_traverse(trans, &inode_iter) ?:
186 		bch2_inode_write(trans, &inode_iter, new_inode);
187 err:
188 	bch2_trans_iter_exit(trans, &inode_iter);
189 	bch2_trans_iter_exit(trans, &dir_iter);
190 	return ret;
191 }
192 
193 int bch2_link_trans(struct btree_trans *trans,
194 		    subvol_inum dir,  struct bch_inode_unpacked *dir_u,
195 		    subvol_inum inum, struct bch_inode_unpacked *inode_u,
196 		    const struct qstr *name)
197 {
198 	struct bch_fs *c = trans->c;
199 	struct btree_iter dir_iter = {};
200 	struct btree_iter inode_iter = {};
201 	struct bch_hash_info dir_hash;
202 	u64 now = bch2_current_time(c);
203 	u64 dir_offset = 0;
204 	int ret;
205 
206 	if (dir.subvol != inum.subvol)
207 		return -EXDEV;
208 
209 	ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_intent);
210 	if (ret)
211 		return ret;
212 
213 	inode_u->bi_ctime = now;
214 	ret = bch2_inode_nlink_inc(inode_u);
215 	if (ret)
216 		goto err;
217 
218 	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
219 	if (ret)
220 		goto err;
221 
222 	if (bch2_reinherit_attrs(inode_u, dir_u)) {
223 		ret = -EXDEV;
224 		goto err;
225 	}
226 
227 	dir_u->bi_mtime = dir_u->bi_ctime = now;
228 
229 	dir_hash = bch2_hash_info_init(c, dir_u);
230 
231 	ret = bch2_dirent_create(trans, dir, &dir_hash,
232 				 mode_to_type(inode_u->bi_mode),
233 				 name, inum.inum,
234 				 &dir_offset,
235 				 STR_HASH_must_create);
236 	if (ret)
237 		goto err;
238 
239 	inode_u->bi_dir		= dir.inum;
240 	inode_u->bi_dir_offset	= dir_offset;
241 
242 	ret =   bch2_inode_write(trans, &dir_iter, dir_u) ?:
243 		bch2_inode_write(trans, &inode_iter, inode_u);
244 err:
245 	bch2_trans_iter_exit(trans, &dir_iter);
246 	bch2_trans_iter_exit(trans, &inode_iter);
247 	return ret;
248 }
249 
250 int bch2_unlink_trans(struct btree_trans *trans,
251 		      subvol_inum dir,
252 		      struct bch_inode_unpacked *dir_u,
253 		      struct bch_inode_unpacked *inode_u,
254 		      const struct qstr *name,
255 		      bool deleting_subvol)
256 {
257 	struct bch_fs *c = trans->c;
258 	struct btree_iter dir_iter = {};
259 	struct btree_iter dirent_iter = {};
260 	struct btree_iter inode_iter = {};
261 	struct bch_hash_info dir_hash;
262 	subvol_inum inum;
263 	u64 now = bch2_current_time(c);
264 	struct bkey_s_c k;
265 	int ret;
266 
267 	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
268 	if (ret)
269 		goto err;
270 
271 	dir_hash = bch2_hash_info_init(c, dir_u);
272 
273 	ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
274 				       name, &inum, BTREE_ITER_intent);
275 	if (ret)
276 		goto err;
277 
278 	ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
279 			      BTREE_ITER_intent);
280 	if (ret)
281 		goto err;
282 
283 	if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
284 		ret = bch2_empty_dir_trans(trans, inum);
285 		if (ret)
286 			goto err;
287 	}
288 
289 	if (deleting_subvol && !inode_u->bi_subvol) {
290 		ret = -BCH_ERR_ENOENT_not_subvol;
291 		goto err;
292 	}
293 
294 	if (inode_u->bi_subvol) {
295 		/* Recursive subvolume destroy not allowed (yet?) */
296 		ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
297 		if (ret)
298 			goto err;
299 	}
300 
301 	if (deleting_subvol || inode_u->bi_subvol) {
302 		ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
303 		if (ret)
304 			goto err;
305 
306 		k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
307 		ret = bkey_err(k);
308 		if (ret)
309 			goto err;
310 
311 		/*
312 		 * If we're deleting a subvolume, we need to really delete the
313 		 * dirent, not just emit a whiteout in the current snapshot:
314 		 */
315 		bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
316 		ret = bch2_btree_iter_traverse(trans, &dirent_iter);
317 		if (ret)
318 			goto err;
319 	} else {
320 		bch2_inode_nlink_dec(trans, inode_u);
321 	}
322 
323 	if (inode_u->bi_dir		== dirent_iter.pos.inode &&
324 	    inode_u->bi_dir_offset	== dirent_iter.pos.offset) {
325 		inode_u->bi_dir		= 0;
326 		inode_u->bi_dir_offset	= 0;
327 	}
328 
329 	dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
330 	dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
331 
332 	ret =   bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
333 				    &dir_hash, &dirent_iter,
334 				    BTREE_UPDATE_internal_snapshot_node) ?:
335 		bch2_inode_write(trans, &dir_iter, dir_u) ?:
336 		bch2_inode_write(trans, &inode_iter, inode_u);
337 err:
338 	bch2_trans_iter_exit(trans, &inode_iter);
339 	bch2_trans_iter_exit(trans, &dirent_iter);
340 	bch2_trans_iter_exit(trans, &dir_iter);
341 	return ret;
342 }
343 
344 bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
345 			  struct bch_inode_unpacked *src_u)
346 {
347 	u64 src, dst;
348 	unsigned id;
349 	bool ret = false;
350 
351 	for (id = 0; id < Inode_opt_nr; id++) {
352 		if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
353 			continue;
354 
355 		/* Skip attributes that were explicitly set on this inode */
356 		if (dst_u->bi_fields_set & (1 << id))
357 			continue;
358 
359 		src = bch2_inode_opt_get(src_u, id);
360 		dst = bch2_inode_opt_get(dst_u, id);
361 
362 		if (src == dst)
363 			continue;
364 
365 		bch2_inode_opt_set(dst_u, id, src);
366 		ret = true;
367 	}
368 
369 	return ret;
370 }
371 
372 static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
373 {
374 	struct btree_iter iter;
375 	struct bkey_i_subvolume *s =
376 		bch2_bkey_get_mut_typed(trans, &iter,
377 			BTREE_ID_subvolumes, POS(0, subvol),
378 			BTREE_ITER_cached, subvolume);
379 	int ret = PTR_ERR_OR_ZERO(s);
380 	if (ret)
381 		return ret;
382 
383 	s->v.fs_path_parent = cpu_to_le32(new_parent);
384 	bch2_trans_iter_exit(trans, &iter);
385 	return 0;
386 }
387 
388 int bch2_rename_trans(struct btree_trans *trans,
389 		      subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
390 		      subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
391 		      struct bch_inode_unpacked *src_inode_u,
392 		      struct bch_inode_unpacked *dst_inode_u,
393 		      const struct qstr *src_name,
394 		      const struct qstr *dst_name,
395 		      enum bch_rename_mode mode)
396 {
397 	struct bch_fs *c = trans->c;
398 	struct btree_iter src_dir_iter = {};
399 	struct btree_iter dst_dir_iter = {};
400 	struct btree_iter src_inode_iter = {};
401 	struct btree_iter dst_inode_iter = {};
402 	struct bch_hash_info src_hash, dst_hash;
403 	subvol_inum src_inum, dst_inum;
404 	u64 src_offset, dst_offset;
405 	u64 now = bch2_current_time(c);
406 	int ret;
407 
408 	ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
409 			      BTREE_ITER_intent);
410 	if (ret)
411 		goto err;
412 
413 	src_hash = bch2_hash_info_init(c, src_dir_u);
414 
415 	if (!subvol_inum_eq(dst_dir, src_dir)) {
416 		ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
417 				      BTREE_ITER_intent);
418 		if (ret)
419 			goto err;
420 
421 		dst_hash = bch2_hash_info_init(c, dst_dir_u);
422 	} else {
423 		dst_dir_u = src_dir_u;
424 		dst_hash = src_hash;
425 	}
426 
427 	ret = bch2_dirent_rename(trans,
428 				 src_dir, &src_hash, &src_dir_u->bi_size,
429 				 dst_dir, &dst_hash, &dst_dir_u->bi_size,
430 				 src_name, &src_inum, &src_offset,
431 				 dst_name, &dst_inum, &dst_offset,
432 				 mode);
433 	if (ret)
434 		goto err;
435 
436 	ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
437 			      BTREE_ITER_intent);
438 	if (ret)
439 		goto err;
440 
441 	if (dst_inum.inum) {
442 		ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
443 				      BTREE_ITER_intent);
444 		if (ret)
445 			goto err;
446 	}
447 
448 	if (src_inode_u->bi_subvol &&
449 	    dst_dir.subvol != src_inode_u->bi_parent_subvol) {
450 		ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
451 		if (ret)
452 			goto err;
453 	}
454 
455 	if (mode == BCH_RENAME_EXCHANGE &&
456 	    dst_inode_u->bi_subvol &&
457 	    src_dir.subvol != dst_inode_u->bi_parent_subvol) {
458 		ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
459 		if (ret)
460 			goto err;
461 	}
462 
463 	/* Can't move across subvolumes, unless it's a subvolume root: */
464 	if (src_dir.subvol != dst_dir.subvol &&
465 	    (!src_inode_u->bi_subvol ||
466 	     (dst_inum.inum && !dst_inode_u->bi_subvol))) {
467 		ret = -EXDEV;
468 		goto err;
469 	}
470 
471 	if (src_inode_u->bi_parent_subvol)
472 		src_inode_u->bi_parent_subvol = dst_dir.subvol;
473 
474 	if ((mode == BCH_RENAME_EXCHANGE) &&
475 	    dst_inode_u->bi_parent_subvol)
476 		dst_inode_u->bi_parent_subvol = src_dir.subvol;
477 
478 	src_inode_u->bi_dir		= dst_dir_u->bi_inum;
479 	src_inode_u->bi_dir_offset	= dst_offset;
480 
481 	if (mode == BCH_RENAME_EXCHANGE) {
482 		dst_inode_u->bi_dir		= src_dir_u->bi_inum;
483 		dst_inode_u->bi_dir_offset	= src_offset;
484 	}
485 
486 	if (mode == BCH_RENAME_OVERWRITE &&
487 	    dst_inode_u->bi_dir		== dst_dir_u->bi_inum &&
488 	    dst_inode_u->bi_dir_offset	== src_offset) {
489 		dst_inode_u->bi_dir		= 0;
490 		dst_inode_u->bi_dir_offset	= 0;
491 	}
492 
493 	if (mode == BCH_RENAME_OVERWRITE) {
494 		if (S_ISDIR(src_inode_u->bi_mode) !=
495 		    S_ISDIR(dst_inode_u->bi_mode)) {
496 			ret = -ENOTDIR;
497 			goto err;
498 		}
499 
500 		if (S_ISDIR(dst_inode_u->bi_mode)) {
501 			ret = bch2_empty_dir_trans(trans, dst_inum);
502 			if (ret)
503 				goto err;
504 		}
505 	}
506 
507 	if (!subvol_inum_eq(dst_dir, src_dir)) {
508 		if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
509 		    S_ISDIR(src_inode_u->bi_mode)) {
510 			ret = -EXDEV;
511 			goto err;
512 		}
513 
514 		if (mode == BCH_RENAME_EXCHANGE &&
515 		    bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
516 		    S_ISDIR(dst_inode_u->bi_mode)) {
517 			ret = -EXDEV;
518 			goto err;
519 		}
520 
521 		ret =   bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u) ?:
522 			(mode == BCH_RENAME_EXCHANGE
523 			 ? bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u)
524 			 : 0);
525 		if (ret)
526 			goto err;
527 
528 		if (is_subdir_for_nlink(src_inode_u)) {
529 			src_dir_u->bi_nlink--;
530 			dst_dir_u->bi_nlink++;
531 		}
532 
533 		if (S_ISDIR(src_inode_u->bi_mode) &&
534 		    !src_inode_u->bi_subvol)
535 			src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
536 
537 		if (mode == BCH_RENAME_EXCHANGE &&
538 		    S_ISDIR(dst_inode_u->bi_mode) &&
539 		    !dst_inode_u->bi_subvol)
540 			dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
541 	}
542 
543 	if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
544 		dst_dir_u->bi_nlink--;
545 		src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
546 	}
547 
548 	if (mode == BCH_RENAME_OVERWRITE)
549 		bch2_inode_nlink_dec(trans, dst_inode_u);
550 
551 	src_dir_u->bi_mtime		= now;
552 	src_dir_u->bi_ctime		= now;
553 
554 	if (src_dir.inum != dst_dir.inum) {
555 		dst_dir_u->bi_mtime	= now;
556 		dst_dir_u->bi_ctime	= now;
557 	}
558 
559 	src_inode_u->bi_ctime		= now;
560 
561 	if (dst_inum.inum)
562 		dst_inode_u->bi_ctime	= now;
563 
564 	ret =   bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
565 		(src_dir.inum != dst_dir.inum
566 		 ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
567 		 : 0) ?:
568 		bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
569 		(dst_inum.inum
570 		 ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
571 		 : 0);
572 err:
573 	bch2_trans_iter_exit(trans, &dst_inode_iter);
574 	bch2_trans_iter_exit(trans, &src_inode_iter);
575 	bch2_trans_iter_exit(trans, &dst_dir_iter);
576 	bch2_trans_iter_exit(trans, &src_dir_iter);
577 	return ret;
578 }
579 
580 /* inum_to_path */
581 
582 static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
583 {
584 	bch2_printbuf_make_room(out, n);
585 
586 	unsigned can_print = min(n, printbuf_remaining(out));
587 
588 	b += n;
589 
590 	for (unsigned i = 0; i < can_print; i++)
591 		out->buf[out->pos++] = *((char *) --b);
592 
593 	printbuf_nul_terminate(out);
594 }
595 
/* Append the nul-terminated string @s to @out with its bytes reversed. */
static inline void prt_str_reversed(struct printbuf *out, const char *s)
{
	unsigned len = strlen(s);

	prt_bytes_reversed(out, s, len);
}
600 
/*
 * Reverse the @n bytes starting at @b in place.
 *
 * Works inwards from both ends, exchanging one pair of bytes per step; for an
 * odd @n the middle byte ends up exchanged with itself.
 */
static inline void reverse_bytes(void *b, size_t n)
{
	char *bytes = b;
	size_t lo = 0, hi = n;

	while (lo < hi) {
		char tmp = bytes[--hi];

		bytes[hi] = bytes[lo];
		bytes[lo++] = tmp;
	}
}
611 
612 static int __bch2_inum_to_path(struct btree_trans *trans,
613 			       u32 subvol, u64 inum, u32 snapshot,
614 			       struct printbuf *path)
615 {
616 	unsigned orig_pos = path->pos;
617 	int ret = 0;
618 
619 	while (true) {
620 		if (!snapshot) {
621 			ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
622 			if (ret)
623 				goto disconnected;
624 		}
625 
626 		struct bch_inode_unpacked inode;
627 		ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
628 		if (ret)
629 			goto disconnected;
630 
631 		if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL &&
632 		    inode.bi_inum == BCACHEFS_ROOT_INO)
633 			break;
634 
635 		if (!inode.bi_dir && !inode.bi_dir_offset) {
636 			ret = -BCH_ERR_ENOENT_inode_no_backpointer;
637 			goto disconnected;
638 		}
639 
640 		inum = inode.bi_dir;
641 		if (inode.bi_parent_subvol) {
642 			subvol = inode.bi_parent_subvol;
643 			snapshot = 0;
644 		}
645 
646 		struct btree_iter d_iter;
647 		struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
648 				BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot),
649 				0, dirent);
650 		ret = bkey_err(d.s_c);
651 		if (ret)
652 			goto disconnected;
653 
654 		struct qstr dirent_name = bch2_dirent_get_name(d);
655 		prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
656 
657 		prt_char(path, '/');
658 
659 		bch2_trans_iter_exit(trans, &d_iter);
660 	}
661 
662 	if (orig_pos == path->pos)
663 		prt_char(path, '/');
664 out:
665 	ret = path->allocation_failure ? -ENOMEM : 0;
666 	if (ret)
667 		goto err;
668 
669 	reverse_bytes(path->buf + orig_pos, path->pos - orig_pos);
670 	return 0;
671 err:
672 	return ret;
673 disconnected:
674 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
675 		goto err;
676 
677 	prt_str_reversed(path, "(disconnected)");
678 	goto out;
679 }
680 
681 int bch2_inum_to_path(struct btree_trans *trans,
682 		      subvol_inum inum,
683 		      struct printbuf *path)
684 {
685 	return __bch2_inum_to_path(trans, inum.subvol, inum.inum, 0, path);
686 }
687 
688 int bch2_inum_snapshot_to_path(struct btree_trans *trans, u64 inum, u32 snapshot,
689 			       snapshot_id_list *snapshot_overwrites,
690 			       struct printbuf *path)
691 {
692 	return __bch2_inum_to_path(trans, 0, inum, snapshot, path);
693 }
694 
695 /* fsck */
696 
697 static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
698 					  struct bkey_s_c_dirent d,
699 					  struct bch_inode_unpacked *target,
700 					  bool in_fsck)
701 {
702 	struct bch_fs *c = trans->c;
703 	struct printbuf buf = PRINTBUF;
704 	struct btree_iter bp_iter = {};
705 	int ret = 0;
706 
707 	if (inode_points_to_dirent(target, d))
708 		return 0;
709 
710 	if (!target->bi_dir &&
711 	    !target->bi_dir_offset) {
712 		fsck_err_on(S_ISDIR(target->bi_mode),
713 			    trans, inode_dir_missing_backpointer,
714 			    "directory with missing backpointer\n%s",
715 			    (printbuf_reset(&buf),
716 			     bch2_bkey_val_to_text(&buf, c, d.s_c),
717 			     prt_printf(&buf, "\n"),
718 			     bch2_inode_unpacked_to_text(&buf, target),
719 			     buf.buf));
720 
721 		fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
722 			    trans, inode_unlinked_but_has_dirent,
723 			    "inode unlinked but has dirent\n%s",
724 			    (printbuf_reset(&buf),
725 			     bch2_bkey_val_to_text(&buf, c, d.s_c),
726 			     prt_printf(&buf, "\n"),
727 			     bch2_inode_unpacked_to_text(&buf, target),
728 			     buf.buf));
729 
730 		target->bi_flags &= ~BCH_INODE_unlinked;
731 		target->bi_dir		= d.k->p.inode;
732 		target->bi_dir_offset	= d.k->p.offset;
733 		return __bch2_fsck_write_inode(trans, target);
734 	}
735 
736 	if (bch2_inode_should_have_single_bp(target) &&
737 	    !fsck_err(trans, inode_wrong_backpointer,
738 		      "dirent points to inode that does not point back:\n%s",
739 		      (bch2_bkey_val_to_text(&buf, c, d.s_c),
740 		       prt_newline(&buf),
741 		       bch2_inode_unpacked_to_text(&buf, target),
742 		       buf.buf)))
743 		goto err;
744 
745 	struct bkey_s_c_dirent bp_dirent =
746 		bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
747 			      SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
748 			      0, dirent);
749 	ret = bkey_err(bp_dirent);
750 	if (ret && !bch2_err_matches(ret, ENOENT))
751 		goto err;
752 
753 	bool backpointer_exists = !ret;
754 	ret = 0;
755 
756 	if (!backpointer_exists) {
757 		if (fsck_err(trans, inode_wrong_backpointer,
758 			     "inode %llu:%u has wrong backpointer:\n"
759 			     "got       %llu:%llu\n"
760 			     "should be %llu:%llu",
761 			     target->bi_inum, target->bi_snapshot,
762 			     target->bi_dir,
763 			     target->bi_dir_offset,
764 			     d.k->p.inode,
765 			     d.k->p.offset)) {
766 			target->bi_dir		= d.k->p.inode;
767 			target->bi_dir_offset	= d.k->p.offset;
768 			ret = __bch2_fsck_write_inode(trans, target);
769 		}
770 	} else {
771 		bch2_bkey_val_to_text(&buf, c, d.s_c);
772 		prt_newline(&buf);
773 		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
774 
775 		if (S_ISDIR(target->bi_mode) || target->bi_subvol) {
776 			/*
777 			 * XXX: verify connectivity of the other dirent
778 			 * up to the root before removing this one
779 			 *
780 			 * Additionally, bch2_lookup would need to cope with the
781 			 * dirent it found being removed - or should we remove
782 			 * the other one, even though the inode points to it?
783 			 */
784 			if (in_fsck) {
785 				if (fsck_err(trans, inode_dir_multiple_links,
786 					     "%s %llu:%u with multiple links\n%s",
787 					     S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
788 					     target->bi_inum, target->bi_snapshot, buf.buf))
789 					ret = bch2_fsck_remove_dirent(trans, d.k->p);
790 			} else {
791 				bch2_fs_inconsistent(c,
792 						"%s %llu:%u with multiple links\n%s",
793 						S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
794 						target->bi_inum, target->bi_snapshot, buf.buf);
795 			}
796 
797 			goto out;
798 		} else {
799 			/*
800 			 * hardlinked file with nlink 0:
801 			 * We're just adjusting nlink here so check_nlinks() will pick
802 			 * it up, it ignores inodes with nlink 0
803 			 */
804 			if (fsck_err_on(!target->bi_nlink,
805 					trans, inode_multiple_links_but_nlink_0,
806 					"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
807 					target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
808 				target->bi_nlink++;
809 				target->bi_flags &= ~BCH_INODE_unlinked;
810 				ret = __bch2_fsck_write_inode(trans, target);
811 				if (ret)
812 					goto err;
813 			}
814 		}
815 	}
816 out:
817 err:
818 fsck_err:
819 	bch2_trans_iter_exit(trans, &bp_iter);
820 	printbuf_exit(&buf);
821 	bch_err_fn(c, ret);
822 	return ret;
823 }
824 
825 int __bch2_check_dirent_target(struct btree_trans *trans,
826 			       struct btree_iter *dirent_iter,
827 			       struct bkey_s_c_dirent d,
828 			       struct bch_inode_unpacked *target,
829 			       bool in_fsck)
830 {
831 	struct bch_fs *c = trans->c;
832 	struct printbuf buf = PRINTBUF;
833 	int ret = 0;
834 
835 	ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
836 	if (ret)
837 		goto err;
838 
839 	if (fsck_err_on(d.v->d_type != inode_d_type(target),
840 			trans, dirent_d_type_wrong,
841 			"incorrect d_type: got %s, should be %s:\n%s",
842 			bch2_d_type_str(d.v->d_type),
843 			bch2_d_type_str(inode_d_type(target)),
844 			(printbuf_reset(&buf),
845 			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
846 		struct bkey_i_dirent *n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
847 		ret = PTR_ERR_OR_ZERO(n);
848 		if (ret)
849 			goto err;
850 
851 		bkey_reassemble(&n->k_i, d.s_c);
852 		n->v.d_type = inode_d_type(target);
853 		if (n->v.d_type == DT_SUBVOL) {
854 			n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
855 			n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
856 		} else {
857 			n->v.d_inum = cpu_to_le64(target->bi_inum);
858 		}
859 
860 		ret = bch2_trans_update(trans, dirent_iter, &n->k_i, 0);
861 		if (ret)
862 			goto err;
863 	}
864 err:
865 fsck_err:
866 	printbuf_exit(&buf);
867 	bch_err_fn(c, ret);
868 	return ret;
869 }
870 
871 /*
872  * BCH_INODE_has_case_insensitive:
873  * We have to track whether directories have any descendent directory that is
874  * casefolded - for overlayfs:
875  */
876 
877 static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum)
878 {
879 	struct btree_iter iter = {};
880 	int ret = 0;
881 
882 	while (true) {
883 		struct bch_inode_unpacked inode;
884 		ret = bch2_inode_peek(trans, &iter, &inode, inum,
885 				      BTREE_ITER_intent|BTREE_ITER_with_updates);
886 		if (ret)
887 			break;
888 
889 		if (inode.bi_flags & BCH_INODE_has_case_insensitive)
890 			break;
891 
892 		inode.bi_flags |= BCH_INODE_has_case_insensitive;
893 		ret = bch2_inode_write(trans, &iter, &inode);
894 		if (ret)
895 			break;
896 
897 		bch2_trans_iter_exit(trans, &iter);
898 		if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM))
899 			break;
900 
901 		inum = parent_inum(inum, &inode);
902 	}
903 
904 	bch2_trans_iter_exit(trans, &iter);
905 	return ret;
906 }
907 
908 int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum,
909 					      struct bch_inode_unpacked *inode)
910 {
911 	if (!bch2_inode_casefold(trans->c, inode))
912 		return 0;
913 
914 	inode->bi_flags |= BCH_INODE_has_case_insensitive;
915 
916 	return bch2_propagate_has_case_insensitive(trans, parent_inum(inum, inode));
917 }
918 
919 int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
920 					  struct bch_inode_unpacked *inode,
921 					  snapshot_id_list *snapshot_overwrites,
922 					  bool *do_update)
923 {
924 	struct printbuf buf = PRINTBUF;
925 	bool repairing_parents = false;
926 	int ret = 0;
927 
928 	if (!S_ISDIR(inode->bi_mode)) {
929 		/*
930 		 * Old versions set bi_casefold for non dirs, but that's
931 		 * unnecessary and wasteful
932 		 */
933 		if (inode->bi_casefold) {
934 			inode->bi_casefold = 0;
935 			*do_update = true;
936 		}
937 		return 0;
938 	}
939 
940 	if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive)
941 		return 0;
942 
943 	if (bch2_inode_casefold(trans->c, inode) &&
944 	    !(inode->bi_flags & BCH_INODE_has_case_insensitive)) {
945 		prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
946 			   inode->bi_inum, inode->bi_snapshot);
947 
948 		ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
949 						 snapshot_overwrites, &buf);
950 		if (ret)
951 			goto err;
952 
953 		if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
954 			inode->bi_flags |= BCH_INODE_has_case_insensitive;
955 			*do_update = true;
956 		}
957 	}
958 
959 	if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
960 		goto out;
961 
962 	struct bch_inode_unpacked dir = *inode;
963 	u32 snapshot = dir.bi_snapshot;
964 
965 	while (!(dir.bi_inum	== BCACHEFS_ROOT_INO &&
966 		 dir.bi_subvol	== BCACHEFS_ROOT_SUBVOL)) {
967 		if (dir.bi_parent_subvol) {
968 			ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
969 			if (ret)
970 				goto err;
971 
972 			snapshot_overwrites = NULL;
973 		}
974 
975 		ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
976 		if (ret)
977 			goto err;
978 
979 		if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
980 			prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
981 
982 			ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
983 							 snapshot_overwrites, &buf);
984 			if (ret)
985 				goto err;
986 
987 			if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
988 				dir.bi_flags |= BCH_INODE_has_case_insensitive;
989 				ret = __bch2_fsck_write_inode(trans, &dir);
990 				if (ret)
991 					goto err;
992 			}
993 		}
994 
995 		/*
996 		 * We only need to check the first parent, unless we find an
997 		 * inconsistency
998 		 */
999 		if (!repairing_parents)
1000 			break;
1001 	}
1002 out:
1003 err:
1004 fsck_err:
1005 	printbuf_exit(&buf);
1006 	if (ret)
1007 		return ret;
1008 
1009 	if (repairing_parents) {
1010 		return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
1011 			-BCH_ERR_transaction_restart_nested;
1012 	}
1013 
1014 	return 0;
1015 }
1016