xref: /linux/fs/bcachefs/dirent.c (revision 5e63d579e752549dc256a952bcb35ade398ee921)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "bkey_buf.h"
5 #include "bkey_methods.h"
6 #include "btree_update.h"
7 #include "extents.h"
8 #include "dirent.h"
9 #include "fs.h"
10 #include "keylist.h"
11 #include "str_hash.h"
12 #include "subvolume.h"
13 
14 #include <linux/dcache.h>
15 
16 int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
17 		  const struct qstr *str, struct qstr *out_cf)
18 {
19 	*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
20 
21 #ifdef CONFIG_UNICODE
22 	unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
23 	int ret = PTR_ERR_OR_ZERO(buf);
24 	if (ret)
25 		return ret;
26 
27 	ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1);
28 	if (ret <= 0)
29 		return ret;
30 
31 	*out_cf = (struct qstr) QSTR_INIT(buf, ret);
32 	return 0;
33 #else
34 	return -EOPNOTSUPP;
35 #endif
36 }
37 
38 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
39 {
40 	if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
41 		return 0;
42 
43 	unsigned bkey_u64s = bkey_val_u64s(d.k);
44 	unsigned bkey_bytes = bkey_u64s * sizeof(u64);
45 	u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
46 #if CPU_BIG_ENDIAN
47 	unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
48 #else
49 	unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
50 #endif
51 
52 	return bkey_bytes -
53 		(d.v->d_casefold
54 		? offsetof(struct bch_dirent, d_cf_name_block.d_names)
55 		: offsetof(struct bch_dirent, d_name)) -
56 		trailing_nuls;
57 }
58 
59 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
60 {
61 	if (d.v->d_casefold) {
62 		unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
63 		return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len);
64 	} else {
65 		return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
66 	}
67 }
68 
69 static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
70 {
71 	if (d.v->d_casefold) {
72 		unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
73 		unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len);
74 		return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len);
75 	} else {
76 		return (struct qstr) QSTR_INIT(NULL, 0);
77 	}
78 }
79 
80 static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d)
81 {
82 	return d.v->d_casefold
83 		? bch2_dirent_get_casefold_name(d)
84 		: bch2_dirent_get_name(d);
85 }
86 
87 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
88 			    const struct qstr *name)
89 {
90 	struct bch_str_hash_ctx ctx;
91 
92 	bch2_str_hash_init(&ctx, info);
93 	bch2_str_hash_update(&ctx, info, name->name, name->len);
94 
95 	/* [0,2) reserved for dots */
96 	return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
97 }
98 
99 static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
100 {
101 	return bch2_dirent_hash(info, key);
102 }
103 
104 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
105 {
106 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
107 	struct qstr name = bch2_dirent_get_lookup_name(d);
108 
109 	return bch2_dirent_hash(info, &name);
110 }
111 
112 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
113 {
114 	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
115 	const struct qstr l_name = bch2_dirent_get_lookup_name(l);
116 	const struct qstr *r_name = _r;
117 
118 	return !qstr_eq(l_name, *r_name);
119 }
120 
121 static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
122 {
123 	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
124 	struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
125 	const struct qstr l_name = bch2_dirent_get_lookup_name(l);
126 	const struct qstr r_name = bch2_dirent_get_lookup_name(r);
127 
128 	return !qstr_eq(l_name, r_name);
129 }
130 
131 static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
132 {
133 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
134 
135 	if (d.v->d_type == DT_SUBVOL)
136 		return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
137 	return true;
138 }
139 
140 const struct bch_hash_desc bch2_dirent_hash_desc = {
141 	.btree_id	= BTREE_ID_dirents,
142 	.key_type	= KEY_TYPE_dirent,
143 	.hash_key	= dirent_hash_key,
144 	.hash_bkey	= dirent_hash_bkey,
145 	.cmp_key	= dirent_cmp_key,
146 	.cmp_bkey	= dirent_cmp_bkey,
147 	.is_visible	= dirent_is_visible,
148 };
149 
150 int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
151 			 struct bkey_validate_context from)
152 {
153 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
154 	unsigned name_block_len = bch2_dirent_name_bytes(d);
155 	struct qstr d_name = bch2_dirent_get_name(d);
156 	struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
157 	int ret = 0;
158 
159 	bkey_fsck_err_on(!d_name.len,
160 			 c, dirent_empty_name,
161 			 "empty name");
162 
163 	bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len,
164 			 c, dirent_val_too_big,
165 			 "dirent names exceed bkey size (%d + %d > %d)",
166 			 d_name.len, d_cf_name.len, name_block_len);
167 
168 	/*
169 	 * Check new keys don't exceed the max length
170 	 * (older keys may be larger.)
171 	 */
172 	bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX,
173 			 c, dirent_name_too_long,
174 			 "dirent name too big (%u > %u)",
175 			 d_name.len, BCH_NAME_MAX);
176 
177 	bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len),
178 			 c, dirent_name_embedded_nul,
179 			 "dirent has stray data after name's NUL");
180 
181 	bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
182 			 (d_name.len == 2 && !memcmp(d_name.name, "..", 2)),
183 			 c, dirent_name_dot_or_dotdot,
184 			 "invalid name");
185 
186 	bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len),
187 			 c, dirent_name_has_slash,
188 			 "name with /");
189 
190 	bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
191 			 le64_to_cpu(d.v->d_inum) == d.k->p.inode,
192 			 c, dirent_to_itself,
193 			 "dirent points to own directory");
194 
195 	if (d.v->d_casefold) {
196 		bkey_fsck_err_on(from.from == BKEY_VALIDATE_commit &&
197 				 d_cf_name.len > BCH_NAME_MAX,
198 				 c, dirent_cf_name_too_big,
199 				 "dirent w/ cf name too big (%u > %u)",
200 				 d_cf_name.len, BCH_NAME_MAX);
201 
202 		bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len),
203 				 c, dirent_stray_data_after_cf_name,
204 				 "dirent has stray data after cf name's NUL");
205 	}
206 fsck_err:
207 	return ret;
208 }
209 
210 void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
211 {
212 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
213 	struct qstr d_name = bch2_dirent_get_name(d);
214 
215 	prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
216 
217 	if (d.v->d_type != DT_SUBVOL)
218 		prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
219 	else
220 		prt_printf(out, "%u -> %u",
221 			   le32_to_cpu(d.v->d_parent_subvol),
222 			   le32_to_cpu(d.v->d_child_subvol));
223 
224 	prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
225 }
226 
227 static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
228 				subvol_inum dir,
229 				u8 type,
230 				int name_len, int cf_name_len,
231 				u64 dst)
232 {
233 	struct bkey_i_dirent *dirent;
234 	unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);
235 
236 	BUG_ON(u64s > U8_MAX);
237 
238 	dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
239 	if (IS_ERR(dirent))
240 		return dirent;
241 
242 	bkey_dirent_init(&dirent->k_i);
243 	dirent->k.u64s = u64s;
244 
245 	if (type != DT_SUBVOL) {
246 		dirent->v.d_inum = cpu_to_le64(dst);
247 	} else {
248 		dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
249 		dirent->v.d_child_subvol = cpu_to_le32(dst);
250 	}
251 
252 	dirent->v.d_type = type;
253 	dirent->v.d_unused = 0;
254 	dirent->v.d_casefold = cf_name_len ? 1 : 0;
255 
256 	return dirent;
257 }
258 
259 static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
260 				     const struct qstr *name)
261 {
262 	EBUG_ON(dirent->v.d_casefold);
263 
264 	memcpy(&dirent->v.d_name[0], name->name, name->len);
265 	memset(&dirent->v.d_name[name->len], 0,
266 		bkey_val_bytes(&dirent->k) -
267 		offsetof(struct bch_dirent, d_name) -
268 		name->len);
269 }
270 
271 static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
272 					const struct qstr *name,
273 					const struct qstr *cf_name)
274 {
275 	EBUG_ON(!dirent->v.d_casefold);
276 	EBUG_ON(!cf_name->len);
277 
278 	dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
279 	dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len);
280 	memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
281 	memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
282 	memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
283 		bkey_val_bytes(&dirent->k) -
284 		offsetof(struct bch_dirent, d_cf_name_block.d_names) -
285 		name->len + cf_name->len);
286 
287 	EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
288 }
289 
290 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
291 				subvol_inum dir,
292 				u8 type,
293 				const struct qstr *name,
294 				const struct qstr *cf_name,
295 				u64 dst)
296 {
297 	struct bkey_i_dirent *dirent;
298 
299 	if (name->len > BCH_NAME_MAX)
300 		return ERR_PTR(-ENAMETOOLONG);
301 
302 	dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
303 	if (IS_ERR(dirent))
304 		return dirent;
305 
306 	if (cf_name)
307 		dirent_init_casefolded_name(dirent, name, cf_name);
308 	else
309 		dirent_init_regular_name(dirent, name);
310 
311 	EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);
312 
313 	return dirent;
314 }
315 
316 int bch2_dirent_create_snapshot(struct btree_trans *trans,
317 			u32 dir_subvol, u64 dir, u32 snapshot,
318 			const struct bch_hash_info *hash_info,
319 			u8 type, const struct qstr *name, u64 dst_inum,
320 			u64 *dir_offset,
321 			enum btree_iter_update_trigger_flags flags)
322 {
323 	subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
324 	struct bkey_i_dirent *dirent;
325 	int ret;
326 
327 	dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
328 	ret = PTR_ERR_OR_ZERO(dirent);
329 	if (ret)
330 		return ret;
331 
332 	dirent->k.p.inode	= dir;
333 	dirent->k.p.snapshot	= snapshot;
334 
335 	ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
336 					dir_inum, snapshot, &dirent->k_i,
337 					flags|BTREE_UPDATE_internal_snapshot_node);
338 	*dir_offset = dirent->k.p.offset;
339 
340 	return ret;
341 }
342 
343 int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
344 		       const struct bch_hash_info *hash_info,
345 		       u8 type, const struct qstr *name, u64 dst_inum,
346 		       u64 *dir_offset,
347 		       u64 *i_size,
348 		       enum btree_iter_update_trigger_flags flags)
349 {
350 	struct bkey_i_dirent *dirent;
351 	int ret;
352 
353 	if (hash_info->cf_encoding) {
354 		struct qstr cf_name;
355 		ret = bch2_casefold(trans, hash_info, name, &cf_name);
356 		if (ret)
357 			return ret;
358 		dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
359 	} else {
360 		dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
361 	}
362 
363 	ret = PTR_ERR_OR_ZERO(dirent);
364 	if (ret)
365 		return ret;
366 
367 	*i_size += bkey_bytes(&dirent->k);
368 
369 	ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
370 			    dir, &dirent->k_i, flags);
371 	*dir_offset = dirent->k.p.offset;
372 
373 	return ret;
374 }
375 
376 int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
377 			    struct bkey_s_c_dirent d, subvol_inum *target)
378 {
379 	struct bch_subvolume s;
380 	int ret = 0;
381 
382 	if (d.v->d_type == DT_SUBVOL &&
383 	    le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
384 		return 1;
385 
386 	if (likely(d.v->d_type != DT_SUBVOL)) {
387 		target->subvol	= dir.subvol;
388 		target->inum	= le64_to_cpu(d.v->d_inum);
389 	} else {
390 		target->subvol	= le32_to_cpu(d.v->d_child_subvol);
391 
392 		ret = bch2_subvolume_get(trans, target->subvol, true, &s);
393 
394 		target->inum	= le64_to_cpu(s.inode);
395 	}
396 
397 	return ret;
398 }
399 
400 int bch2_dirent_rename(struct btree_trans *trans,
401 		subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size,
402 		subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size,
403 		const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
404 		const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
405 		enum bch_rename_mode mode)
406 {
407 	struct qstr src_name_lookup, dst_name_lookup;
408 	struct btree_iter src_iter = {};
409 	struct btree_iter dst_iter = {};
410 	struct bkey_s_c old_src, old_dst = bkey_s_c_null;
411 	struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
412 	struct bpos dst_pos =
413 		POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
414 	unsigned src_update_flags = 0;
415 	bool delete_src, delete_dst;
416 	int ret = 0;
417 
418 	memset(src_inum, 0, sizeof(*src_inum));
419 	memset(dst_inum, 0, sizeof(*dst_inum));
420 
421 	/* Lookup src: */
422 	ret = bch2_maybe_casefold(trans, src_hash, src_name, &src_name_lookup);
423 	if (ret)
424 		goto out;
425 	old_src = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
426 				   src_hash, src_dir, &src_name_lookup,
427 				   BTREE_ITER_intent);
428 	ret = bkey_err(old_src);
429 	if (ret)
430 		goto out;
431 
432 	ret = bch2_dirent_read_target(trans, src_dir,
433 			bkey_s_c_to_dirent(old_src), src_inum);
434 	if (ret)
435 		goto out;
436 
437 	/* Lookup dst: */
438 	ret = bch2_maybe_casefold(trans, dst_hash, dst_name, &dst_name_lookup);
439 	if (ret)
440 		goto out;
441 	if (mode == BCH_RENAME) {
442 		/*
443 		 * Note that we're _not_ checking if the target already exists -
444 		 * we're relying on the VFS to do that check for us for
445 		 * correctness:
446 		 */
447 		ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
448 				     dst_hash, dst_dir, &dst_name_lookup);
449 		if (ret)
450 			goto out;
451 	} else {
452 		old_dst = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
453 					    dst_hash, dst_dir, &dst_name_lookup,
454 					    BTREE_ITER_intent);
455 		ret = bkey_err(old_dst);
456 		if (ret)
457 			goto out;
458 
459 		ret = bch2_dirent_read_target(trans, dst_dir,
460 				bkey_s_c_to_dirent(old_dst), dst_inum);
461 		if (ret)
462 			goto out;
463 	}
464 
465 	if (mode != BCH_RENAME_EXCHANGE)
466 		*src_offset = dst_iter.pos.offset;
467 
468 	/* Create new dst key: */
469 	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
470 				    dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
471 	ret = PTR_ERR_OR_ZERO(new_dst);
472 	if (ret)
473 		goto out;
474 
475 	dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
476 	new_dst->k.p = dst_iter.pos;
477 
478 	/* Create new src key: */
479 	if (mode == BCH_RENAME_EXCHANGE) {
480 		new_src = dirent_create_key(trans, src_dir, 0, src_name,
481 					    src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
482 		ret = PTR_ERR_OR_ZERO(new_src);
483 		if (ret)
484 			goto out;
485 
486 		dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
487 		new_src->k.p = src_iter.pos;
488 	} else {
489 		new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
490 		ret = PTR_ERR_OR_ZERO(new_src);
491 		if (ret)
492 			goto out;
493 
494 		bkey_init(&new_src->k);
495 		new_src->k.p = src_iter.pos;
496 
497 		if (bkey_le(dst_pos, src_iter.pos) &&
498 		    bkey_lt(src_iter.pos, dst_iter.pos)) {
499 			/*
500 			 * We have a hash collision for the new dst key,
501 			 * and new_src - the key we're deleting - is between
502 			 * new_dst's hashed slot and the slot we're going to be
503 			 * inserting it into - oops.  This will break the hash
504 			 * table if we don't deal with it:
505 			 */
506 			if (mode == BCH_RENAME) {
507 				/*
508 				 * If we're not overwriting, we can just insert
509 				 * new_dst at the src position:
510 				 */
511 				new_src = new_dst;
512 				new_src->k.p = src_iter.pos;
513 				goto out_set_src;
514 			} else {
515 				/* If we're overwriting, we can't insert new_dst
516 				 * at a different slot because it has to
517 				 * overwrite old_dst - just make sure to use a
518 				 * whiteout when deleting src:
519 				 */
520 				new_src->k.type = KEY_TYPE_hash_whiteout;
521 			}
522 		} else {
523 			/* Check if we need a whiteout to delete src: */
524 			ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
525 						       src_hash, &src_iter);
526 			if (ret < 0)
527 				goto out;
528 
529 			if (ret)
530 				new_src->k.type = KEY_TYPE_hash_whiteout;
531 		}
532 	}
533 
534 	if (new_dst->v.d_type == DT_SUBVOL)
535 		new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);
536 
537 	if ((mode == BCH_RENAME_EXCHANGE) &&
538 	    new_src->v.d_type == DT_SUBVOL)
539 		new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);
540 
541 	if (old_dst.k)
542 		*dst_dir_i_size -= bkey_bytes(old_dst.k);
543 	*src_dir_i_size -= bkey_bytes(old_src.k);
544 
545 	if (mode == BCH_RENAME_EXCHANGE)
546 		*src_dir_i_size += bkey_bytes(&new_src->k);
547 	*dst_dir_i_size += bkey_bytes(&new_dst->k);
548 
549 	ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
550 	if (ret)
551 		goto out;
552 out_set_src:
553 	/*
554 	 * If we're deleting a subvolume we need to really delete the dirent,
555 	 * not just emit a whiteout in the current snapshot - there can only be
556 	 * single dirent that points to a given subvolume.
557 	 *
558 	 * IOW, we don't maintain multiple versions in different snapshots of
559 	 * dirents that point to subvolumes - dirents that point to subvolumes
560 	 * are only visible in one particular subvolume so it's not necessary,
561 	 * and it would be particularly confusing for fsck to have to deal with.
562 	 */
563 	delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
564 		new_src->k.p.snapshot != old_src.k->p.snapshot;
565 
566 	delete_dst = old_dst.k &&
567 		bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
568 		new_dst->k.p.snapshot != old_dst.k->p.snapshot;
569 
570 	if (!delete_src || !bkey_deleted(&new_src->k)) {
571 		ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
572 		if (ret)
573 			goto out;
574 	}
575 
576 	if (delete_src) {
577 		bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot);
578 		ret =   bch2_btree_iter_traverse(trans, &src_iter) ?:
579 			bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node);
580 		if (ret)
581 			goto out;
582 	}
583 
584 	if (delete_dst) {
585 		bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot);
586 		ret =   bch2_btree_iter_traverse(trans, &dst_iter) ?:
587 			bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_internal_snapshot_node);
588 		if (ret)
589 			goto out;
590 	}
591 
592 	if (mode == BCH_RENAME_EXCHANGE)
593 		*src_offset = new_src->k.p.offset;
594 	*dst_offset = new_dst->k.p.offset;
595 out:
596 	bch2_trans_iter_exit(trans, &src_iter);
597 	bch2_trans_iter_exit(trans, &dst_iter);
598 	return ret;
599 }
600 
601 int bch2_dirent_lookup_trans(struct btree_trans *trans,
602 			     struct btree_iter *iter,
603 			     subvol_inum dir,
604 			     const struct bch_hash_info *hash_info,
605 			     const struct qstr *name, subvol_inum *inum,
606 			     unsigned flags)
607 {
608 	struct qstr lookup_name;
609 	int ret = bch2_maybe_casefold(trans, hash_info, name, &lookup_name);
610 	if (ret)
611 		return ret;
612 
613 	struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
614 					     hash_info, dir, &lookup_name, flags);
615 	ret = bkey_err(k);
616 	if (ret)
617 		goto err;
618 
619 	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
620 	if (ret > 0)
621 		ret = -ENOENT;
622 err:
623 	if (ret)
624 		bch2_trans_iter_exit(trans, iter);
625 	return ret;
626 }
627 
628 u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
629 		       const struct bch_hash_info *hash_info,
630 		       const struct qstr *name, subvol_inum *inum)
631 {
632 	struct btree_trans *trans = bch2_trans_get(c);
633 	struct btree_iter iter = {};
634 
635 	int ret = lockrestart_do(trans,
636 		bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
637 	bch2_trans_iter_exit(trans, &iter);
638 	bch2_trans_put(trans);
639 	return ret;
640 }
641 
642 int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot)
643 {
644 	struct btree_iter iter;
645 	struct bkey_s_c k;
646 	int ret;
647 
648 	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
649 			   SPOS(dir, 0, snapshot),
650 			   POS(dir, U64_MAX), 0, k, ret)
651 		if (k.k->type == KEY_TYPE_dirent) {
652 			struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
653 			if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
654 				continue;
655 			ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
656 			break;
657 		}
658 	bch2_trans_iter_exit(trans, &iter);
659 
660 	return ret;
661 }
662 
663 int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
664 {
665 	u32 snapshot;
666 
667 	return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
668 		bch2_empty_dir_snapshot(trans, dir.inum, dir.subvol, snapshot);
669 }
670 
671 static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subvol_inum target)
672 {
673 	struct qstr name = bch2_dirent_get_name(d);
674 	/*
675 	 * Although not required by the kernel code, updating ctx->pos is needed
676 	 * for the bcachefs FUSE driver. Without this update, the FUSE
677 	 * implementation will be stuck in an infinite loop when reading
678 	 * directories (via the bcachefs_fuse_readdir callback).
679 	 * In kernel space, ctx->pos is updated by the VFS code.
680 	 */
681 	ctx->pos = d.k->p.offset;
682 	bool ret = dir_emit(ctx, name.name,
683 		      name.len,
684 		      target.inum,
685 		      vfs_d_type(d.v->d_type));
686 	if (ret)
687 		ctx->pos = d.k->p.offset + 1;
688 	return !ret;
689 }
690 
691 int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
692 {
693 	struct bkey_buf sk;
694 	bch2_bkey_buf_init(&sk);
695 
696 	int ret = bch2_trans_run(c,
697 		for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents,
698 				   POS(inum.inum, ctx->pos),
699 				   POS(inum.inum, U64_MAX),
700 				   inum.subvol, 0, k, ({
701 			if (k.k->type != KEY_TYPE_dirent)
702 				continue;
703 
704 			/* dir_emit() can fault and block: */
705 			bch2_bkey_buf_reassemble(&sk, c, k);
706 			struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k);
707 
708 			subvol_inum target;
709 			int ret2 = bch2_dirent_read_target(trans, inum, dirent, &target);
710 			if (ret2 > 0)
711 				continue;
712 
713 			ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target));
714 		})));
715 
716 	bch2_bkey_buf_exit(&sk, c);
717 
718 	return ret < 0 ? ret : 0;
719 }
720 
721 /* fsck */
722 
723 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
724 			      struct bch_inode_unpacked *inode)
725 {
726 	struct btree_iter iter;
727 	struct bkey_s_c k;
728 	int ret;
729 
730 	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
731 				     BTREE_ITER_all_snapshots, k, ret) {
732 		if (k.k->p.offset != inode_nr)
733 			break;
734 		if (!bkey_is_inode(k.k))
735 			continue;
736 		ret = bch2_inode_unpack(k, inode);
737 		goto found;
738 	}
739 	ret = -BCH_ERR_ENOENT_inode;
740 found:
741 	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
742 	bch2_trans_iter_exit(trans, &iter);
743 	return ret;
744 }
745 
746 int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos)
747 {
748 	struct bch_fs *c = trans->c;
749 	struct btree_iter iter;
750 	struct bch_inode_unpacked dir_inode;
751 	struct bch_hash_info dir_hash_info;
752 	int ret;
753 
754 	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
755 	if (ret)
756 		goto err;
757 
758 	dir_hash_info = bch2_hash_info_init(c, &dir_inode);
759 
760 	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
761 
762 	ret =   bch2_btree_iter_traverse(trans, &iter) ?:
763 		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
764 				    &dir_hash_info, &iter,
765 				    BTREE_UPDATE_internal_snapshot_node);
766 	bch2_trans_iter_exit(trans, &iter);
767 err:
768 	bch_err_fn(c, ret);
769 	return ret;
770 }
771