// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"

#include <linux/mm.h>

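/*
 * A backpointer records which btree/level/key owns the data at a given
 * device:bucket offset (encoded in the bkey position). The validate hook
 * only rejects keys that would be unusable: an impossible btree level, or
 * a backpointer for the invalid-device sentinel.
 */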
int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
			      struct bkey_validate_context from)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
	int ret = 0;

	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
			 c, backpointer_level_bad,
			 "backpointer level bad: %u >= %u",
			 bp.v->level, BTREE_MAX_DEPTH);

	bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID,
			 c, backpointer_dev_bad,
			 "backpointer for BCH_SB_MEMBER_INVALID");
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
	if (ca) {
		u32 bucket_offset;
		struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
		rcu_read_unlock();
		prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset);
	} else {
		rcu_read_unlock();
		prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
	}

	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
	prt_str(out, " data_type=");
	bch2_prt_data_type(out, bp.v->data_type);
	prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp.v->bucket_len,
		   bp.v->bucket_gen);
	bch2_bpos_to_text(out, bp.v->pos);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_len = swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

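/*
 * Check whether @bp matches any pointer in extent @k: regenerate the
 * backpointer each decoded pointer would produce and compare both key
 * position and value.
 */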
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bkey_s_c_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bkey_i_backpointer bp2;
		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2);

		if (bpos_eq(bp.k->p, bp2.k.p) &&
		    !memcmp(bp.v, &bp2.v, sizeof(bp2.v)))
			return true;
	}

	return false;
}

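/*
 * Report an inconsistency between an extent and the backpointers btree:
 * either an insert collided with an existing backpointer, or a delete
 * found nothing (or the wrong value) at the expected position. Only a
 * mismatch seen after the check_extents_to_backpointers recovery pass
 * escalates to an unfixed-errors error.
 */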
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bkey_s_c orig_k,
					struct bkey_i_backpointer *new_bp,
					struct bkey_s_c found_bp,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting\n");
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
	}

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
	    __bch2_inconsistent_error(c, &buf))
		ret = -BCH_ERR_erofs_unfixed_errors;

	bch_err(c, "%s", buf.buf);
	printbuf_exit(&buf);
	return ret;
}

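/*
 * Slow path for backpointer updates, used when the btree write buffer is
 * disabled: read the slot directly so we can verify that an insert isn't
 * overwriting a live backpointer, and that a delete is removing the value
 * we expect.
 */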
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
					      struct bkey_s_c orig_k,
					      struct bkey_i_backpointer *bp,
					      bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
					       bp->k.p,
					       BTREE_ITER_intent|
					       BTREE_ITER_slots|
					       BTREE_ITER_with_updates);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) {
		ret = backpointer_mod_err(trans, orig_k, bp, k, insert);
		if (ret)
			goto err;
	}

	if (!insert) {
		bp->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp->k, 0);
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
{
	return (likely(!bch2_backpointers_no_use_write_buffer)
		? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
		: bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
						struct bkey_s_c visiting_k,
						struct bkey_buf *last_flushed)
{
	return likely(!bch2_backpointers_no_use_write_buffer)
		? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
		: 0;
}

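/*
 * Called when a backpointer's target doesn't match it: flush the write
 * buffer first (the backpointer may simply be stale), then report the
 * mismatch and, if the fsck prompt is confirmed, delete the backpointer.
 */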
static int backpointer_target_not_found(struct btree_trans *trans,
					struct bkey_s_c_backpointer bp,
					struct bkey_s_c target_k,
					struct bkey_buf *last_flushed,
					bool commit)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	ret = last_flushed
		? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed)
		: 0;
	if (ret)
		return ret;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
		   bp.v->level ? "btree node" : "extent");
	bch2_bkey_val_to_text(&buf, c, bp.s_c);

	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, target_k);

	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
		if (p.ptr.dev == bp.k->p.inode) {
			prt_newline(&buf);
			struct bkey_i_backpointer bp2;
			bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
		}

	if (fsck_err(trans, backpointer_to_missing_ptr,
		     "%s", buf.buf)) {
		ret = bch2_backpointer_del(trans, bp.k->p);
		if (ret || !commit)
			goto out;

		/*
		 * Normally, on transaction commit from inside a transaction,
		 * we'll return -BCH_ERR_transaction_restart_nested, since a
		 * transaction commit invalidates pointers given out by peek().
		 *
		 * However, since we're updating a write buffer btree, if we
		 * return a transaction restart and loop we won't see that the
		 * backpointer has been deleted without an additional write
		 * buffer flush - and those are expensive.
		 *
		 * So we're relying on the caller immediately advancing to the
		 * next backpointer and starting a new transaction immediately
		 * after backpointer_get_key() returns NULL:
		 */
		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
	}
out:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
						 struct bkey_s_c_backpointer bp,
						 struct btree_iter *iter,
						 struct bkey_buf *last_flushed,
						 bool commit)
{
	struct bch_fs *c = trans->c;

	BUG_ON(!bp.v->level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.v->btree_id,
				  bp.v->pos,
				  0,
				  bp.v->level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(trans, iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.v->level - 1);

	if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
			      bkey_i_to_s_c(&b->key), bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key),
						       last_flushed, commit);
		b = ret ? ERR_PTR(ret) : NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

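/*
 * Look up the key a backpointer points to. Returns bkey_s_c_null if the
 * target is gone (e.g. the backpointer was stale and has now been
 * deleted), an error key on failure, or the target key with @iter
 * positioned on it; for interior-node backpointers this is the btree
 * node's own key.
 */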
static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
						  struct bkey_s_c_backpointer bp,
						  struct btree_iter *iter,
						  unsigned iter_flags,
						  struct bkey_buf *last_flushed,
						  bool commit)
{
	struct bch_fs *c = trans->c;

	if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
		return bkey_s_c_null;

	bch2_trans_node_iter_init(trans, iter,
				  bp.v->btree_id,
				  bp.v->pos,
				  0,
				  bp.v->level,
				  iter_flags);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
	if (bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	/*
	 * peek_slot() doesn't normally return NULL - except when we ask for a
	 * key at a btree level that doesn't exist.
	 *
	 * We may want to revisit this and change peek_slot():
	 */
	if (!k.k) {
		bkey_init(&iter->k);
		iter->k.p = bp.v->pos;
		k.k = &iter->k;
	}

	if (k.k &&
	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
		return k;

	bch2_trans_iter_exit(trans, iter);

	if (!bp.v->level) {
		int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
		return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
	} else {
		struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
		if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
			return bkey_s_c_null;
		if (IS_ERR_OR_NULL(b))
			return ((struct bkey_s_c) { .k = ERR_CAST(b) });

		return bkey_i_to_s_c(&b->key);
	}
}

struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct bkey_s_c_backpointer bp,
					struct btree_iter *iter,
					struct bkey_buf *last_flushed)
{
	return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true);
}

struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct bkey_s_c_backpointer bp,
					 struct btree_iter *iter,
					 unsigned iter_flags,
					 struct bkey_buf *last_flushed)
{
	return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true);
}

static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
						   struct bkey_buf *last_flushed)
{
	if (k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = {};
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_device,
			     "backpointer for missing device:\n%s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (alloc_k.k->type != KEY_TYPE_alloc_v4) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_alloc,
			     "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			     alloc_iter.pos.inode, alloc_iter.pos.offset,
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_backpointers, POS_MIN, 0, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));

	bch2_bkey_buf_exit(&last_flushed, c);
	bch_err_fn(c, ret);
	return ret;
}

struct extents_to_bp_state {
	struct bpos	bp_start;
	struct bpos	bp_end;
	struct bkey_buf	last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

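/*
 * Two extents claim the same space on @dev: read @extent's data and verify
 * its checksum so we can tell which copy is good. Returns 1 (after
 * dropping the device from the extent) if the checksum was bad, 0 if it
 * verified or couldn't be checked, negative on error.
 */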
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
	if (!ca)
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
	bch2_btree_id_to_text(&buf, btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_newline(&buf);
	bch2_btree_id_to_text(&buf, o_btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			trans, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref[READ]);
	printbuf_exit(&buf);
	return ret;
}

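/*
 * Verify that the backpointer @bp generated from @orig_k exists. If the
 * slot holds a different backpointer we have two extents claiming the same
 * bucket space: stale cached pointers are dropped, duplicate versions of
 * the same extent are resolved by deleting the smaller, and otherwise the
 * checksums decide which extent keeps the space.
 */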
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bkey_i_backpointer *bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;

	if (bpos_lt(bp->k.p, s->bp_start) ||
	    bpos_gt(bp->k.p, s->bp_end))
		return 0;

	struct btree_iter bp_iter;
	struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0);
	int ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
		if (ret)
			goto err;

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);

	struct bkey_s_c other_extent =
		__bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
	if (ca) {
		struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
		bkey_for_each_ptr(other_extent_ptrs, ptr)
			if (ptr->dev == bp->k.p.inode &&
			    dev_ptr_stale_rcu(ca, ptr)) {
				/* the btree update may block - drop the RCU read lock first: */
				rcu_read_unlock();
				ret = drop_dev_and_update(trans, other_bp.v->btree_id,
							  other_extent, bp->k.p.inode);
				if (ret)
					goto err;
				goto out;
			}
	}
	rcu_read_unlock();

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.v->btree_id,
						  other_extent, bp->k.p.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	ret = check_extent_checksum(trans,
				    other_bp.v->btree_id, other_extent,
				    bp->v.btree_id, orig_k,
				    bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp->v.btree_id, orig_k,
				    other_bp.v->btree_id, other_extent, bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto out;
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_str(&buf, "missing backpointer\nfor: ");
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\nwant: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
	prt_printf(&buf, "\ngot: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true);

	goto out;
}

static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
			continue;

		rcu_read_lock();
		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
		bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
		bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);

		bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
		rcu_read_unlock();

		if ((check || empty) && !stale) {
			struct bkey_i_backpointer bp;
			bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);

			int ret = check
				? check_bp_exists(trans, s, &bp, k)
				: bch2_bucket_backpointer_mod(trans, k, &bp, true);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(trans, &iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

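/*
 * Pin as many nodes of the given btrees in the btree node cache as the
 * fsck memory budget allows, starting from @start; *end is set to the
 * position where we ran out of memory (or BBPOS_MAX), so callers can
 * process the keyspace in multiple passes without rereading nodes.
 */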
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	bch2_btree_cache_unpin(c);

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask;
	c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask;
	c->btree_cache.pinned_nodes_start = start;
	c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		ret = __for_each_btree_node(trans, iter, btree,
					    btree == start.btree ? start.pos : POS_MIN,
					    0, depth, BTREE_ITER_prefetch, b, ({
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				break;
			}
			bch2_node_pin(c, b);
			0;
		}));
	}

	return ret;
}

static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	struct progress_indicator_state progress;
	int ret = 0;

	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
						  BTREE_ITER_prefetch);

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

enum alloc_sector_counter {
	ALLOC_dirty,
	ALLOC_cached,
	ALLOC_stripe,
	ALLOC_SECTORS_NR
};

static int data_type_to_alloc_counter(enum bch_data_type t)
{
	switch (t) {
	case BCH_DATA_btree:
	case BCH_DATA_user:
		return ALLOC_dirty;
	case BCH_DATA_cached:
		return ALLOC_cached;
	case BCH_DATA_stripe:
	case BCH_DATA_parity:
		return ALLOC_stripe;
	default:
		return -1;
	}
}

static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);

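/*
 * Sum the sectors described by a bucket's backpointers and compare against
 * the alloc key's dirty/cached/stripe sector counts. Buckets that disagree
 * are flagged in the per-device bucket_backpointer_mismatches bitmap (or
 * bucket_backpointer_empty, if they should have no backpointers at all),
 * so the extents -> backpointers pass only has to check flagged buckets.
 */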
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
					     struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
	bool need_commit = false;

	if (a->data_type == BCH_DATA_sb ||
	    a->data_type == BCH_DATA_journal ||
	    a->data_type == BCH_DATA_parity)
		return 0;

	u32 sectors[ALLOC_SECTORS_NR];
	memset(sectors, 0, sizeof(sectors));

	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
	if (!ca)
		return 0;

	struct btree_iter iter;
	struct bkey_s_c bp_k;
	int ret = 0;
	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
				bucket_pos_to_bp_start(ca, alloc_k.k->p),
				bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
		if (bp_k.k->type != KEY_TYPE_backpointer)
			continue;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);

		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
		    (bp.v->bucket_gen != a->gen ||
		     bp.v->pad)) {
			ret = bch2_backpointer_del(trans, bp_k.k->p);
			if (ret)
				break;

			need_commit = true;
			continue;
		}

		if (bp.v->bucket_gen != a->gen)
			continue;

		int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
		if (alloc_counter < 0)
			continue;

		sectors[alloc_counter] += bp.v->bucket_len;
	}
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		goto err;

	if (need_commit) {
		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
		if (ret)
			goto err;
	}

	if (sectors[ALLOC_dirty] != a->dirty_sectors ||
	    sectors[ALLOC_cached] != a->cached_sectors ||
	    sectors[ALLOC_stripe] != a->stripe_sectors) {
		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
			if (ret)
				goto err;
		}

		if (sectors[ALLOC_dirty] > a->dirty_sectors ||
		    sectors[ALLOC_cached] > a->cached_sectors ||
		    sectors[ALLOC_stripe] > a->stripe_sectors) {
			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
				-BCH_ERR_transaction_restart_nested;
			goto err;
		}

		if (!sectors[ALLOC_dirty] &&
		    !sectors[ALLOC_stripe] &&
		    !sectors[ALLOC_cached])
			__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
		else
			__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
	}
err:
	bch2_dev_put(ca);
	return ret;
}

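/*
 * Does this interior backpointers node cover any bucket flagged as
 * mismatched? For btree_ptr_v2 keys we can check the min_key..pos range
 * against the per-device bitmaps; plain btree_ptr keys have no min_key,
 * so we conservatively assume they might.
 */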
static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_btree_ptr_v2: {
		bool ret = false;

		rcu_read_lock();
		struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
		while (pos.inode <= k.k->p.inode) {
			if (pos.inode >= c->sb.nr_devices)
				break;

			struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
			if (!ca)
				goto next;

			struct bpos bucket = bp_pos_to_bucket(ca, pos);
			bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
						      ca->mi.nbuckets, bucket.offset);
			if (bucket.offset == ca->mi.nbuckets)
				goto next;

			ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
			if (ret)
				break;
next:
			pos = SPOS(pos.inode + 1, 0, 0);
		}
		rcu_read_unlock();

		return ret;
	}
	case KEY_TYPE_btree_ptr:
		return true;
	default:
		return false;
	}
}

static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
				  enum btree_id btree, unsigned level)
{
	struct btree_iter iter;
	bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
	int ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b)
		bch2_node_pin(trans->c, b);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

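/*
 * Walk the backpointers btree at level 1 twice: first to prefetch the leaf
 * nodes covering mismatched buckets, then to read and pin them, stopping
 * (and recording *end) once the fsck memory budget is spent.
 */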
static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
						   struct bpos start, struct bpos *end)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	struct bkey_buf tmp;
	bch2_bkey_buf_init(&tmp);

	bch2_btree_cache_unpin(c);

	*end = SPOS_MAX;

	s64 mem_may_pin = mem_may_pin_bytes(c);
	struct btree_iter iter;
	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
				  0, 1, BTREE_ITER_prefetch);
	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
		if (!backpointer_node_has_missing(c, k))
			continue;

		mem_may_pin -= c->opts.btree_node_size;
		if (mem_may_pin <= 0)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		struct btree_path *path = btree_iter_path(trans, &iter);

		BUG_ON(path->level != 1);

		bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
	}));
	if (ret)
		return ret;

	struct bpos pinned = SPOS_MAX;
	mem_may_pin = mem_may_pin_bytes(c);
	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
				  0, 1, BTREE_ITER_prefetch);
	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
		if (!backpointer_node_has_missing(c, k))
			continue;

		mem_may_pin -= c->opts.btree_node_size;
		if (mem_may_pin <= 0) {
			*end = pinned;
			break;
		}

		bch2_bkey_buf_reassemble(&tmp, c, k);
		struct btree_path *path = btree_iter_path(trans, &iter);

		BUG_ON(path->level != 1);

		int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);

		if (!ret2)
			pinned = tmp.k->k.p;

		ret;
	}));

	return ret;
}

int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	int ret = 0;

	/*
	 * Can't allow devices to come/go/resize while we have bucket bitmaps
	 * allocated
	 */
	down_read(&c->state_lock);

	for_each_member_device(c, ca) {
		BUG_ON(ca->bucket_backpointer_mismatches);
		ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
							     sizeof(unsigned long),
							     GFP_KERNEL);
		ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
							sizeof(unsigned long),
							GFP_KERNEL);
		if (!ca->bucket_backpointer_mismatches ||
		    !ca->bucket_backpointer_empty) {
			bch2_dev_put(ca);
			ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
			goto err_free_bitmaps;
		}
	}

	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bp_start = POS_MIN };

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
				 POS_MIN, BTREE_ITER_prefetch, k, ({
		check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
	}));
	if (ret)
		goto err;

	u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
	for_each_member_device(c, ca) {
		nr_buckets	+= ca->mi.nbuckets;
		nr_mismatches	+= bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
		nr_empty	+= bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets);
	}

	if (!nr_mismatches && !nr_empty)
		goto err;

	bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
		 nr_mismatches + nr_empty, nr_buckets);

	while (1) {
		ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
		if (ret)
			break;

		if ( bpos_eq(s.bp_start, POS_MIN) &&
		    !bpos_eq(s.bp_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bp_start, POS_MIN) ||
		    !bpos_eq(s.bp_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bp_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bp_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bp_end, SPOS_MAX))
			break;

		s.bp_start = bpos_successor(s.bp_end);
	}
err:
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);
	bch2_btree_cache_unpin(c);
err_free_bitmaps:
	for_each_member_device(c, ca) {
		kvfree(ca->bucket_backpointer_empty);
		ca->bucket_backpointer_empty = NULL;
		kvfree(ca->bucket_backpointer_mismatches);
		ca->bucket_backpointer_mismatches = NULL;
	}

	up_read(&c->state_lock);
	bch_err_fn(c, ret);
	return ret;
}

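/*
 * The reverse check: every backpointer between @start and @end should
 * point at a live extent or btree node; bch2_backpointer_get_key()
 * reports and repairs the ones that don't.
 */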
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c bp_k,
				 struct bkey_buf *last_flushed)
{
	if (bp_k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
	struct bbpos pos = bp_to_bbpos(*bp.v);

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	struct btree_iter iter;
	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
	int ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
						struct bch_dev *ca, struct bpos bucket)
{
	u32 restart_count = trans->restart_count;
	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
					 bucket_pos_to_bp_start(ca, bucket),
					 bucket_pos_to_bp_end(ca, bucket),
					 0, k,
		check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
	);

	bch2_bkey_buf_exit(&last_flushed, trans->c);
	return ret ?: trans_was_restarted(trans, restart_count);
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bch_fs *c = trans->c;
	struct bkey_buf last_flushed;
	struct progress_indicator_state progress;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

	int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
				     POS_MIN, BTREE_ITER_prefetch, k, ({
		bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
		check_one_backpointer(trans, start, end, k, &last_flushed);
	}));

	bch2_bkey_buf_exit(&last_flushed, c);
	return ret;
}

int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_extents)|
						   BIT_ULL(BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		    bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}