// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_error.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Rebuilding the Reference Count Btree
 * ====================================
 *
 * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
 * entries as rectangles representing extents of physical blocks, and
 * that the rectangles can be laid down to allow them to overlap each
 * other; then we know that we must emit a refcnt btree entry wherever
 * the amount of overlap changes, i.e. the emission stimulus is
 * level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time. Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change. This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * Like all the other repairers, we make a list of all the refcount
 * records we need, then reinitialize the refcount btree root and
 * insert all the records.
 */
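
/*
 * For orientation only: an illustrative, userspace-style sketch of the loop
 * above, working on a plain sorted array instead of the rmap btree and the
 * xfarray "bag" that the code below uses.  Every name here (ex_rmap, ex_emit,
 * the fixed-size bag) is hypothetical and exists only for this sketch; the
 * real code also skips unshareable rmaps and records CoW staging extents
 * separately, which the sketch omits.
 *
 *	struct ex_rmap { uint32_t start; uint32_t len; };
 *
 *	void ex_build_refcounts(const struct ex_rmap *rmaps, unsigned int nr)
 *	{
 *		struct ex_rmap bag[64];	// toy bag; real code uses an xfarray
 *		unsigned int bag_sz = 0, old_sz, i = 0, j;
 *		uint32_t cbno, nbno, end;
 *
 *		while (i < nr) {
 *			// Push all rmaps starting at the next unprocessed block.
 *			cbno = rmaps[i].start;
 *			while (i < nr && rmaps[i].start == cbno)
 *				bag[bag_sz++] = rmaps[i++];
 *			old_sz = bag_sz;
 *
 *			while (bag_sz) {
 *				// Next edge: the closest of the next rmap's start
 *				// and the end of each rmap in the bag.
 *				nbno = i < nr ? rmaps[i].start : UINT32_MAX;
 *				for (j = 0; j < bag_sz; j++) {
 *					end = bag[j].start + bag[j].len;
 *					if (end < nbno)
 *						nbno = end;
 *				}
 *
 *				// Pop rmaps ending at nbno, push rmaps starting there.
 *				for (j = 0; j < bag_sz; )
 *					if (bag[j].start + bag[j].len == nbno)
 *						bag[j] = bag[--bag_sz];
 *					else
 *						j++;
 *				while (i < nr && rmaps[i].start == nbno)
 *					bag[bag_sz++] = rmaps[i++];
 *
 *				// Sharing level changed; emit the extent we just
 *				// crossed.  Refcount == 1 is not stored on disk.
 *				if (bag_sz != old_sz) {
 *					if (old_sz > 1)
 *						ex_emit(cbno, nbno - cbno, old_sz);
 *					cbno = nbno;
 *					old_sz = bag_sz;
 *				}
 *			}
 *		}
 *	}
 */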

/* The only parts of the rmap that we care about for computing refcounts. */
struct xrep_refc_rmap {
	xfs_agblock_t		startblock;
	xfs_extlen_t		blockcount;
} __packed;

struct xrep_refc {
	/* refcount extents */
	struct xfarray		*refcount_records;

	/* new refcountbt information */
	struct xrep_newbt	new_btree;

	/* old refcountbt blocks */
	struct xagb_bitmap	old_refcountbt_blocks;

	struct xfs_scrub	*sc;

	/* get_records()'s position in the refcount record array. */
	xfarray_idx_t		array_cur;

	/* # of refcountbt blocks */
	xfs_extlen_t		btblocks;
};

/* Check for any obvious conflicts with this shared/CoW staging extent. */
STATIC int
xrep_refc_check_ext(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*rec)
{
	enum xbtree_recpacking		outcome;
	int				error;

	if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock,
			rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rc_startblock, rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Record a reference count extent. */
STATIC int
xrep_refc_stash(
	struct xrep_refc	*rr,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	uint64_t		refcount)
{
	struct xfs_refcount_irec irec = {
		.rc_startblock	= agbno,
		.rc_blockcount	= len,
		.rc_domain	= domain,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);

	error = xrep_refc_check_ext(rr->sc, &irec);
	if (error)
		return error;

	trace_xrep_refc_found(sc->sa.pag, &irec);

	return xfarray_append(rr->refcount_records, &irec);
}

/* Record a CoW staging extent. */
STATIC int
xrep_refc_stash_cow(
	struct xrep_refc	*rr,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1);
}

/* Decide if an rmap could describe a shared extent. */
static inline bool
xrep_refc_rmap_shareable(
	struct xfs_mount		*mp,
	const struct xfs_rmap_irec	*rmap)
{
	/* AG metadata are never sharable */
	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
		return false;

	/* Metadata in files are never shareable */
	if (xfs_internal_inum(mp, rmap->rm_owner))
		return false;

	/* Metadata and unwritten file blocks are not shareable. */
	if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			      XFS_RMAP_UNWRITTEN))
		return false;

	return true;
}

/*
 * Walk along the reverse mapping records until we find one that could describe
 * a shared extent.
 */
STATIC int
xrep_refc_walk_rmaps(
	struct xrep_refc	*rr,
	struct xrep_refc_rmap	*rrm,
	bool			*have_rec)
{
	struct xfs_rmap_irec	rmap;
	struct xfs_btree_cur	*cur = rr->sc->sa.rmap_cur;
	struct xfs_mount	*mp = cur->bc_mp;
	int			have_gt;
	int			error = 0;

	*have_rec = false;

	/*
	 * Loop through the remaining rmaps. Remember CoW staging
	 * extents and the refcountbt blocks from the old tree for later
	 * disposal. We can only share written data fork extents, so
	 * keep looping until we find an rmap for one.
	 */
	do {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfs_btree_increment(cur, 0, &have_gt);
		if (error)
			return error;
		if (!have_gt)
			return 0;

		error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, !have_gt))
			return -EFSCORRUPTED;

		if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
			error = xrep_refc_stash_cow(rr, rmap.rm_startblock,
					rmap.rm_blockcount);
			if (error)
				return error;
		} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) {
			/* refcountbt block, dump it when we're done. */
			rr->btblocks += rmap.rm_blockcount;
			error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
					rmap.rm_startblock, rmap.rm_blockcount);
			if (error)
				return error;
		}
	} while (!xrep_refc_rmap_shareable(mp, &rmap));

	rrm->startblock = rmap.rm_startblock;
	rrm->blockcount = rmap.rm_blockcount;
	*have_rec = true;
	return 0;
}

static inline uint32_t
xrep_refc_encode_startblock(
	const struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
	if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
		start |= XFS_REFC_COWFLAG;

	return start;
}
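
/*
 * Note that XFS_REFC_COWFLAG is the high bit of the 32-bit startblock
 * encoding, so encoding the domain this way makes every CoW staging record
 * sort after every shared record, matching the ondisk key order of the
 * refcount btree.
 */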

/* Sort in the same order as the ondisk records. */
static int
xrep_refc_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_refcount_irec	*ap = a;
	const struct xfs_refcount_irec	*bp = b;
	uint32_t		sa, sb;

	sa = xrep_refc_encode_startblock(ap);
	sb = xrep_refc_encode_startblock(bp);

	if (sa > sb)
		return 1;
	if (sa < sb)
		return -1;
	return 0;
}

/*
 * Sort the refcount extents by startblock or else the btree records will be in
 * the wrong order. Make sure the records do not overlap in physical space.
 */
STATIC int
xrep_refc_sort_records(
	struct xrep_refc	*rr)
{
	struct xfs_refcount_irec irec;
	xfarray_idx_t		cur;
	enum xfs_refc_domain	dom = XFS_REFC_DOMAIN_SHARED;
	xfs_agblock_t		next_agbno = 0;
	int			error;

	error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rr->refcount_records, cur) {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfarray_load(rr->refcount_records, cur, &irec);
		if (error)
			return error;

		if (dom == XFS_REFC_DOMAIN_SHARED &&
		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
			dom = irec.rc_domain;
			next_agbno = 0;
		}

		if (dom != irec.rc_domain)
			return -EFSCORRUPTED;
		if (irec.rc_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = irec.rc_startblock + irec.rc_blockcount;
	}

	return error;
}

#define RRM_NEXT(r)	((r).startblock + (r).blockcount)
/*
 * Find the next block where the refcount changes, given the next rmap we
 * looked at and the ones we're already tracking.
 */
static inline int
xrep_refc_next_edge(
	struct xfarray		*rmap_bag,
	struct xrep_refc_rmap	*next_rrm,
	bool			next_valid,
	xfs_agblock_t		*nbnop)
{
	struct xrep_refc_rmap	rrm;
	xfarray_idx_t		array_cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		nbno = NULLAGBLOCK;
	int			error;

	if (next_valid)
		nbno = next_rrm->startblock;

	while ((error = xfarray_iter(rmap_bag, &array_cur, &rrm)) == 1)
		nbno = min_t(xfs_agblock_t, nbno, RRM_NEXT(rrm));

	if (error)
		return error;

	/*
	 * We should have found /something/ because either next_rrm is the next
	 * interesting rmap to look at after emitting this refcount extent, or
	 * there are other rmaps in rmap_bag contributing to the current
	 * sharing count. But if something is seriously wrong, bail out.
	 */
	if (nbno == NULLAGBLOCK)
		return -EFSCORRUPTED;

	*nbnop = nbno;
	return 0;
}

/*
 * Walk forward through the rmap btree to collect all rmaps starting at
 * @bno in @rmap_bag. These represent the file(s) that share ownership of
 * the current block. Upon return, the rmap cursor points to the last record
 * satisfying the startblock constraint.
 */
static int
xrep_refc_push_rmaps_at(
	struct xrep_refc	*rr,
	struct xfarray		*rmap_bag,
	xfs_agblock_t		bno,
	struct xrep_refc_rmap	*rrm,
	bool			*have,
	uint64_t		*stack_sz)
{
	struct xfs_scrub	*sc = rr->sc;
	int			have_gt;
	int			error;

	while (*have && rrm->startblock == bno) {
		error = xfarray_store_anywhere(rmap_bag, rrm);
		if (error)
			return error;
		(*stack_sz)++;
		error = xrep_refc_walk_rmaps(rr, rrm, have);
		if (error)
			return error;
	}

	error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(sc->mp, !have_gt))
		return -EFSCORRUPTED;

	return 0;
}

/* Iterate all the rmap records to generate reference count data. */
STATIC int
xrep_refc_find_refcounts(
	struct xrep_refc	*rr)
{
	struct xrep_refc_rmap	rrm;
	struct xfs_scrub	*sc = rr->sc;
	struct xfarray		*rmap_bag;
	char			*descr;
	uint64_t		old_stack_sz;
	uint64_t		stack_sz = 0;
	xfs_agblock_t		sbno;
	xfs_agblock_t		cbno;
	xfs_agblock_t		nbno;
	bool			have;
	int			error;

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Set up a sparse array to store all the rmap records that we're
	 * tracking to generate a reference count record. If this exceeds
	 * MAXREFCOUNT, we clamp rc_refcount.
	 */
	descr = xchk_xfile_ag_descr(sc, "rmap record bag");
	error = xfarray_create(descr, 0, sizeof(struct xrep_refc_rmap),
			&rmap_bag);
	kfree(descr);
	if (error)
		goto out_cur;

	/* Start the rmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sa.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_refc_walk_rmaps(rr, &rrm, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rrm.startblock;
		error = xrep_refc_push_rmaps_at(rr, rmap_bag, sbno,
					&rrm, &have, &stack_sz);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = xrep_refc_next_edge(rmap_bag, &rrm, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_sz = stack_sz;

		/* While stack isn't empty... */
		while (stack_sz) {
			xfarray_idx_t	array_cur = XFARRAY_CURSOR_INIT;

			/* Pop all rmaps that end at nbno */
			while ((error = xfarray_iter(rmap_bag, &array_cur,
								&rrm)) == 1) {
				if (RRM_NEXT(rrm) != nbno)
					continue;
				error = xfarray_unset(rmap_bag, array_cur - 1);
				if (error)
					goto out_bag;
				stack_sz--;
			}
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_refc_walk_rmaps(rr, &rrm, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_refc_push_rmaps_at(rr, rmap_bag,
						nbno, &rrm, &have, &stack_sz);
				if (error)
					goto out_bag;
			}

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (stack_sz != old_stack_sz) {
				if (old_stack_sz > 1) {
					error = xrep_refc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_sz);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (stack_sz == 0)
				break;
			old_stack_sz = stack_sz;
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = xrep_refc_next_edge(rmap_bag, &rrm, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	ASSERT(stack_sz == 0);
out_bag:
	xfarray_destroy(rmap_bag);
out_cur:
	xchk_ag_btcur_free(&sc->sa);
	return error;
}
#undef RRM_NEXT

/* Retrieve refcountbt data for bulk load. */
STATIC int
xrep_refc_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_refcount_irec	*irec = &cur->bc_rec.rc;
	struct xrep_refc		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(rr->refcount_records, rr->array_cur++,
				irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_refc_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_refc	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}

/* Update the AGF counters. */
STATIC int
xrep_refc_reset_counters(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem. If the new
	 * tree is shorter than the old one, the refcountbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_refcount_level = pag->pagf_refcount_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected refcount information to stage a new refcount btree. If
 * this is successful we'll return with the new btree root information logged
 * to the repair transaction but not yet committed.
 */
STATIC int
xrep_refc_build_new_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*refc_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_fsblock_t		fsbno;
	int			error;

	error = xrep_refc_sort_records(rr);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
			XFS_AG_RESV_METADATA);
	rr->new_btree.bload.get_records = xrep_refc_get_records;
	rr->new_btree.bload.claim_block = xrep_refc_claim_block;

	/* Compute how many blocks we'll need. */
	refc_cur = xfs_refcountbt_stage_cursor(sc->mp, &rr->new_btree.afake,
			pag);
	error = xfs_btree_bload_compute_geometry(refc_cur,
			&rr->new_btree.bload,
			xfarray_length(rr->refcount_records));
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree. Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height;

	/* Add all observed refcount records. */
	rr->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header. After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(refc_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_refc_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_level:
	pag->pagf_repair_refcount_level = 0;
err_cur:
	xfs_btree_del_cursor(refc_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_refc_remove_old_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	int			error;

	/* Free the old refcountbt blocks if they're not in use. */
	error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks,
			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old refcountbt blocks we can turn off
	 * the alternate height mechanism and reset the per-AG space
	 * reservations.
	 */
	pag->pagf_repair_refcount_level = 0;
	sc->flags |= XREP_RESET_PERAG_RESV;
	return 0;
}

/* Rebuild the refcount btree. */
int
xrep_refcountbt(
	struct xfs_scrub	*sc)
{
	struct xrep_refc	*rr;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;
	rr->sc = sc;

	/* Set up enough storage to handle one refcount record per block. */
	descr = xchk_xfile_ag_descr(sc, "reference count records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks,
			sizeof(struct xfs_refcount_irec),
			&rr->refcount_records);
	kfree(descr);
	if (error)
		goto out_rr;

	/* Collect all reference counts. */
	xagb_bitmap_init(&rr->old_refcountbt_blocks);
	error = xrep_refc_find_refcounts(rr);
	if (error)
		goto out_bitmap;

	/* Rebuild the refcount information. */
	error = xrep_refc_build_new_tree(rr);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_refc_remove_old_tree(rr);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&rr->old_refcountbt_blocks);
	xfarray_destroy(rr->refcount_records);
out_rr:
	kfree(rr);
	return error;
}