// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "xfs_extent_busy.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
#include "scrub/rtbitmap.h"

/* rt bitmap content repairs */

/*
 * Set up to repair the realtime bitmap for this group.
 *
 * Creates the temporary file, creates an xfile sized to hold sb_rbmblocks
 * filesystem blocks of bitmap data, and adds a block reservation estimate to
 * rtb->resblks.  Returns 0 on success, the error from tempfile or xfile
 * creation, or -EOPNOTSUPP if the reservation estimate exceeds UINT_MAX.
 */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	unsigned long long	blocks = mp->m_sb.sb_rbmblocks;
	int			error;

	error = xrep_tempfile_create(sc, S_IFREG);
	if (error)
		return error;

	/* Create an xfile to hold our reconstructed bitmap. */
	descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
	error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
	/* The description string is freed whether or not creation worked. */
	kfree(descr);
	if (error)
		return error;

	/*
	 * Reserve enough blocks to write out a completely new bitmap file,
	 * plus twice as many blocks as we would need if we can only allocate
	 * one block per data fork mapping.  This should cover the
	 * preallocation of the temporary file and exchanging the extent
	 * mappings.
	 *
	 * We cannot use xfs_exchmaps_estimate because we have not yet
	 * constructed the replacement bitmap and therefore do not know how
	 * many extents it will use.  By the time we do, we will have a dirty
	 * transaction (which we cannot drop because we cannot drop the
	 * rtbitmap ILOCK) and cannot ask for more reservation.
	 */
	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += blocks;
	return 0;
}

/*
 * Convert an rt extent number to the index of the bitmap word that holds its
 * bit, by shifting right by XFS_NBWORDLOG.  @mp is not used.
 */
static inline xrep_wordoff_t
rtx_to_wordoff(
	struct xfs_mount	*mp,
	xfs_rtxnum_t		rtx)
{
	return rtx >> XFS_NBWORDLOG;
}

/*
 * Convert a length in rt extents to a count of bitmap words, by shifting
 * right by XFS_NBWORDLOG (any partial word is truncated).
 */
static inline xrep_wordcnt_t
rtxlen_to_wordcnt(
	xfs_rtxlen_t	rtxlen)
{
	return rtxlen >> XFS_NBWORDLOG;
}

/* Helper functions to record rtwords in an xfile. */

/*
 * Load one bitmap word from the xfile at word index @wordoff (byte offset
 * wordoff << XFS_WORDLOG) and return it in *@word after converting the raw
 * big-endian value to CPU order.  Returns 0 or the error from xfile_load.
 */
static inline int
xfbmp_load(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		*word)
{
	union xfs_rtword_raw	urk;
	int			error;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	error = xfile_load(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
	if (error)
		return error;

	*word = be32_to_cpu(urk.rtg);
	return 0;
}

/*
 * Store one bitmap word into the xfile at word index @wordoff, converting
 * @word from CPU order to big-endian first.  Returns the result of
 * xfile_store.
 */
static inline int
xfbmp_store(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const xfs_rtword_t	word)
{
	union xfs_rtword_raw	urk;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	urk.rtg = cpu_to_be32(word);
	return xfile_store(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
}

/*
 * Copy @nr_words raw words from @word into the xfile, starting at word index
 * @wordoff.  No byte-order conversion is performed here.
 */
static inline int
xfbmp_copyin(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

/*
 * Copy @nr_words raw words out of the xfile, starting at word index
 * @wordoff, into @word.  No byte-order conversion is performed here.
 */
static inline int
xfbmp_copyout(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

/*
 * Perform a logical OR operation on an rtword in the incore bitmap.
 *
 * Loads the word at @wordoff, emits a tracepoint with the mask and the value
 * that was loaded, and stores back the word with the bits in @mask set.
 */
static int
xrep_rtbitmap_or(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		mask)
{
	xfs_rtword_t		word;
	int			error;

	error = xfbmp_load(rtb, wordoff, &word);
	if (error)
		return error;

	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);

	return xfbmp_store(rtb, wordoff, word | mask);
}

/*
 * Mark as free every rt extent between the next rt block we expected to see
 * in the rtrmap records and the given rt block.
 *
 * The range covered is [rtb->next_rgbno, @rgbno).  Returns -EFSCORRUPTED if
 * the range fails xfs_verify_rgbext, is not aligned to rt extent boundaries
 * at both ends, or (when a refcount cursor is present) has any shared or CoW
 * refcount records; otherwise returns 0 or an error from the xfile or btree
 * helpers.
 */
STATIC int
xrep_rtbitmap_mark_free(
	struct xchk_rtbitmap	*rtb,
	xfs_rgblock_t		rgbno)
{
	struct xfs_mount	*mp = rtb->sc->mp;
	struct xchk_rt		*sr = &rtb->sc->sr;
	struct xfs_rtgroup	*rtg = sr->rtg;
	xfs_rtxnum_t		startrtx;
	xfs_rtxnum_t		nextrtx;
	xrep_wordoff_t		wordoff, nextwordoff;
	unsigned int		bit;
	unsigned int		bufwsize;
	xfs_extlen_t		mod;
	xfs_rtword_t		mask;
	enum xbtree_recpacking	outcome;
	int			error;

	if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
		return -EFSCORRUPTED;

	/*
	 * Convert rt blocks to rt extents.  The block range we find must be
	 * aligned to an rtextent boundary on both ends.
	 */
	startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
	mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
	if (mod)
		return -EFSCORRUPTED;

	/*
	 * The last block of the range (rgbno - 1) must be the last block of
	 * its rt extent; nextrtx is the first rt extent past the range.
	 */
	nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
	mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
	if (mod != mp->m_sb.sb_rextsize - 1)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sr->refc_cur) {
		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);

	/* Set bits as needed to round startrtx up to the nearest word. */
	bit = startrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
		xfs_rtblock_t	len = nextrtx - startrtx;
		unsigned int	lastbit;

		/* Clamp the run of bits to the end of this word. */
		lastbit = min(bit + len, XFS_NBWORD);
		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
				mask);
		/* Done if the whole range fit inside this one word. */
		if (error || lastbit - bit == len)
			return error;
		startrtx += XFS_NBWORD - bit;
	}

	/* Set bits as needed to round nextrtx down to the nearest word. */
	bit = nextrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
		mask = ((xfs_rtword_t)1 << bit) - 1;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
				mask);
		/* Done if no whole words remain between the two ends. */
		if (error || startrtx + bit == nextrtx)
			return error;
		nextrtx -= bit;
	}

	trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);

	/* Set all the words in between, up to a whole fs block at once. */
	wordoff = rtx_to_wordoff(mp, startrtx);
	nextwordoff = rtx_to_wordoff(mp, nextrtx);
	bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;

	while (wordoff < nextwordoff) {
		xrep_wordoff_t	rem;
		xrep_wordcnt_t	wordcnt;

		wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
				bufwsize);

		/*
		 * Try to keep us aligned to the rtwords buffer to reduce the
		 * number of xfile writes.
		 */
		rem = wordoff & (bufwsize - 1);
		if (rem)
			wordcnt = min_t(xrep_wordcnt_t, wordcnt,
					bufwsize - rem);

		/*
		 * rtb->words is the source buffer for the bulk store; the
		 * caller xrep_rtbitmap_find_freespace fills it with 0xFF
		 * bytes before walking the rtrmapbt.
		 */
		error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
		if (error)
			return error;

		wordoff += wordcnt;
	}

	return 0;
}

/*
 * Set free space in the rtbitmap based on rtrmapbt records.
 *
 * Query callback: @priv is the struct xchk_rtbitmap.  If there is a gap
 * between rtb->next_rgbno and the start of @rec, the gap is marked free.
 * rtb->next_rgbno is then advanced to the end of @rec, never moving
 * backwards.
 */
STATIC int
xrep_rtbitmap_walk_rtrmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rtbitmap		*rtb = priv;
	int				error = 0;

	if (xchk_should_terminate(rtb->sc, &error))
		return error;

	if (rtb->next_rgbno < rec->rm_startblock) {
		error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
		if (error)
			return error;
	}

	rtb->next_rgbno = max(rtb->next_rgbno,
			      rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/*
 * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
 * in the realtime bitmap that we're computing.
 *
 * The btree cursors set up here are released before returning, on both the
 * success and error paths.
 */
STATIC int
xrep_rtbitmap_find_freespace(
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_scrub	*sc = rtb->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	uint64_t		blockcount;
	int			error;

	/* Prepare a buffer of ones so that we can accelerate bulk setting. */
	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);

	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
			rtb);
	if (error)
		goto out;

	/*
	 * Mark as free every possible rt extent from the last one we saw to
	 * the end of the rt group.
	 */
	blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
	if (rtb->next_rgbno < blockcount) {
		error = xrep_rtbitmap_mark_free(rtb, blockcount);
		if (error)
			goto out;
	}

out:
	xchk_rtgroup_btcur_free(&sc->sr);
	return error;
}

/*
 * Fill one buffer of the temporary file with rebuilt bitmap contents.
 *
 * Passed as the callback to xrep_tempfile_copyin, with @data being the
 * struct xchk_rtbitmap.  Copies mp->m_blockwsize words from the xfile,
 * starting at rtb->prep_wordoff, into the word area of @bp, sets up the
 * block header on rtgroups filesystems, selects the buffer ops, and advances
 * rtb->prep_wordoff by the number of words copied.
 */
static int
xrep_rtbitmap_prep_buf(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp,
	void			*data)
{
	struct xchk_rtbitmap	*rtb = data;
	struct xfs_mount	*mp = sc->mp;
	union xfs_rtword_raw	*ondisk;
	int			error;

	/*
	 * Borrow rtb->args just long enough to look up the address of word 0
	 * in this buffer; rbmbp is cleared again right after the lookup.
	 */
	rtb->args.mp = sc->mp;
	rtb->args.tp = sc->tp;
	rtb->args.rbmbp = bp;
	ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
	rtb->args.rbmbp = NULL;

	error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
			mp->m_blockwsize);
	if (error)
		return error;

	if (xfs_has_rtgroups(sc->mp)) {
		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;

		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr->rt_lsn = 0;
		uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
		bp->b_ops = &xfs_rtbitmap_buf_ops;
	} else {
		bp->b_ops = &xfs_rtbuf_ops;
	}

	rtb->prep_wordoff += mp->m_blockwsize;
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
	return 0;
}

/*
 * Make sure that the given range of the data fork of the realtime file is
 * mapped to written blocks.  The caller must ensure that the inode is joined
 * to the transaction.
 *
 * Walks file offsets [0, @len) of sc->ip.  Written extents and holes are
 * skipped, delalloc reservations are reported as -EFSCORRUPTED, and
 * unwritten extents are converted with zeroing.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		off = 0;
	int			error;

	ASSERT(sc->ip != NULL);

	while (off < len) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Written extents are ok.  Holes are not filled because we
		 * do not know the freespace information.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK) {
			off = map.br_startoff + map.br_blockcount;
			continue;
		}

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* Make sure we're really converting an unwritten extent. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Make sure this block has a real zeroed extent mapped. */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (error)
			return error;

		/* Commit new extent and all deferred work. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;

		/* Resume after the mapping that xfs_bmapi_write returned. */
		off = map.br_startoff + map.br_blockcount;
	}

	return 0;
}

/*
 * Fix broken rt volume geometry.
 *
 * Queues superblock deltas for any of sb_rextents, sb_rbmblocks and
 * sb_rextslog that differ from the values in @rtb, rounds the bitmap inode's
 * on-disk size up to a multiple of the block size and raises it to at least
 * rbmblocks worth of bytes, logs the inode core, and rolls the transaction.
 */
STATIC int
xrep_rtbitmap_geometry(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;

	/* Superblock fields */
	if (mp->m_sb.sb_rextents != rtb->rextents)
		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
				rtb->rextents - mp->m_sb.sb_rextents);

	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);

	if (mp->m_sb.sb_rextslog != rtb->rextslog)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
				rtb->rextslog - mp->m_sb.sb_rextslog);

	/* Fix broken isize */
	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
					 mp->m_sb.sb_blocksize);

	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);

	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return xrep_roll_trans(sc);
}

/*
 * Repair the realtime bitmap file metadata.
 *
 * Rebuilds the bitmap contents in an xfile from the gaps between realtime
 * rmap records, copies the result into the temporary file, exchanges the
 * data fork contents of the temporary file with the bitmap file, and then
 * reaps the temporary file's data fork.
 *
 * Returns -EOPNOTSUPP if the filesystem lacks the realtime rmap btree or the
 * exchange-range feature, or if the bmbt size estimate exceeds UINT_MAX.
 * Returns 0 without changing anything if rtb->rbmblocks exceeds U32_MAX.
 * Otherwise returns 0 on success or the first error hit along the way.
 */
int
xrep_rtbitmap(
	struct xfs_scrub	*sc)
{
	struct xchk_rtbitmap	*rtb = sc->buf;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_group	*xg = rtg_group(sc->sr.rtg);
	unsigned long long	blocks = 0;
	unsigned int		busy_gen;
	int			error;

	/* We require the realtime rmapbt to rebuild anything. */
	if (!xfs_has_rtrmapbt(sc->mp))
		return -EOPNOTSUPP;
	/* We require atomic file exchange range to rebuild anything. */
	if (!xfs_has_exchange_range(sc->mp))
		return -EOPNOTSUPP;

	/* Impossibly large rtbitmap means we can't touch the filesystem. */
	if (rtb->rbmblocks > U32_MAX)
		return 0;

	/*
	 * If the size of the rt bitmap file is larger than what we reserved,
	 * figure out if we need to adjust the block reservation in the
	 * transaction.
	 */
	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;
	if (blocks > rtb->resblks) {
		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
		if (error)
			return error;

		rtb->resblks += blocks;
	}

	/* Fix inode core and forks. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Ensure no unwritten extents. */
	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
	if (error)
		return error;

	/*
	 * Fix inconsistent bitmap geometry.  This function returns with a
	 * clean scrub transaction.
	 */
	error = xrep_rtbitmap_geometry(sc, rtb);
	if (error)
		return error;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.
	 */
	if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
		error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
		if (error)
			return error;
	}

	/*
	 * Generate the new rtbitmap data.  We don't need the rtbmp information
	 * once this call is finished.
	 */
	error = xrep_rtbitmap_find_freespace(rtb);
	if (error)
		return error;

	/*
	 * Try to take ILOCK_EXCL of the temporary file.  We had better be the
	 * only ones holding onto this inode, but we can't block while holding
	 * the rtbitmap file's ILOCK_EXCL.
	 */
	while (!xrep_tempfile_ilock_nowait(sc)) {
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}

	/*
	 * Make sure we have space allocated for the part of the bitmap
	 * file that corresponds to this group.  We already joined sc->ip.
	 */
	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
	error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/* Copy the bitmap file that we generated. */
	error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
			xrep_rtbitmap_prep_buf, rtb);
	if (error)
		return error;
	/*
	 * NOTE(review): the size here comes from sb_rbmblocks rather than
	 * rtb->rbmblocks; presumably the two agree once
	 * xrep_rtbitmap_geometry has run -- confirm.
	 */
	error = xrep_tempfile_set_isize(sc,
			XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
	if (error)
		return error;

	/*
	 * Now exchange the data fork contents.  We're done with the temporary
	 * buffer, so we can reuse it for the tempfile exchmaps information.
	 */
	error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
			rtb->rbmblocks, &rtb->tempexch);
	if (error)
		return error;

	error = xrep_tempexch_contents(sc, &rtb->tempexch);
	if (error)
		return error;

	/* Free the old rtbitmap blocks if they're not in use. */
	return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}