1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_bit.h" 14 #include "xfs_log_format.h" 15 #include "xfs_trans.h" 16 #include "xfs_inode.h" 17 #include "xfs_alloc.h" 18 #include "xfs_bmap.h" 19 #include "xfs_bmap_btree.h" 20 #include "xfs_rmap.h" 21 #include "xfs_rmap_btree.h" 22 #include "xfs_rtgroup.h" 23 #include "xfs_health.h" 24 #include "xfs_rtalloc.h" 25 #include "xfs_rtrmap_btree.h" 26 #include "scrub/scrub.h" 27 #include "scrub/common.h" 28 #include "scrub/btree.h" 29 #include "scrub/health.h" 30 #include "xfs_ag.h" 31 32 /* Set us up with an inode's bmap. */ 33 int 34 xchk_setup_inode_bmap( 35 struct xfs_scrub *sc) 36 { 37 int error; 38 39 if (xchk_need_intent_drain(sc)) 40 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 41 42 error = xchk_iget_for_scrubbing(sc); 43 if (error) 44 goto out; 45 46 xchk_ilock(sc, XFS_IOLOCK_EXCL); 47 48 /* 49 * We don't want any ephemeral data/cow fork updates sitting around 50 * while we inspect block mappings, so wait for directio to finish 51 * and flush dirty data if we have delalloc reservations. 52 */ 53 if (S_ISREG(VFS_I(sc->ip)->i_mode) && 54 sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { 55 struct address_space *mapping = VFS_I(sc->ip)->i_mapping; 56 bool is_repair = xchk_could_repair(sc); 57 58 xchk_ilock(sc, XFS_MMAPLOCK_EXCL); 59 60 /* Break all our leases, we're going to mess with things. */ 61 if (is_repair) { 62 error = xfs_break_layouts(VFS_I(sc->ip), 63 &sc->ilock_flags, BREAK_WRITE); 64 if (error) 65 goto out; 66 } 67 68 inode_dio_wait(VFS_I(sc->ip)); 69 70 /* 71 * Try to flush all incore state to disk before we examine the 72 * space mappings for the data fork. Leave accumulated errors 73 * in the mapping for the writer threads to consume. 74 * 75 * On ENOSPC or EIO writeback errors, we continue into the 76 * extent mapping checks because write failures do not 77 * necessarily imply anything about the correctness of the file 78 * metadata. The metadata and the file data could be on 79 * completely separate devices; a media failure might only 80 * affect a subset of the disk, etc. We can handle delalloc 81 * extents in the scrubber, so leaving them in memory is fine. 82 */ 83 error = filemap_fdatawrite(mapping); 84 if (!error) 85 error = filemap_fdatawait_keep_errors(mapping); 86 if (error && (error != -ENOSPC && error != -EIO)) 87 goto out; 88 89 /* Drop the page cache if we're repairing block mappings. */ 90 if (is_repair) { 91 error = invalidate_inode_pages2( 92 VFS_I(sc->ip)->i_mapping); 93 if (error) 94 goto out; 95 } 96 97 } 98 99 /* Got the inode, lock it and we're ready to go. */ 100 error = xchk_trans_alloc(sc, 0); 101 if (error) 102 goto out; 103 104 error = xchk_ino_dqattach(sc); 105 if (error) 106 goto out; 107 108 xchk_ilock(sc, XFS_ILOCK_EXCL); 109 out: 110 /* scrub teardown will unlock and release the inode */ 111 return error; 112 } 113 114 /* 115 * Inode fork block mapping (BMBT) scrubber. 116 * More complex than the others because we have to scrub 117 * all the extents regardless of whether or not the fork 118 * is in btree format. 119 */ 120 121 struct xchk_bmap_info { 122 struct xfs_scrub *sc; 123 124 /* Incore extent tree cursor */ 125 struct xfs_iext_cursor icur; 126 127 /* Previous fork mapping that we examined */ 128 struct xfs_bmbt_irec prev_rec; 129 130 /* Is this a realtime fork? */ 131 bool is_rt; 132 133 /* May mappings point to shared space? */ 134 bool is_shared; 135 136 /* Was the incore extent tree loaded? */ 137 bool was_loaded; 138 139 /* Which inode fork are we checking? */ 140 int whichfork; 141 }; 142 143 /* Look for a corresponding rmap for this irec. */ 144 static inline bool 145 xchk_bmap_get_rmap( 146 struct xchk_bmap_info *info, 147 struct xfs_bmbt_irec *irec, 148 xfs_agblock_t bno, 149 uint64_t owner, 150 struct xfs_rmap_irec *rmap) 151 { 152 struct xfs_btree_cur **curp = &info->sc->sa.rmap_cur; 153 xfs_fileoff_t offset; 154 unsigned int rflags = 0; 155 int has_rmap; 156 int error; 157 158 if (xfs_ifork_is_realtime(info->sc->ip, info->whichfork)) 159 curp = &info->sc->sr.rmap_cur; 160 161 if (*curp == NULL) 162 return false; 163 164 if (info->whichfork == XFS_ATTR_FORK) 165 rflags |= XFS_RMAP_ATTR_FORK; 166 if (irec->br_state == XFS_EXT_UNWRITTEN) 167 rflags |= XFS_RMAP_UNWRITTEN; 168 169 /* 170 * CoW staging extents are owned (on disk) by the refcountbt, so 171 * their rmaps do not have offsets. 172 */ 173 if (info->whichfork == XFS_COW_FORK) 174 offset = 0; 175 else 176 offset = irec->br_startoff; 177 178 /* 179 * If the caller thinks this could be a shared bmbt extent (IOWs, 180 * any data fork extent of a reflink inode) then we have to use the 181 * range rmap lookup to make sure we get the correct owner/offset. 182 */ 183 if (info->is_shared) { 184 error = xfs_rmap_lookup_le_range(*curp, bno, owner, offset, 185 rflags, rmap, &has_rmap); 186 } else { 187 error = xfs_rmap_lookup_le(*curp, bno, owner, offset, 188 rflags, rmap, &has_rmap); 189 } 190 if (!xchk_should_check_xref(info->sc, &error, curp)) 191 return false; 192 193 if (!has_rmap) 194 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 195 irec->br_startoff); 196 return has_rmap; 197 } 198 199 /* Make sure that we have rmapbt records for this data/attr fork extent. */ 200 STATIC void 201 xchk_bmap_xref_rmap( 202 struct xchk_bmap_info *info, 203 struct xfs_bmbt_irec *irec, 204 xfs_agblock_t bno) 205 { 206 struct xfs_rmap_irec rmap; 207 unsigned long long rmap_end; 208 uint64_t owner = info->sc->ip->i_ino; 209 210 if (xchk_skip_xref(info->sc->sm)) 211 return; 212 213 /* Find the rmap record for this irec. */ 214 if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap)) 215 return; 216 217 /* 218 * The rmap must be an exact match for this incore file mapping record, 219 * which may have arisen from multiple ondisk records. 220 */ 221 if (rmap.rm_startblock != bno) 222 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 223 irec->br_startoff); 224 225 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 226 if (rmap_end != bno + irec->br_blockcount) 227 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 228 irec->br_startoff); 229 230 /* Check the logical offsets. */ 231 if (rmap.rm_offset != irec->br_startoff) 232 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 233 irec->br_startoff); 234 235 rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount; 236 if (rmap_end != irec->br_startoff + irec->br_blockcount) 237 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 238 irec->br_startoff); 239 240 /* Check the owner */ 241 if (rmap.rm_owner != owner) 242 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 243 irec->br_startoff); 244 245 /* 246 * Check for discrepancies between the unwritten flag in the irec and 247 * the rmap. Note that the (in-memory) CoW fork distinguishes between 248 * unwritten and written extents, but we don't track that in the rmap 249 * records because the blocks are owned (on-disk) by the refcountbt, 250 * which doesn't track unwritten state. 251 */ 252 if (!!(irec->br_state == XFS_EXT_UNWRITTEN) != 253 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 254 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 255 irec->br_startoff); 256 257 if (!!(info->whichfork == XFS_ATTR_FORK) != 258 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 259 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 260 irec->br_startoff); 261 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 262 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 263 irec->br_startoff); 264 } 265 266 /* Make sure that we have rmapbt records for this COW fork extent. */ 267 STATIC void 268 xchk_bmap_xref_rmap_cow( 269 struct xchk_bmap_info *info, 270 struct xfs_bmbt_irec *irec, 271 xfs_agblock_t bno) 272 { 273 struct xfs_rmap_irec rmap; 274 unsigned long long rmap_end; 275 uint64_t owner = XFS_RMAP_OWN_COW; 276 277 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 278 return; 279 280 /* Find the rmap record for this irec. */ 281 if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap)) 282 return; 283 284 /* 285 * CoW staging extents are owned by the refcount btree, so the rmap 286 * can start before and end after the physical space allocated to this 287 * mapping. There are no offsets to check. 288 */ 289 if (rmap.rm_startblock > bno) 290 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 291 irec->br_startoff); 292 293 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 294 if (rmap_end < bno + irec->br_blockcount) 295 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 296 irec->br_startoff); 297 298 /* Check the owner */ 299 if (rmap.rm_owner != owner) 300 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 301 irec->br_startoff); 302 303 /* 304 * No flags allowed. Note that the (in-memory) CoW fork distinguishes 305 * between unwritten and written extents, but we don't track that in 306 * the rmap records because the blocks are owned (on-disk) by the 307 * refcountbt, which doesn't track unwritten state. 308 */ 309 if (rmap.rm_flags & XFS_RMAP_ATTR_FORK) 310 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 311 irec->br_startoff); 312 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 313 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 314 irec->br_startoff); 315 if (rmap.rm_flags & XFS_RMAP_UNWRITTEN) 316 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 317 irec->br_startoff); 318 } 319 320 /* Cross-reference a single rtdev extent record. */ 321 STATIC void 322 xchk_bmap_rt_iextent_xref( 323 struct xfs_inode *ip, 324 struct xchk_bmap_info *info, 325 struct xfs_bmbt_irec *irec) 326 { 327 struct xfs_owner_info oinfo; 328 xfs_rgblock_t rgbno; 329 int error; 330 331 error = xchk_rtgroup_init_existing(info->sc, 332 xfs_rtb_to_rgno(ip->i_mount, irec->br_startblock), 333 &info->sc->sr); 334 if (!xchk_fblock_process_error(info->sc, info->whichfork, 335 irec->br_startoff, &error)) 336 return; 337 338 error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL); 339 if (!xchk_fblock_process_error(info->sc, info->whichfork, 340 irec->br_startoff, &error)) 341 goto out_free; 342 343 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, 344 irec->br_blockcount); 345 346 if (!xfs_has_rtrmapbt(info->sc->mp)) 347 goto out_cur; 348 349 rgbno = xfs_rtb_to_rgbno(info->sc->mp, irec->br_startblock); 350 351 switch (info->whichfork) { 352 case XFS_DATA_FORK: 353 xchk_bmap_xref_rmap(info, irec, rgbno); 354 if (!xfs_is_reflink_inode(info->sc->ip)) { 355 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 356 info->whichfork, irec->br_startoff); 357 xchk_xref_is_only_rt_owned_by(info->sc, rgbno, 358 irec->br_blockcount, &oinfo); 359 xchk_xref_is_not_rt_shared(info->sc, rgbno, 360 irec->br_blockcount); 361 } 362 xchk_xref_is_not_rt_cow_staging(info->sc, rgbno, 363 irec->br_blockcount); 364 break; 365 case XFS_COW_FORK: 366 xchk_bmap_xref_rmap_cow(info, irec, rgbno); 367 xchk_xref_is_only_rt_owned_by(info->sc, rgbno, 368 irec->br_blockcount, &XFS_RMAP_OINFO_COW); 369 xchk_xref_is_rt_cow_staging(info->sc, rgbno, 370 irec->br_blockcount); 371 xchk_xref_is_not_rt_shared(info->sc, rgbno, 372 irec->br_blockcount); 373 break; 374 } 375 out_cur: 376 xchk_rtgroup_btcur_free(&info->sc->sr); 377 out_free: 378 xchk_rtgroup_free(info->sc, &info->sc->sr); 379 } 380 381 /* Cross-reference a single datadev extent record. */ 382 STATIC void 383 xchk_bmap_iextent_xref( 384 struct xfs_inode *ip, 385 struct xchk_bmap_info *info, 386 struct xfs_bmbt_irec *irec) 387 { 388 struct xfs_owner_info oinfo; 389 struct xfs_mount *mp = info->sc->mp; 390 xfs_agnumber_t agno; 391 xfs_agblock_t agbno; 392 xfs_extlen_t len; 393 int error; 394 395 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 396 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 397 len = irec->br_blockcount; 398 399 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); 400 if (!xchk_fblock_process_error(info->sc, info->whichfork, 401 irec->br_startoff, &error)) 402 goto out_free; 403 404 xchk_xref_is_used_space(info->sc, agbno, len); 405 xchk_xref_is_not_inode_chunk(info->sc, agbno, len); 406 switch (info->whichfork) { 407 case XFS_DATA_FORK: 408 xchk_bmap_xref_rmap(info, irec, agbno); 409 if (!xfs_is_reflink_inode(info->sc->ip)) { 410 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 411 info->whichfork, irec->br_startoff); 412 xchk_xref_is_only_owned_by(info->sc, agbno, 413 irec->br_blockcount, &oinfo); 414 xchk_xref_is_not_shared(info->sc, agbno, 415 irec->br_blockcount); 416 } 417 xchk_xref_is_not_cow_staging(info->sc, agbno, 418 irec->br_blockcount); 419 break; 420 case XFS_ATTR_FORK: 421 xchk_bmap_xref_rmap(info, irec, agbno); 422 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 423 info->whichfork, irec->br_startoff); 424 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 425 &oinfo); 426 xchk_xref_is_not_shared(info->sc, agbno, 427 irec->br_blockcount); 428 xchk_xref_is_not_cow_staging(info->sc, agbno, 429 irec->br_blockcount); 430 break; 431 case XFS_COW_FORK: 432 xchk_bmap_xref_rmap_cow(info, irec, agbno); 433 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 434 &XFS_RMAP_OINFO_COW); 435 xchk_xref_is_cow_staging(info->sc, agbno, 436 irec->br_blockcount); 437 xchk_xref_is_not_shared(info->sc, agbno, 438 irec->br_blockcount); 439 break; 440 } 441 442 out_free: 443 xchk_ag_free(info->sc, &info->sc->sa); 444 } 445 446 /* 447 * Directories and attr forks should never have blocks that can't be addressed 448 * by a xfs_dablk_t. 449 */ 450 STATIC void 451 xchk_bmap_dirattr_extent( 452 struct xfs_inode *ip, 453 struct xchk_bmap_info *info, 454 struct xfs_bmbt_irec *irec) 455 { 456 struct xfs_mount *mp = ip->i_mount; 457 xfs_fileoff_t off; 458 459 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK) 460 return; 461 462 if (!xfs_verify_dablk(mp, irec->br_startoff)) 463 xchk_fblock_set_corrupt(info->sc, info->whichfork, 464 irec->br_startoff); 465 466 off = irec->br_startoff + irec->br_blockcount - 1; 467 if (!xfs_verify_dablk(mp, off)) 468 xchk_fblock_set_corrupt(info->sc, info->whichfork, off); 469 } 470 471 /* Scrub a single extent record. */ 472 STATIC void 473 xchk_bmap_iextent( 474 struct xfs_inode *ip, 475 struct xchk_bmap_info *info, 476 struct xfs_bmbt_irec *irec) 477 { 478 struct xfs_mount *mp = info->sc->mp; 479 480 /* 481 * Check for out-of-order extents. This record could have come 482 * from the incore list, for which there is no ordering check. 483 */ 484 if (irec->br_startoff < info->prev_rec.br_startoff + 485 info->prev_rec.br_blockcount) 486 xchk_fblock_set_corrupt(info->sc, info->whichfork, 487 irec->br_startoff); 488 489 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 490 xchk_fblock_set_corrupt(info->sc, info->whichfork, 491 irec->br_startoff); 492 493 xchk_bmap_dirattr_extent(ip, info, irec); 494 495 /* Make sure the extent points to a valid place. */ 496 if (info->is_rt && 497 !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) 498 xchk_fblock_set_corrupt(info->sc, info->whichfork, 499 irec->br_startoff); 500 if (!info->is_rt && 501 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) 502 xchk_fblock_set_corrupt(info->sc, info->whichfork, 503 irec->br_startoff); 504 505 /* We don't allow unwritten extents on attr forks. */ 506 if (irec->br_state == XFS_EXT_UNWRITTEN && 507 info->whichfork == XFS_ATTR_FORK) 508 xchk_fblock_set_corrupt(info->sc, info->whichfork, 509 irec->br_startoff); 510 511 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 512 return; 513 514 if (info->is_rt) 515 xchk_bmap_rt_iextent_xref(ip, info, irec); 516 else 517 xchk_bmap_iextent_xref(ip, info, irec); 518 } 519 520 /* Scrub a bmbt record. */ 521 STATIC int 522 xchk_bmapbt_rec( 523 struct xchk_btree *bs, 524 const union xfs_btree_rec *rec) 525 { 526 struct xfs_bmbt_irec irec; 527 struct xfs_bmbt_irec iext_irec; 528 struct xfs_iext_cursor icur; 529 struct xchk_bmap_info *info = bs->private; 530 struct xfs_inode *ip = bs->cur->bc_ino.ip; 531 struct xfs_buf *bp = NULL; 532 struct xfs_btree_block *block; 533 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork); 534 uint64_t owner; 535 int i; 536 537 /* 538 * Check the owners of the btree blocks up to the level below 539 * the root since the verifiers don't do that. 540 */ 541 if (xfs_has_crc(bs->cur->bc_mp) && 542 bs->cur->bc_levels[0].ptr == 1) { 543 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { 544 block = xfs_btree_get_block(bs->cur, i, &bp); 545 owner = be64_to_cpu(block->bb_u.l.bb_owner); 546 if (owner != ip->i_ino) 547 xchk_fblock_set_corrupt(bs->sc, 548 info->whichfork, 0); 549 } 550 } 551 552 /* 553 * Check that the incore extent tree contains an extent that matches 554 * this one exactly. We validate those cached bmaps later, so we don't 555 * need to check them here. If the incore extent tree was just loaded 556 * from disk by the scrubber, we assume that its contents match what's 557 * on disk (we still hold the ILOCK) and skip the equivalence check. 558 */ 559 if (!info->was_loaded) 560 return 0; 561 562 xfs_bmbt_disk_get_all(&rec->bmbt, &irec); 563 if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) { 564 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 565 irec.br_startoff); 566 return 0; 567 } 568 569 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, 570 &iext_irec) || 571 irec.br_startoff != iext_irec.br_startoff || 572 irec.br_startblock != iext_irec.br_startblock || 573 irec.br_blockcount != iext_irec.br_blockcount || 574 irec.br_state != iext_irec.br_state) 575 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 576 irec.br_startoff); 577 return 0; 578 } 579 580 /* Scan the btree records. */ 581 STATIC int 582 xchk_bmap_btree( 583 struct xfs_scrub *sc, 584 int whichfork, 585 struct xchk_bmap_info *info) 586 { 587 struct xfs_owner_info oinfo; 588 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork); 589 struct xfs_mount *mp = sc->mp; 590 struct xfs_inode *ip = sc->ip; 591 struct xfs_btree_cur *cur; 592 int error; 593 594 /* Load the incore bmap cache if it's not loaded. */ 595 info->was_loaded = !xfs_need_iread_extents(ifp); 596 597 error = xfs_iread_extents(sc->tp, ip, whichfork); 598 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 599 goto out; 600 601 /* Check the btree structure. */ 602 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); 603 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 604 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); 605 xfs_btree_del_cursor(cur, error); 606 out: 607 return error; 608 } 609 610 struct xchk_bmap_check_rmap_info { 611 struct xfs_scrub *sc; 612 int whichfork; 613 struct xfs_iext_cursor icur; 614 }; 615 616 /* Can we find bmaps that fit this rmap? */ 617 STATIC int 618 xchk_bmap_check_rmap( 619 struct xfs_btree_cur *cur, 620 const struct xfs_rmap_irec *rec, 621 void *priv) 622 { 623 struct xfs_bmbt_irec irec; 624 struct xfs_rmap_irec check_rec; 625 struct xchk_bmap_check_rmap_info *sbcri = priv; 626 struct xfs_ifork *ifp; 627 struct xfs_scrub *sc = sbcri->sc; 628 bool have_map; 629 630 /* Is this even the right fork? */ 631 if (rec->rm_owner != sc->ip->i_ino) 632 return 0; 633 if ((sbcri->whichfork == XFS_ATTR_FORK) ^ 634 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) 635 return 0; 636 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) 637 return 0; 638 639 /* Now look up the bmbt record. */ 640 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork); 641 if (!ifp) { 642 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 643 rec->rm_offset); 644 goto out; 645 } 646 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, 647 &sbcri->icur, &irec); 648 if (!have_map) 649 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 650 rec->rm_offset); 651 /* 652 * bmap extent record lengths are constrained to 2^21 blocks in length 653 * because of space constraints in the on-disk metadata structure. 654 * However, rmap extent record lengths are constrained only by AG 655 * length, so we have to loop through the bmbt to make sure that the 656 * entire rmap is covered by bmbt records. 657 */ 658 check_rec = *rec; 659 while (have_map) { 660 if (irec.br_startoff != check_rec.rm_offset) 661 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 662 check_rec.rm_offset); 663 if (irec.br_startblock != 664 xfs_gbno_to_fsb(cur->bc_group, check_rec.rm_startblock)) 665 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 666 check_rec.rm_offset); 667 if (irec.br_blockcount > check_rec.rm_blockcount) 668 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 669 check_rec.rm_offset); 670 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 671 break; 672 check_rec.rm_startblock += irec.br_blockcount; 673 check_rec.rm_offset += irec.br_blockcount; 674 check_rec.rm_blockcount -= irec.br_blockcount; 675 if (check_rec.rm_blockcount == 0) 676 break; 677 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); 678 if (!have_map) 679 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 680 check_rec.rm_offset); 681 } 682 683 out: 684 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 685 return -ECANCELED; 686 return 0; 687 } 688 689 /* Make sure each rmap has a corresponding bmbt entry. */ 690 STATIC int 691 xchk_bmap_check_ag_rmaps( 692 struct xfs_scrub *sc, 693 int whichfork, 694 struct xfs_perag *pag) 695 { 696 struct xchk_bmap_check_rmap_info sbcri; 697 struct xfs_btree_cur *cur; 698 struct xfs_buf *agf; 699 int error; 700 701 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); 702 if (error) 703 return error; 704 705 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag); 706 707 sbcri.sc = sc; 708 sbcri.whichfork = whichfork; 709 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); 710 if (error == -ECANCELED) 711 error = 0; 712 713 xfs_btree_del_cursor(cur, error); 714 xfs_trans_brelse(sc->tp, agf); 715 return error; 716 } 717 718 /* Make sure each rt rmap has a corresponding bmbt entry. */ 719 STATIC int 720 xchk_bmap_check_rt_rmaps( 721 struct xfs_scrub *sc, 722 struct xfs_rtgroup *rtg) 723 { 724 struct xchk_bmap_check_rmap_info sbcri; 725 struct xfs_btree_cur *cur; 726 int error; 727 728 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); 729 cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg); 730 731 sbcri.sc = sc; 732 sbcri.whichfork = XFS_DATA_FORK; 733 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); 734 if (error == -ECANCELED) 735 error = 0; 736 737 xfs_btree_del_cursor(cur, error); 738 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); 739 return error; 740 } 741 742 /* 743 * Decide if we want to scan the reverse mappings to determine if the attr 744 * fork /really/ has zero space mappings. 745 */ 746 STATIC bool 747 xchk_bmap_check_empty_attrfork( 748 struct xfs_inode *ip) 749 { 750 struct xfs_ifork *ifp = &ip->i_af; 751 752 /* 753 * If the dinode repair found a bad attr fork, it will reset the fork 754 * to extents format with zero records and wait for the this scrubber 755 * to reconstruct the block mappings. If the fork is not in this 756 * state, then the fork cannot have been zapped. 757 */ 758 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) 759 return false; 760 761 /* 762 * Files can have an attr fork in EXTENTS format with zero records for 763 * several reasons: 764 * 765 * a) an attr set created a fork but ran out of space 766 * b) attr replace deleted an old attr but failed during the set step 767 * c) the data fork was in btree format when all attrs were deleted, so 768 * the fork was left in place 769 * d) the inode repair code zapped the fork 770 * 771 * Only in case (d) do we want to scan the rmapbt to see if we need to 772 * rebuild the attr fork. The fork zap code clears all DAC permission 773 * bits and zeroes the uid and gid, so avoid the scan if any of those 774 * three conditions are not met. 775 */ 776 if ((VFS_I(ip)->i_mode & 0777) != 0) 777 return false; 778 if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID)) 779 return false; 780 if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID)) 781 return false; 782 783 return true; 784 } 785 786 /* 787 * Decide if we want to scan the reverse mappings to determine if the data 788 * fork /really/ has zero space mappings. 789 */ 790 STATIC bool 791 xchk_bmap_check_empty_datafork( 792 struct xfs_inode *ip) 793 { 794 struct xfs_ifork *ifp = &ip->i_df; 795 796 /* 797 * If the dinode repair found a bad data fork, it will reset the fork 798 * to extents format with zero records and wait for the this scrubber 799 * to reconstruct the block mappings. If the fork is not in this 800 * state, then the fork cannot have been zapped. 801 */ 802 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) 803 return false; 804 805 /* 806 * If we encounter an empty data fork along with evidence that the fork 807 * might not really be empty, we need to scan the reverse mappings to 808 * decide if we're going to rebuild the fork. Data forks with nonzero 809 * file size are scanned. 810 */ 811 return i_size_read(VFS_I(ip)) != 0; 812 } 813 814 /* 815 * Decide if we want to walk every rmap btree in the fs to make sure that each 816 * rmap for this file fork has corresponding bmbt entries. 817 */ 818 static bool 819 xchk_bmap_want_check_rmaps( 820 struct xchk_bmap_info *info) 821 { 822 struct xfs_scrub *sc = info->sc; 823 824 if (!xfs_has_rmapbt(sc->mp)) 825 return false; 826 if (info->whichfork == XFS_COW_FORK) 827 return false; 828 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 829 return false; 830 831 if (info->whichfork == XFS_ATTR_FORK) 832 return xchk_bmap_check_empty_attrfork(sc->ip); 833 834 return xchk_bmap_check_empty_datafork(sc->ip); 835 } 836 837 /* Make sure each rmap has a corresponding bmbt entry. */ 838 STATIC int 839 xchk_bmap_check_rmaps( 840 struct xfs_scrub *sc, 841 int whichfork) 842 { 843 struct xfs_perag *pag = NULL; 844 int error; 845 846 if (xfs_ifork_is_realtime(sc->ip, whichfork)) { 847 struct xfs_rtgroup *rtg = NULL; 848 849 while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) { 850 error = xchk_bmap_check_rt_rmaps(sc, rtg); 851 if (error || 852 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { 853 xfs_rtgroup_rele(rtg); 854 return error; 855 } 856 } 857 858 return 0; 859 } 860 861 while ((pag = xfs_perag_next(sc->mp, pag))) { 862 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); 863 if (error || 864 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { 865 xfs_perag_rele(pag); 866 return error; 867 } 868 } 869 870 return 0; 871 } 872 873 /* Scrub a delalloc reservation from the incore extent map tree. */ 874 STATIC void 875 xchk_bmap_iextent_delalloc( 876 struct xfs_inode *ip, 877 struct xchk_bmap_info *info, 878 struct xfs_bmbt_irec *irec) 879 { 880 struct xfs_mount *mp = info->sc->mp; 881 882 /* 883 * Check for out-of-order extents. This record could have come 884 * from the incore list, for which there is no ordering check. 885 */ 886 if (irec->br_startoff < info->prev_rec.br_startoff + 887 info->prev_rec.br_blockcount) 888 xchk_fblock_set_corrupt(info->sc, info->whichfork, 889 irec->br_startoff); 890 891 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 892 xchk_fblock_set_corrupt(info->sc, info->whichfork, 893 irec->br_startoff); 894 895 /* Make sure the extent points to a valid place. */ 896 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 897 xchk_fblock_set_corrupt(info->sc, info->whichfork, 898 irec->br_startoff); 899 } 900 901 /* Decide if this individual fork mapping is ok. */ 902 static bool 903 xchk_bmap_iext_mapping( 904 struct xchk_bmap_info *info, 905 const struct xfs_bmbt_irec *irec) 906 { 907 /* There should never be a "hole" extent in either extent list. */ 908 if (irec->br_startblock == HOLESTARTBLOCK) 909 return false; 910 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 911 return false; 912 return true; 913 } 914 915 /* Are these two mappings contiguous with each other? */ 916 static inline bool 917 xchk_are_bmaps_contiguous( 918 const struct xchk_bmap_info *info, 919 const struct xfs_bmbt_irec *b1, 920 const struct xfs_bmbt_irec *b2) 921 { 922 struct xfs_mount *mp = info->sc->mp; 923 924 /* Don't try to combine unallocated mappings. */ 925 if (!xfs_bmap_is_real_extent(b1)) 926 return false; 927 if (!xfs_bmap_is_real_extent(b2)) 928 return false; 929 930 /* Does b2 come right after b1 in the logical and physical range? */ 931 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff) 932 return false; 933 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock) 934 return false; 935 if (b1->br_state != b2->br_state) 936 return false; 937 938 /* 939 * Don't combine bmaps that would cross rtgroup boundaries. This is a 940 * valid state, but if combined they will fail rtb extent checks. 941 */ 942 if (info->is_rt && xfs_has_rtgroups(mp)) { 943 if (xfs_rtb_to_rgno(mp, b1->br_startblock) != 944 xfs_rtb_to_rgno(mp, b2->br_startblock)) 945 return false; 946 } 947 948 return true; 949 } 950 951 /* 952 * Walk the incore extent records, accumulating consecutive contiguous records 953 * into a single incore mapping. Returns true if @irec has been set to a 954 * mapping or false if there are no more mappings. Caller must ensure that 955 * @info.icur is zeroed before the first call. 956 */ 957 static bool 958 xchk_bmap_iext_iter( 959 struct xchk_bmap_info *info, 960 struct xfs_bmbt_irec *irec) 961 { 962 struct xfs_bmbt_irec got; 963 struct xfs_ifork *ifp; 964 unsigned int nr = 0; 965 966 ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); 967 968 /* Advance to the next iextent record and check the mapping. */ 969 xfs_iext_next(ifp, &info->icur); 970 if (!xfs_iext_get_extent(ifp, &info->icur, irec)) 971 return false; 972 973 if (!xchk_bmap_iext_mapping(info, irec)) { 974 xchk_fblock_set_corrupt(info->sc, info->whichfork, 975 irec->br_startoff); 976 return false; 977 } 978 nr++; 979 980 /* 981 * Iterate subsequent iextent records and merge them with the one 982 * that we just read, if possible. 983 */ 984 while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { 985 if (!xchk_are_bmaps_contiguous(info, irec, &got)) 986 break; 987 988 if (!xchk_bmap_iext_mapping(info, &got)) { 989 xchk_fblock_set_corrupt(info->sc, info->whichfork, 990 got.br_startoff); 991 return false; 992 } 993 nr++; 994 995 irec->br_blockcount += got.br_blockcount; 996 xfs_iext_next(ifp, &info->icur); 997 } 998 999 /* 1000 * If the merged mapping could be expressed with fewer bmbt records 1001 * than we actually found, notify the user that this fork could be 1002 * optimized. CoW forks only exist in memory so we ignore them. 1003 */ 1004 if (nr > 1 && info->whichfork != XFS_COW_FORK && 1005 howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) 1006 xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); 1007 1008 return true; 1009 } 1010 1011 /* 1012 * Scrub an inode fork's block mappings. 1013 * 1014 * First we scan every record in every btree block, if applicable. 1015 * Then we unconditionally scan the incore extent cache. 1016 */ 1017 STATIC int 1018 xchk_bmap( 1019 struct xfs_scrub *sc, 1020 int whichfork) 1021 { 1022 struct xfs_bmbt_irec irec; 1023 struct xchk_bmap_info info = { NULL }; 1024 struct xfs_mount *mp = sc->mp; 1025 struct xfs_inode *ip = sc->ip; 1026 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 1027 xfs_fileoff_t endoff; 1028 int error = 0; 1029 1030 /* Non-existent forks can be ignored. */ 1031 if (!ifp) 1032 return -ENOENT; 1033 1034 info.is_rt = xfs_ifork_is_realtime(ip, whichfork); 1035 info.whichfork = whichfork; 1036 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); 1037 info.sc = sc; 1038 1039 switch (whichfork) { 1040 case XFS_COW_FORK: 1041 /* No CoW forks filesystem doesn't support out of place writes */ 1042 if (!xfs_has_reflink(mp) && !xfs_has_zoned(mp)) { 1043 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1044 return 0; 1045 } 1046 break; 1047 case XFS_ATTR_FORK: 1048 /* 1049 * "attr" means that an attr fork was created at some point in 1050 * the life of this filesystem. "attr2" means that inodes have 1051 * variable-sized data/attr fork areas. Hence we only check 1052 * attr here. 1053 */ 1054 if (!xfs_has_attr(mp)) 1055 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1056 break; 1057 default: 1058 ASSERT(whichfork == XFS_DATA_FORK); 1059 break; 1060 } 1061 1062 /* Check the fork values */ 1063 switch (ifp->if_format) { 1064 case XFS_DINODE_FMT_UUID: 1065 case XFS_DINODE_FMT_DEV: 1066 case XFS_DINODE_FMT_LOCAL: 1067 case XFS_DINODE_FMT_META_BTREE: 1068 /* No mappings to check. */ 1069 if (whichfork == XFS_COW_FORK) 1070 xchk_fblock_set_corrupt(sc, whichfork, 0); 1071 return 0; 1072 case XFS_DINODE_FMT_EXTENTS: 1073 break; 1074 case XFS_DINODE_FMT_BTREE: 1075 if (whichfork == XFS_COW_FORK) { 1076 xchk_fblock_set_corrupt(sc, whichfork, 0); 1077 return 0; 1078 } 1079 1080 error = xchk_bmap_btree(sc, whichfork, &info); 1081 if (error) 1082 return error; 1083 break; 1084 default: 1085 xchk_fblock_set_corrupt(sc, whichfork, 0); 1086 return 0; 1087 } 1088 1089 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 1090 return 0; 1091 1092 /* Find the offset of the last extent in the mapping. */ 1093 error = xfs_bmap_last_offset(ip, &endoff, whichfork); 1094 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 1095 return error; 1096 1097 /* 1098 * Scrub extent records. We use a special iterator function here that 1099 * combines adjacent mappings if they are logically and physically 1100 * contiguous. For large allocations that require multiple bmbt 1101 * records, this reduces the number of cross-referencing calls, which 1102 * reduces runtime. Cross referencing with the rmap is simpler because 1103 * the rmap must match the combined mapping exactly. 1104 */ 1105 while (xchk_bmap_iext_iter(&info, &irec)) { 1106 if (xchk_should_terminate(sc, &error) || 1107 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 1108 return 0; 1109 1110 if (irec.br_startoff >= endoff) { 1111 xchk_fblock_set_corrupt(sc, whichfork, 1112 irec.br_startoff); 1113 return 0; 1114 } 1115 1116 if (isnullstartblock(irec.br_startblock)) 1117 xchk_bmap_iextent_delalloc(ip, &info, &irec); 1118 else 1119 xchk_bmap_iextent(ip, &info, &irec); 1120 memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec)); 1121 } 1122 1123 if (xchk_bmap_want_check_rmaps(&info)) { 1124 error = xchk_bmap_check_rmaps(sc, whichfork); 1125 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) 1126 return error; 1127 } 1128 1129 return 0; 1130 } 1131 1132 /* Scrub an inode's data fork. */ 1133 int 1134 xchk_bmap_data( 1135 struct xfs_scrub *sc) 1136 { 1137 int error; 1138 1139 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) { 1140 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1141 return 0; 1142 } 1143 1144 error = xchk_bmap(sc, XFS_DATA_FORK); 1145 if (error) 1146 return error; 1147 1148 /* If the data fork is clean, it is clearly not zapped. */ 1149 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED); 1150 return 0; 1151 } 1152 1153 /* Scrub an inode's attr fork. */ 1154 int 1155 xchk_bmap_attr( 1156 struct xfs_scrub *sc) 1157 { 1158 int error; 1159 1160 /* 1161 * If the attr fork has been zapped, it's possible that forkoff was 1162 * reset to zero and hence sc->ip->i_afp is NULL. We don't want the 1163 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok, 1164 * so short circuit that logic by setting the corruption flag and 1165 * returning immediately. 1166 */ 1167 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) { 1168 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1169 return 0; 1170 } 1171 1172 error = xchk_bmap(sc, XFS_ATTR_FORK); 1173 if (error) 1174 return error; 1175 1176 /* If the attr fork is clean, it is clearly not zapped. */ 1177 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED); 1178 return 0; 1179 } 1180 1181 /* Scrub an inode's CoW fork. */ 1182 int 1183 xchk_bmap_cow( 1184 struct xfs_scrub *sc) 1185 { 1186 return xchk_bmap(sc, XFS_COW_FORK); 1187 } 1188