1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_trans.h" 15 #include "xfs_ag.h" 16 #include "xfs_inode.h" 17 #include "xfs_ialloc.h" 18 #include "xfs_icache.h" 19 #include "xfs_da_format.h" 20 #include "xfs_reflink.h" 21 #include "xfs_rmap.h" 22 #include "xfs_bmap_util.h" 23 #include "xfs_rtbitmap.h" 24 #include "scrub/scrub.h" 25 #include "scrub/common.h" 26 #include "scrub/btree.h" 27 #include "scrub/trace.h" 28 #include "scrub/repair.h" 29 30 /* Prepare the attached inode for scrubbing. */ 31 static inline int 32 xchk_prepare_iscrub( 33 struct xfs_scrub *sc) 34 { 35 int error; 36 37 xchk_ilock(sc, XFS_IOLOCK_EXCL); 38 39 error = xchk_trans_alloc(sc, 0); 40 if (error) 41 return error; 42 43 error = xchk_ino_dqattach(sc); 44 if (error) 45 return error; 46 47 xchk_ilock(sc, XFS_ILOCK_EXCL); 48 return 0; 49 } 50 51 /* Install this scrub-by-handle inode and prepare it for scrubbing. */ 52 static inline int 53 xchk_install_handle_iscrub( 54 struct xfs_scrub *sc, 55 struct xfs_inode *ip) 56 { 57 int error; 58 59 error = xchk_install_handle_inode(sc, ip); 60 if (error) 61 return error; 62 63 /* 64 * Don't allow scrubbing by handle of any non-directory inode records 65 * in the metadata directory tree. We don't know if any of the scans 66 * launched by this scrubber will end up indirectly trying to lock this 67 * file. 68 * 69 * Scrubbers of inode-rooted metadata files (e.g. quota files) will 70 * attach all the resources needed to scrub the inode and call 71 * xchk_inode directly. Userspace cannot call this directly. 72 */ 73 if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) { 74 xchk_irele(sc, ip); 75 sc->ip = NULL; 76 return -ENOENT; 77 } 78 79 return xchk_prepare_iscrub(sc); 80 } 81 82 /* 83 * Grab total control of the inode metadata. In the best case, we grab the 84 * incore inode and take all locks on it. If the incore inode cannot be 85 * constructed due to corruption problems, lock the AGI so that we can single 86 * step the loading process to fix everything that can go wrong. 87 */ 88 int 89 xchk_setup_inode( 90 struct xfs_scrub *sc) 91 { 92 struct xfs_imap imap; 93 struct xfs_inode *ip; 94 struct xfs_mount *mp = sc->mp; 95 struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); 96 struct xfs_buf *agi_bp; 97 struct xfs_perag *pag; 98 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino); 99 int error; 100 101 if (xchk_need_intent_drain(sc)) 102 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 103 104 /* We want to scan the opened inode, so lock it and exit. */ 105 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 106 error = xchk_install_live_inode(sc, ip_in); 107 if (error) 108 return error; 109 110 return xchk_prepare_iscrub(sc); 111 } 112 113 /* 114 * On pre-metadir filesystems, reject internal metadata files. For 115 * metadir filesystems, limited scrubbing of any file in the metadata 116 * directory tree by handle is allowed, because that is the only way to 117 * validate the lack of parent pointers in the sb-root metadata inodes. 118 */ 119 if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) 120 return -ENOENT; 121 /* Reject obviously bad inode numbers. */ 122 if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) 123 return -ENOENT; 124 125 /* Try a safe untrusted iget. */ 126 error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip); 127 if (!error) 128 return xchk_install_handle_iscrub(sc, ip); 129 if (error == -ENOENT) 130 return error; 131 if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL) 132 goto out_error; 133 134 /* 135 * EINVAL with IGET_UNTRUSTED probably means one of several things: 136 * userspace gave us an inode number that doesn't correspond to fs 137 * space; the inode btree lacks a record for this inode; or there is 138 * a record, and it says this inode is free. 139 * 140 * EFSCORRUPTED/EFSBADCRC could mean that the inode was mappable, but 141 * some other metadata corruption (e.g. inode forks) prevented 142 * instantiation of the incore inode. Or it could mean the inobt is 143 * corrupt. 144 * 145 * We want to look up this inode in the inobt directly to distinguish 146 * three different scenarios: (1) the inobt says the inode is free, 147 * in which case there's nothing to do; (2) the inobt is corrupt so we 148 * should flag the corruption and exit to userspace to let it fix the 149 * inobt; and (3) the inobt says the inode is allocated, but loading it 150 * failed due to corruption. 151 * 152 * Allocate a transaction and grab the AGI to prevent inobt activity in 153 * this AG. Retry the iget in case someone allocated a new inode after 154 * the first iget failed. 155 */ 156 error = xchk_trans_alloc(sc, 0); 157 if (error) 158 goto out_error; 159 160 error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip); 161 if (error == 0) { 162 /* Actually got the incore inode, so install it and proceed. */ 163 xchk_trans_cancel(sc); 164 return xchk_install_handle_iscrub(sc, ip); 165 } 166 if (error == -ENOENT) 167 goto out_gone; 168 if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL) 169 goto out_cancel; 170 171 /* Ensure that we have protected against inode allocation/freeing. */ 172 if (agi_bp == NULL) { 173 ASSERT(agi_bp != NULL); 174 error = -ECANCELED; 175 goto out_cancel; 176 } 177 178 /* 179 * Untrusted iget failed a second time. Let's try an inobt lookup. 180 * If the inobt doesn't think this is an allocated inode then we'll 181 * return ENOENT to signal that the check can be skipped. 182 * 183 * If the lookup signals corruption, we'll mark this inode corrupt and 184 * exit to userspace. There's little chance of fixing anything until 185 * the inobt is straightened out, but there's nothing we can do here. 186 * 187 * If the lookup encounters a runtime error, exit to userspace. 188 */ 189 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino)); 190 if (!pag) { 191 error = -EFSCORRUPTED; 192 goto out_cancel; 193 } 194 195 error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap, 196 XFS_IGET_UNTRUSTED); 197 xfs_perag_put(pag); 198 if (error == -EINVAL || error == -ENOENT) 199 goto out_gone; 200 if (error) 201 goto out_cancel; 202 203 /* 204 * The lookup succeeded. Chances are the ondisk inode is corrupt and 205 * preventing iget from reading it. Retain the scrub transaction and 206 * the AGI buffer to prevent anyone from allocating or freeing inodes. 207 * This ensures that we preserve the inconsistency between the inobt 208 * saying the inode is allocated and the icache being unable to load 209 * the inode until we can flag the corruption in xchk_inode. The 210 * scrub function has to note the corruption, since we're not really 211 * supposed to do that from the setup function. Save the mapping to 212 * make repairs to the ondisk inode buffer. 213 */ 214 if (xchk_could_repair(sc)) 215 xrep_setup_inode(sc, &imap); 216 return 0; 217 218 out_cancel: 219 xchk_trans_cancel(sc); 220 out_error: 221 trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 222 error, __return_address); 223 return error; 224 out_gone: 225 /* The file is gone, so there's nothing to check. */ 226 xchk_trans_cancel(sc); 227 return -ENOENT; 228 } 229 230 /* Inode core */ 231 232 /* Validate di_extsize hint. */ 233 STATIC void 234 xchk_inode_extsize( 235 struct xfs_scrub *sc, 236 struct xfs_dinode *dip, 237 xfs_ino_t ino, 238 uint16_t mode, 239 uint16_t flags) 240 { 241 xfs_failaddr_t fa; 242 uint32_t value = be32_to_cpu(dip->di_extsize); 243 244 fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags); 245 if (fa) 246 xchk_ino_set_corrupt(sc, ino); 247 248 /* 249 * XFS allows a sysadmin to change the rt extent size when adding a rt 250 * section to a filesystem after formatting. If there are any 251 * directories with extszinherit and rtinherit set, the hint could 252 * become misaligned with the new rextsize. The verifier doesn't check 253 * this, because we allow rtinherit directories even without an rt 254 * device. Flag this as an administrative warning since we will clean 255 * this up eventually. 256 */ 257 if ((flags & XFS_DIFLAG_RTINHERIT) && 258 (flags & XFS_DIFLAG_EXTSZINHERIT) && 259 xfs_extlen_to_rtxmod(sc->mp, value) > 0) 260 xchk_ino_set_warning(sc, ino); 261 } 262 263 /* Validate di_cowextsize hint. */ 264 STATIC void 265 xchk_inode_cowextsize( 266 struct xfs_scrub *sc, 267 struct xfs_dinode *dip, 268 xfs_ino_t ino, 269 uint16_t mode, 270 uint16_t flags, 271 uint64_t flags2) 272 { 273 xfs_failaddr_t fa; 274 uint32_t value = be32_to_cpu(dip->di_cowextsize); 275 276 /* 277 * The used block counter for rtrmap is checked and repaired elsewhere. 278 */ 279 if (xfs_has_zoned(sc->mp) && 280 dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)) 281 return; 282 283 fa = xfs_inode_validate_cowextsize(sc->mp, value, mode, flags, flags2); 284 if (fa) 285 xchk_ino_set_corrupt(sc, ino); 286 287 /* 288 * XFS allows a sysadmin to change the rt extent size when adding a rt 289 * section to a filesystem after formatting. If there are any 290 * directories with cowextsize and rtinherit set, the hint could become 291 * misaligned with the new rextsize. The verifier doesn't check this, 292 * because we allow rtinherit directories even without an rt device. 293 * Flag this as an administrative warning since we will clean this up 294 * eventually. 295 */ 296 if ((flags & XFS_DIFLAG_RTINHERIT) && 297 (flags2 & XFS_DIFLAG2_COWEXTSIZE) && 298 value % sc->mp->m_sb.sb_rextsize > 0) 299 xchk_ino_set_warning(sc, ino); 300 } 301 302 /* Make sure the di_flags make sense for the inode. */ 303 STATIC void 304 xchk_inode_flags( 305 struct xfs_scrub *sc, 306 struct xfs_dinode *dip, 307 xfs_ino_t ino, 308 uint16_t mode, 309 uint16_t flags) 310 { 311 struct xfs_mount *mp = sc->mp; 312 313 /* di_flags are all taken, last bit cannot be used */ 314 if (flags & ~XFS_DIFLAG_ANY) 315 goto bad; 316 317 /* rt flags require rt device */ 318 if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 319 goto bad; 320 321 /* new rt bitmap flag only valid for rbmino */ 322 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 323 goto bad; 324 325 /* directory-only flags */ 326 if ((flags & (XFS_DIFLAG_RTINHERIT | 327 XFS_DIFLAG_EXTSZINHERIT | 328 XFS_DIFLAG_PROJINHERIT | 329 XFS_DIFLAG_NOSYMLINKS)) && 330 !S_ISDIR(mode)) 331 goto bad; 332 333 /* file-only flags */ 334 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 335 !S_ISREG(mode)) 336 goto bad; 337 338 /* filestreams and rt make no sense */ 339 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 340 goto bad; 341 342 return; 343 bad: 344 xchk_ino_set_corrupt(sc, ino); 345 } 346 347 /* Make sure the di_flags2 make sense for the inode. */ 348 STATIC void 349 xchk_inode_flags2( 350 struct xfs_scrub *sc, 351 struct xfs_dinode *dip, 352 xfs_ino_t ino, 353 uint16_t mode, 354 uint16_t flags, 355 uint64_t flags2) 356 { 357 struct xfs_mount *mp = sc->mp; 358 359 /* Unknown di_flags2 could be from a future kernel */ 360 if (flags2 & ~XFS_DIFLAG2_ANY) 361 xchk_ino_set_warning(sc, ino); 362 363 /* reflink flag requires reflink feature */ 364 if ((flags2 & XFS_DIFLAG2_REFLINK) && 365 !xfs_has_reflink(mp)) 366 goto bad; 367 368 /* cowextsize flag is checked w.r.t. mode separately */ 369 370 /* file/dir-only flags */ 371 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 372 goto bad; 373 374 /* file-only flags */ 375 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 376 goto bad; 377 378 /* realtime and reflink don't always go together */ 379 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK) && 380 !xfs_has_rtreflink(mp)) 381 goto bad; 382 383 /* no bigtime iflag without the bigtime feature */ 384 if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) 385 goto bad; 386 387 /* no large extent counts without the filesystem feature */ 388 if ((flags2 & XFS_DIFLAG2_NREXT64) && !xfs_has_large_extent_counts(mp)) 389 goto bad; 390 391 return; 392 bad: 393 xchk_ino_set_corrupt(sc, ino); 394 } 395 396 static inline void 397 xchk_dinode_nsec( 398 struct xfs_scrub *sc, 399 xfs_ino_t ino, 400 struct xfs_dinode *dip, 401 const xfs_timestamp_t ts) 402 { 403 struct timespec64 tv; 404 405 tv = xfs_inode_from_disk_ts(dip, ts); 406 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) 407 xchk_ino_set_corrupt(sc, ino); 408 } 409 410 /* Scrub all the ondisk inode fields. */ 411 STATIC void 412 xchk_dinode( 413 struct xfs_scrub *sc, 414 struct xfs_dinode *dip, 415 xfs_ino_t ino) 416 { 417 struct xfs_mount *mp = sc->mp; 418 size_t fork_recs; 419 unsigned long long isize; 420 uint64_t flags2; 421 xfs_extnum_t nextents; 422 xfs_extnum_t naextents; 423 prid_t prid; 424 uint16_t flags; 425 uint16_t mode; 426 427 flags = be16_to_cpu(dip->di_flags); 428 if (dip->di_version >= 3) 429 flags2 = be64_to_cpu(dip->di_flags2); 430 else 431 flags2 = 0; 432 433 /* di_mode */ 434 mode = be16_to_cpu(dip->di_mode); 435 switch (mode & S_IFMT) { 436 case S_IFLNK: 437 case S_IFREG: 438 case S_IFDIR: 439 case S_IFCHR: 440 case S_IFBLK: 441 case S_IFIFO: 442 case S_IFSOCK: 443 /* mode is recognized */ 444 break; 445 default: 446 xchk_ino_set_corrupt(sc, ino); 447 break; 448 } 449 450 /* v1/v2 fields */ 451 switch (dip->di_version) { 452 case 1: 453 /* 454 * We autoconvert v1 inodes into v2 inodes on writeout, 455 * so just mark this inode for preening. 456 */ 457 xchk_ino_set_preen(sc, ino); 458 prid = 0; 459 break; 460 case 2: 461 case 3: 462 if (xfs_dinode_is_metadir(dip)) { 463 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) 464 xchk_ino_set_corrupt(sc, ino); 465 } else { 466 if (dip->di_metatype != 0) 467 xchk_ino_set_corrupt(sc, ino); 468 } 469 470 if (dip->di_mode == 0 && sc->ip) 471 xchk_ino_set_corrupt(sc, ino); 472 473 if (dip->di_projid_hi != 0 && 474 !xfs_has_projid32(mp)) 475 xchk_ino_set_corrupt(sc, ino); 476 477 prid = be16_to_cpu(dip->di_projid_lo); 478 break; 479 default: 480 xchk_ino_set_corrupt(sc, ino); 481 return; 482 } 483 484 if (xfs_has_projid32(mp)) 485 prid |= (prid_t)be16_to_cpu(dip->di_projid_hi) << 16; 486 487 /* 488 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 489 * userspace could have created that. 490 */ 491 if (dip->di_uid == cpu_to_be32(-1U) || 492 dip->di_gid == cpu_to_be32(-1U)) 493 xchk_ino_set_warning(sc, ino); 494 495 /* 496 * project id of -1 isn't supposed to be valid, but the kernel didn't 497 * always validate that. 498 */ 499 if (prid == -1U) 500 xchk_ino_set_warning(sc, ino); 501 502 /* di_format */ 503 switch (dip->di_format) { 504 case XFS_DINODE_FMT_DEV: 505 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 506 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 507 xchk_ino_set_corrupt(sc, ino); 508 break; 509 case XFS_DINODE_FMT_LOCAL: 510 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 511 xchk_ino_set_corrupt(sc, ino); 512 break; 513 case XFS_DINODE_FMT_EXTENTS: 514 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 515 xchk_ino_set_corrupt(sc, ino); 516 break; 517 case XFS_DINODE_FMT_BTREE: 518 if (!S_ISREG(mode) && !S_ISDIR(mode)) 519 xchk_ino_set_corrupt(sc, ino); 520 break; 521 case XFS_DINODE_FMT_META_BTREE: 522 if (!S_ISREG(mode)) 523 xchk_ino_set_corrupt(sc, ino); 524 break; 525 case XFS_DINODE_FMT_UUID: 526 default: 527 xchk_ino_set_corrupt(sc, ino); 528 break; 529 } 530 531 /* di_[amc]time.nsec */ 532 xchk_dinode_nsec(sc, ino, dip, dip->di_atime); 533 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); 534 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); 535 536 /* 537 * di_size. xfs_dinode_verify checks for things that screw up 538 * the VFS such as the upper bit being set and zero-length 539 * symlinks/directories, but we can do more here. 540 */ 541 isize = be64_to_cpu(dip->di_size); 542 if (isize & (1ULL << 63)) 543 xchk_ino_set_corrupt(sc, ino); 544 545 /* Devices, fifos, and sockets must have zero size */ 546 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 547 xchk_ino_set_corrupt(sc, ino); 548 549 /* Directories can't be larger than the data section size (32G) */ 550 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 551 xchk_ino_set_corrupt(sc, ino); 552 553 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 554 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 555 xchk_ino_set_corrupt(sc, ino); 556 557 /* 558 * Warn if the running kernel can't handle the kinds of offsets 559 * needed to deal with the file size. In other words, if the 560 * pagecache can't cache all the blocks in this file due to 561 * overly large offsets, flag the inode for admin review. 562 */ 563 if (isize > mp->m_super->s_maxbytes) 564 xchk_ino_set_warning(sc, ino); 565 566 /* di_nblocks */ 567 if (flags2 & XFS_DIFLAG2_REFLINK) { 568 ; /* nblocks can exceed dblocks */ 569 } else if (flags & XFS_DIFLAG_REALTIME) { 570 /* 571 * nblocks is the sum of data extents (in the rtdev), 572 * attr extents (in the datadev), and both forks' bmbt 573 * blocks (in the datadev). This clumsy check is the 574 * best we can do without cross-referencing with the 575 * inode forks. 576 */ 577 if (be64_to_cpu(dip->di_nblocks) >= 578 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 579 xchk_ino_set_corrupt(sc, ino); 580 } else { 581 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 582 xchk_ino_set_corrupt(sc, ino); 583 } 584 585 xchk_inode_flags(sc, dip, ino, mode, flags); 586 587 xchk_inode_extsize(sc, dip, ino, mode, flags); 588 589 nextents = xfs_dfork_data_extents(dip); 590 naextents = xfs_dfork_attr_extents(dip); 591 592 /* di_nextents */ 593 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 594 switch (dip->di_format) { 595 case XFS_DINODE_FMT_EXTENTS: 596 if (nextents > fork_recs) 597 xchk_ino_set_corrupt(sc, ino); 598 break; 599 case XFS_DINODE_FMT_BTREE: 600 if (nextents <= fork_recs) 601 xchk_ino_set_corrupt(sc, ino); 602 break; 603 default: 604 if (nextents != 0) 605 xchk_ino_set_corrupt(sc, ino); 606 break; 607 } 608 609 /* di_forkoff */ 610 if (XFS_DFORK_BOFF(dip) >= mp->m_sb.sb_inodesize) 611 xchk_ino_set_corrupt(sc, ino); 612 if (naextents != 0 && dip->di_forkoff == 0) 613 xchk_ino_set_corrupt(sc, ino); 614 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 615 xchk_ino_set_corrupt(sc, ino); 616 617 /* di_aformat */ 618 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 619 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 620 dip->di_aformat != XFS_DINODE_FMT_BTREE) 621 xchk_ino_set_corrupt(sc, ino); 622 623 /* di_anextents */ 624 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 625 switch (dip->di_aformat) { 626 case XFS_DINODE_FMT_EXTENTS: 627 if (naextents > fork_recs) 628 xchk_ino_set_corrupt(sc, ino); 629 break; 630 case XFS_DINODE_FMT_BTREE: 631 if (naextents <= fork_recs) 632 xchk_ino_set_corrupt(sc, ino); 633 break; 634 default: 635 if (naextents != 0) 636 xchk_ino_set_corrupt(sc, ino); 637 } 638 639 if (dip->di_version >= 3) { 640 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); 641 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 642 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 643 flags2); 644 } 645 } 646 647 /* 648 * Make sure the finobt doesn't think this inode is free. 649 * We don't have to check the inobt ourselves because we got the inode via 650 * IGET_UNTRUSTED, which checks the inobt for us. 651 */ 652 static void 653 xchk_inode_xref_finobt( 654 struct xfs_scrub *sc, 655 xfs_ino_t ino) 656 { 657 struct xfs_inobt_rec_incore rec; 658 xfs_agino_t agino; 659 int has_record; 660 int error; 661 662 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 663 return; 664 665 agino = XFS_INO_TO_AGINO(sc->mp, ino); 666 667 /* 668 * Try to get the finobt record. If we can't get it, then we're 669 * in good shape. 670 */ 671 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 672 &has_record); 673 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 674 !has_record) 675 return; 676 677 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 678 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 679 !has_record) 680 return; 681 682 /* 683 * Otherwise, make sure this record either doesn't cover this inode, 684 * or that it does but it's marked present. 685 */ 686 if (rec.ir_startino > agino || 687 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 688 return; 689 690 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 691 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 692 } 693 694 /* Cross reference the inode fields with the forks. */ 695 STATIC void 696 xchk_inode_xref_bmap( 697 struct xfs_scrub *sc, 698 struct xfs_dinode *dip) 699 { 700 xfs_extnum_t nextents; 701 xfs_filblks_t count; 702 xfs_filblks_t acount; 703 int error; 704 705 if (xchk_skip_xref(sc->sm)) 706 return; 707 708 /* Walk all the extents to check nextents/naextents/nblocks. */ 709 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); 710 if (!xchk_should_check_xref(sc, &error, NULL)) 711 return; 712 if (nextents < xfs_dfork_data_extents(dip)) 713 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 714 715 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, &acount); 716 if (!xchk_should_check_xref(sc, &error, NULL)) 717 return; 718 if (nextents != xfs_dfork_attr_extents(dip)) 719 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 720 721 /* Check nblocks against the inode. */ 722 if (count + acount != be64_to_cpu(dip->di_nblocks)) 723 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 724 } 725 726 /* Cross-reference with the other btrees. */ 727 STATIC void 728 xchk_inode_xref( 729 struct xfs_scrub *sc, 730 xfs_ino_t ino, 731 struct xfs_dinode *dip) 732 { 733 xfs_agnumber_t agno; 734 xfs_agblock_t agbno; 735 int error; 736 737 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 738 return; 739 740 agno = XFS_INO_TO_AGNO(sc->mp, ino); 741 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 742 743 error = xchk_ag_init_existing(sc, agno, &sc->sa); 744 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 745 goto out_free; 746 747 xchk_xref_is_used_space(sc, agbno, 1); 748 xchk_inode_xref_finobt(sc, ino); 749 xchk_xref_is_only_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 750 xchk_xref_is_not_shared(sc, agbno, 1); 751 xchk_xref_is_not_cow_staging(sc, agbno, 1); 752 xchk_inode_xref_bmap(sc, dip); 753 754 out_free: 755 xchk_ag_free(sc, &sc->sa); 756 } 757 758 /* 759 * If the reflink iflag disagrees with a scan for shared data fork extents, 760 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 761 * any shared extents). We already checked for reflink iflag set on a non 762 * reflink filesystem. 763 */ 764 static void 765 xchk_inode_check_reflink_iflag( 766 struct xfs_scrub *sc, 767 xfs_ino_t ino) 768 { 769 struct xfs_mount *mp = sc->mp; 770 bool has_shared; 771 int error; 772 773 if (!xfs_has_reflink(mp)) 774 return; 775 776 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 777 &has_shared); 778 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 779 XFS_INO_TO_AGBNO(mp, ino), &error)) 780 return; 781 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 782 xchk_ino_set_preen(sc, ino); 783 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 784 xchk_ino_set_corrupt(sc, ino); 785 } 786 787 /* 788 * If this inode has zero link count, it must be on the unlinked list. If 789 * it has nonzero link count, it must not be on the unlinked list. 790 */ 791 STATIC void 792 xchk_inode_check_unlinked( 793 struct xfs_scrub *sc) 794 { 795 if (VFS_I(sc->ip)->i_nlink == 0) { 796 if (!xfs_inode_on_unlinked_list(sc->ip)) 797 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 798 } else { 799 if (xfs_inode_on_unlinked_list(sc->ip)) 800 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 801 } 802 } 803 804 /* Scrub an inode. */ 805 int 806 xchk_inode( 807 struct xfs_scrub *sc) 808 { 809 struct xfs_dinode di; 810 int error = 0; 811 812 /* 813 * If sc->ip is NULL, that means that the setup function called 814 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 815 * and a NULL inode, so flag the corruption error and return. 816 */ 817 if (!sc->ip) { 818 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 819 return 0; 820 } 821 822 /* Scrub the inode core. */ 823 xfs_inode_to_disk(sc->ip, &di, 0); 824 xchk_dinode(sc, &di, sc->ip->i_ino); 825 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 826 goto out; 827 828 /* 829 * Look for discrepancies between file's data blocks and the reflink 830 * iflag. We already checked the iflag against the file mode when 831 * we scrubbed the dinode. 832 */ 833 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 834 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 835 836 xchk_inode_check_unlinked(sc); 837 838 xchk_inode_xref(sc, sc->ip->i_ino, &di); 839 out: 840 return error; 841 } 842