1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_inode_buf.h" 21 #include "xfs_inode_fork.h" 22 #include "xfs_ialloc.h" 23 #include "xfs_da_format.h" 24 #include "xfs_reflink.h" 25 #include "xfs_alloc.h" 26 #include "xfs_rmap.h" 27 #include "xfs_rmap_btree.h" 28 #include "xfs_bmap.h" 29 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_util.h" 31 #include "xfs_dir2.h" 32 #include "xfs_dir2_priv.h" 33 #include "xfs_quota_defs.h" 34 #include "xfs_quota.h" 35 #include "xfs_ag.h" 36 #include "xfs_rtbitmap.h" 37 #include "xfs_attr_leaf.h" 38 #include "xfs_log_priv.h" 39 #include "xfs_health.h" 40 #include "xfs_symlink_remote.h" 41 #include "xfs_rtgroup.h" 42 #include "xfs_rtrmap_btree.h" 43 #include "xfs_rtrefcount_btree.h" 44 #include "scrub/xfs_scrub.h" 45 #include "scrub/scrub.h" 46 #include "scrub/common.h" 47 #include "scrub/btree.h" 48 #include "scrub/trace.h" 49 #include "scrub/repair.h" 50 #include "scrub/iscan.h" 51 #include "scrub/readdir.h" 52 #include "scrub/tempfile.h" 53 54 /* 55 * Inode Record Repair 56 * =================== 57 * 58 * Roughly speaking, inode problems can be classified based on whether or not 59 * they trip the dinode verifiers. If those trip, then we won't be able to 60 * xfs_iget ourselves the inode. 61 * 62 * Therefore, the xrep_dinode_* functions fix anything that will cause the 63 * inode buffer verifier or the dinode verifier. The xrep_inode_* functions 64 * fix things on live incore inodes. 
The inode repair functions make decisions 65 * with security and usability implications when reviving a file: 66 * 67 * - Files with zero di_mode or a garbage di_mode are converted to regular file 68 * that only root can read. This file may not actually contain user data, 69 * if the file was not previously a regular file. Setuid and setgid bits 70 * are cleared. 71 * 72 * - Zero-size directories can be truncated to look empty. It is necessary to 73 * run the bmapbtd and directory repair functions to fully rebuild the 74 * directory. 75 * 76 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary 77 * to run the bmapbtd and symlink repair functions to salvage the symlink. 78 * 79 * - Invalid extent size hints will be removed. 80 * 81 * - Quotacheck will be scheduled if we repaired an inode that was so badly 82 * damaged that the ondisk inode had to be rebuilt. 83 * 84 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero. 85 * Setuid and setgid bits are cleared. 86 * 87 * - Data and attr forks are reset to extents format with zero extents if the 88 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta 89 * repair functions to recover the space mapping. 90 * 91 * - ACLs will not be recovered if the attr fork is zapped or the extended 92 * attribute structure itself requires salvaging. 93 * 94 * - If the attr fork is zapped, the user and group ids are reset to root and 95 * the setuid and setgid bits are removed. 96 */ 97 98 /* 99 * All the information we need to repair the ondisk inode if we can't iget the 100 * incore inode. We don't allocate this buffer unless we're going to perform 101 * a repair to the ondisk inode cluster buffer. 102 */ 103 struct xrep_inode { 104 /* Inode mapping that we saved from the initial lookup attempt. */ 105 struct xfs_imap imap; 106 107 struct xfs_scrub *sc; 108 109 /* Blocks in use on the data device by data extents or bmbt blocks. 
*/ 110 xfs_rfsblock_t data_blocks; 111 112 /* Blocks in use on the rt device. */ 113 xfs_rfsblock_t rt_blocks; 114 115 /* Blocks in use by the attr fork. */ 116 xfs_rfsblock_t attr_blocks; 117 118 /* Number of data device extents for the data fork. */ 119 xfs_extnum_t data_extents; 120 121 /* 122 * Number of realtime device extents for the data fork. If 123 * data_extents and rt_extents indicate that the data fork has extents 124 * on both devices, we'll just back away slowly. 125 */ 126 xfs_extnum_t rt_extents; 127 128 /* Number of (data device) extents for the attr fork. */ 129 xfs_aextnum_t attr_extents; 130 131 /* Sick state to set after zapping parts of the inode. */ 132 unsigned int ino_sick_mask; 133 134 /* Must we remove all access from this file? */ 135 bool zap_acls; 136 137 /* Inode scanner to see if we can find the ftype from dirents */ 138 struct xchk_iscan ftype_iscan; 139 uint8_t alleged_ftype; 140 }; 141 142 /* 143 * Setup function for inode repair. @imap contains the ondisk inode mapping 144 * information so that we can correct the ondisk inode cluster buffer if 145 * necessary to make iget work. 146 */ 147 int 148 xrep_setup_inode( 149 struct xfs_scrub *sc, 150 const struct xfs_imap *imap) 151 { 152 struct xrep_inode *ri; 153 154 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); 155 if (!sc->buf) 156 return -ENOMEM; 157 158 ri = sc->buf; 159 memcpy(&ri->imap, imap, sizeof(struct xfs_imap)); 160 ri->sc = sc; 161 return 0; 162 } 163 164 /* 165 * Make sure this ondisk inode can pass the inode buffer verifier. This is 166 * not the same as the dinode verifier. 
167 */ 168 STATIC void 169 xrep_dinode_buf_core( 170 struct xfs_scrub *sc, 171 struct xfs_buf *bp, 172 unsigned int ioffset) 173 { 174 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset); 175 struct xfs_trans *tp = sc->tp; 176 struct xfs_mount *mp = sc->mp; 177 xfs_agino_t agino; 178 bool crc_ok = false; 179 bool magic_ok = false; 180 bool unlinked_ok = false; 181 182 agino = be32_to_cpu(dip->di_next_unlinked); 183 184 if (xfs_verify_agino_or_null(bp->b_pag, agino)) 185 unlinked_ok = true; 186 187 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 188 xfs_dinode_good_version(mp, dip->di_version)) 189 magic_ok = true; 190 191 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 192 XFS_DINODE_CRC_OFF)) 193 crc_ok = true; 194 195 if (magic_ok && unlinked_ok && crc_ok) 196 return; 197 198 if (!magic_ok) { 199 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 200 dip->di_version = 3; 201 } 202 if (!unlinked_ok) 203 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 204 xfs_dinode_calc_crc(mp, dip); 205 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); 206 xfs_trans_log_buf(tp, bp, ioffset, 207 ioffset + sizeof(struct xfs_dinode) - 1); 208 } 209 210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */ 211 STATIC void 212 xrep_dinode_buf( 213 struct xfs_scrub *sc, 214 struct xfs_buf *bp) 215 { 216 struct xfs_mount *mp = sc->mp; 217 int i; 218 int ni; 219 220 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 221 for (i = 0; i < ni; i++) 222 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog); 223 } 224 225 /* Reinitialize things that never change in an inode. 
*/ 226 STATIC void 227 xrep_dinode_header( 228 struct xfs_scrub *sc, 229 struct xfs_dinode *dip) 230 { 231 trace_xrep_dinode_header(sc, dip); 232 233 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 234 if (!xfs_dinode_good_version(sc->mp, dip->di_version)) 235 dip->di_version = 3; 236 dip->di_ino = cpu_to_be64(sc->sm->sm_ino); 237 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid); 238 dip->di_gen = cpu_to_be32(sc->sm->sm_gen); 239 } 240 241 /* 242 * If this directory entry points to the scrub target inode, then the directory 243 * we're scanning is the parent of the scrub target inode. 244 */ 245 STATIC int 246 xrep_dinode_findmode_dirent( 247 struct xfs_scrub *sc, 248 struct xfs_inode *dp, 249 xfs_dir2_dataptr_t dapos, 250 const struct xfs_name *name, 251 xfs_ino_t ino, 252 void *priv) 253 { 254 struct xrep_inode *ri = priv; 255 int error = 0; 256 257 if (xchk_should_terminate(ri->sc, &error)) 258 return error; 259 260 if (ino != sc->sm->sm_ino) 261 return 0; 262 263 /* Ignore garbage directory entry names. */ 264 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) 265 return -EFSCORRUPTED; 266 267 /* Don't pick up dot or dotdot entries; we only want child dirents. */ 268 if (xfs_dir2_samename(name, &xfs_name_dotdot) || 269 xfs_dir2_samename(name, &xfs_name_dot)) 270 return 0; 271 272 /* 273 * Uhoh, more than one parent for this inode and they don't agree on 274 * the file type? 275 */ 276 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN && 277 ri->alleged_ftype != name->type) { 278 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type, 279 ri->alleged_ftype); 280 return -EFSCORRUPTED; 281 } 282 283 /* We found a potential parent; remember the ftype. */ 284 trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type); 285 ri->alleged_ftype = name->type; 286 return 0; 287 } 288 289 /* Try to lock a directory, or wait a jiffy. 
*/ 290 static inline int 291 xrep_dinode_ilock_nowait( 292 struct xfs_inode *dp, 293 unsigned int lock_mode) 294 { 295 if (xfs_ilock_nowait(dp, lock_mode)) 296 return true; 297 298 schedule_timeout_killable(1); 299 return false; 300 } 301 302 /* 303 * Try to lock a directory to look for ftype hints. Since we already hold the 304 * AGI buffer, we cannot block waiting for the ILOCK because rename can take 305 * the ILOCK and then try to lock AGIs. 306 */ 307 STATIC int 308 xrep_dinode_trylock_directory( 309 struct xrep_inode *ri, 310 struct xfs_inode *dp, 311 unsigned int *lock_modep) 312 { 313 unsigned long deadline = jiffies + msecs_to_jiffies(30000); 314 unsigned int lock_mode; 315 int error = 0; 316 317 do { 318 if (xchk_should_terminate(ri->sc, &error)) 319 return error; 320 321 if (xfs_need_iread_extents(&dp->i_df)) 322 lock_mode = XFS_ILOCK_EXCL; 323 else 324 lock_mode = XFS_ILOCK_SHARED; 325 326 if (xrep_dinode_ilock_nowait(dp, lock_mode)) { 327 *lock_modep = lock_mode; 328 return 0; 329 } 330 } while (!time_is_before_jiffies(deadline)); 331 return -EBUSY; 332 } 333 334 /* 335 * If this is a directory, walk the dirents looking for any that point to the 336 * scrub target inode. 337 */ 338 STATIC int 339 xrep_dinode_findmode_walk_directory( 340 struct xrep_inode *ri, 341 struct xfs_inode *dp) 342 { 343 struct xfs_scrub *sc = ri->sc; 344 unsigned int lock_mode; 345 int error = 0; 346 347 /* Ignore temporary repair directories. */ 348 if (xrep_is_tempfile(dp)) 349 return 0; 350 351 /* 352 * Scan the directory to see if there it contains an entry pointing to 353 * the directory that we are repairing. 354 */ 355 error = xrep_dinode_trylock_directory(ri, dp, &lock_mode); 356 if (error) 357 return error; 358 359 /* 360 * If this directory is known to be sick, we cannot scan it reliably 361 * and must abort. 
362 */ 363 if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE | 364 XFS_SICK_INO_BMBTD | 365 XFS_SICK_INO_DIR)) { 366 error = -EFSCORRUPTED; 367 goto out_unlock; 368 } 369 370 /* 371 * We cannot complete our parent pointer scan if a directory looks as 372 * though it has been zapped by the inode record repair code. 373 */ 374 if (xchk_dir_looks_zapped(dp)) { 375 error = -EBUSY; 376 goto out_unlock; 377 } 378 379 error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri); 380 if (error) 381 goto out_unlock; 382 383 out_unlock: 384 xfs_iunlock(dp, lock_mode); 385 return error; 386 } 387 388 /* 389 * Try to find the mode of the inode being repaired by looking for directories 390 * that point down to this file. 391 */ 392 STATIC int 393 xrep_dinode_find_mode( 394 struct xrep_inode *ri, 395 uint16_t *mode) 396 { 397 struct xfs_scrub *sc = ri->sc; 398 struct xfs_inode *dp; 399 int error; 400 401 /* No ftype means we have no other metadata to consult. */ 402 if (!xfs_has_ftype(sc->mp)) { 403 *mode = S_IFREG; 404 return 0; 405 } 406 407 /* 408 * Scan all directories for parents that might point down to this 409 * inode. Skip the inode being repaired during the scan since it 410 * cannot be its own parent. Note that we still hold the AGI locked 411 * so there's a real possibility that _iscan_iter can return EBUSY. 
412 */ 413 xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan); 414 xchk_iscan_set_agi_trylock(&ri->ftype_iscan); 415 ri->ftype_iscan.skip_ino = sc->sm->sm_ino; 416 ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN; 417 while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) { 418 if (S_ISDIR(VFS_I(dp)->i_mode)) 419 error = xrep_dinode_findmode_walk_directory(ri, dp); 420 xchk_iscan_mark_visited(&ri->ftype_iscan, dp); 421 xchk_irele(sc, dp); 422 if (error < 0) 423 break; 424 if (xchk_should_terminate(sc, &error)) 425 break; 426 } 427 xchk_iscan_iter_finish(&ri->ftype_iscan); 428 xchk_iscan_teardown(&ri->ftype_iscan); 429 430 if (error == -EBUSY) { 431 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) { 432 /* 433 * If we got an EBUSY after finding at least one 434 * dirent, that means the scan found an inode on the 435 * inactivation list and could not open it. Accept the 436 * alleged ftype and install a new mode below. 437 */ 438 error = 0; 439 } else if (!(sc->flags & XCHK_TRY_HARDER)) { 440 /* 441 * Otherwise, retry the operation one time to see if 442 * the reason for the delay is an inode from the same 443 * cluster buffer waiting on the inactivation list. 444 */ 445 error = -EDEADLOCK; 446 } 447 } 448 if (error) 449 return error; 450 451 /* 452 * Convert the discovered ftype into the file mode. If all else fails, 453 * return S_IFREG. 454 */ 455 switch (ri->alleged_ftype) { 456 case XFS_DIR3_FT_DIR: 457 *mode = S_IFDIR; 458 break; 459 case XFS_DIR3_FT_WHT: 460 case XFS_DIR3_FT_CHRDEV: 461 *mode = S_IFCHR; 462 break; 463 case XFS_DIR3_FT_BLKDEV: 464 *mode = S_IFBLK; 465 break; 466 case XFS_DIR3_FT_FIFO: 467 *mode = S_IFIFO; 468 break; 469 case XFS_DIR3_FT_SOCK: 470 *mode = S_IFSOCK; 471 break; 472 case XFS_DIR3_FT_SYMLINK: 473 *mode = S_IFLNK; 474 break; 475 default: 476 *mode = S_IFREG; 477 break; 478 } 479 return 0; 480 } 481 482 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. 
*/ 483 STATIC int 484 xrep_dinode_mode( 485 struct xrep_inode *ri, 486 struct xfs_dinode *dip) 487 { 488 struct xfs_scrub *sc = ri->sc; 489 uint16_t mode = be16_to_cpu(dip->di_mode); 490 int error; 491 492 trace_xrep_dinode_mode(sc, dip); 493 494 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) 495 return 0; 496 497 /* Try to fix the mode. If we cannot, then leave everything alone. */ 498 error = xrep_dinode_find_mode(ri, &mode); 499 switch (error) { 500 case -EINTR: 501 case -EBUSY: 502 case -EDEADLOCK: 503 /* temporary failure or fatal signal */ 504 return error; 505 case 0: 506 /* found mode */ 507 break; 508 default: 509 /* some other error, assume S_IFREG */ 510 mode = S_IFREG; 511 break; 512 } 513 514 /* bad mode, so we set it to a file that only root can read */ 515 dip->di_mode = cpu_to_be16(mode); 516 dip->di_uid = 0; 517 dip->di_gid = 0; 518 ri->zap_acls = true; 519 return 0; 520 } 521 522 /* Fix unused link count fields having nonzero values. */ 523 STATIC void 524 xrep_dinode_nlinks( 525 struct xfs_dinode *dip) 526 { 527 if (dip->di_version < 2) { 528 dip->di_nlink = 0; 529 return; 530 } 531 532 if (xfs_dinode_is_metadir(dip)) { 533 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) 534 dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN); 535 } else { 536 dip->di_metatype = 0; 537 } 538 } 539 540 /* Fix any conflicting flags that the verifiers complain about. */ 541 STATIC void 542 xrep_dinode_flags( 543 struct xfs_scrub *sc, 544 struct xfs_dinode *dip, 545 bool isrt) 546 { 547 struct xfs_mount *mp = sc->mp; 548 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 549 uint16_t flags = be16_to_cpu(dip->di_flags); 550 uint16_t mode = be16_to_cpu(dip->di_mode); 551 552 trace_xrep_dinode_flags(sc, dip); 553 554 if (isrt) 555 flags |= XFS_DIFLAG_REALTIME; 556 else 557 flags &= ~XFS_DIFLAG_REALTIME; 558 559 /* 560 * For regular files on a reflink filesystem, set the REFLINK flag to 561 * protect shared extents. 
A later stage will actually check those 562 * extents and clear the flag if possible. 563 */ 564 if (xfs_has_reflink(mp) && S_ISREG(mode)) 565 flags2 |= XFS_DIFLAG2_REFLINK; 566 else 567 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); 568 if (!xfs_has_bigtime(mp)) 569 flags2 &= ~XFS_DIFLAG2_BIGTIME; 570 if (!xfs_has_large_extent_counts(mp)) 571 flags2 &= ~XFS_DIFLAG2_NREXT64; 572 if (flags2 & XFS_DIFLAG2_NREXT64) 573 dip->di_nrext64_pad = 0; 574 else if (dip->di_version >= 3) 575 dip->di_v3_pad = 0; 576 577 if (flags2 & XFS_DIFLAG2_METADATA) { 578 xfs_failaddr_t fa; 579 580 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags, 581 flags2); 582 if (fa) 583 flags2 &= ~XFS_DIFLAG2_METADATA; 584 } 585 586 dip->di_flags = cpu_to_be16(flags); 587 dip->di_flags2 = cpu_to_be64(flags2); 588 } 589 590 /* 591 * Blow out symlink; now it points nowhere. We don't have to worry about 592 * incore state because this inode is failing the verifiers. 593 */ 594 STATIC void 595 xrep_dinode_zap_symlink( 596 struct xrep_inode *ri, 597 struct xfs_dinode *dip) 598 { 599 struct xfs_scrub *sc = ri->sc; 600 char *p; 601 602 trace_xrep_dinode_zap_symlink(sc, dip); 603 604 dip->di_format = XFS_DINODE_FMT_LOCAL; 605 dip->di_size = cpu_to_be64(1); 606 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 607 *p = '?'; 608 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; 609 } 610 611 /* 612 * Blow out dir, make the parent point to the root. In the future repair will 613 * reconstruct this directory for us. Note that there's no in-core directory 614 * inode because the sf verifier tripped, so we don't have to worry about the 615 * dentry cache. 
616 */ 617 STATIC void 618 xrep_dinode_zap_dir( 619 struct xrep_inode *ri, 620 struct xfs_dinode *dip) 621 { 622 struct xfs_scrub *sc = ri->sc; 623 struct xfs_mount *mp = sc->mp; 624 struct xfs_dir2_sf_hdr *sfp; 625 int i8count; 626 627 trace_xrep_dinode_zap_dir(sc, dip); 628 629 dip->di_format = XFS_DINODE_FMT_LOCAL; 630 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM; 631 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 632 sfp->count = 0; 633 sfp->i8count = i8count; 634 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino); 635 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count)); 636 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED; 637 } 638 639 /* Make sure we don't have a garbage file size. */ 640 STATIC void 641 xrep_dinode_size( 642 struct xrep_inode *ri, 643 struct xfs_dinode *dip) 644 { 645 struct xfs_scrub *sc = ri->sc; 646 uint64_t size = be64_to_cpu(dip->di_size); 647 uint16_t mode = be16_to_cpu(dip->di_mode); 648 649 trace_xrep_dinode_size(sc, dip); 650 651 switch (mode & S_IFMT) { 652 case S_IFIFO: 653 case S_IFCHR: 654 case S_IFBLK: 655 case S_IFSOCK: 656 /* di_size can't be nonzero for special files */ 657 dip->di_size = 0; 658 break; 659 case S_IFREG: 660 /* Regular files can't be larger than 2^63-1 bytes. */ 661 dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); 662 break; 663 case S_IFLNK: 664 /* 665 * Truncate ridiculously oversized symlinks. If the size is 666 * zero, reset it to point to the current directory. Both of 667 * these conditions trigger dinode verifier errors, so there 668 * is no in-core state to reset. 669 */ 670 if (size > XFS_SYMLINK_MAXLEN) 671 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); 672 else if (size == 0) 673 xrep_dinode_zap_symlink(ri, dip); 674 break; 675 case S_IFDIR: 676 /* 677 * Directories can't have a size larger than 32G. If the size 678 * is zero, reset it to an empty directory. Both of these 679 * conditions trigger dinode verifier errors, so there is no 680 * in-core state to reset. 
681 */ 682 if (size > XFS_DIR2_SPACE_SIZE) 683 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); 684 else if (size == 0) 685 xrep_dinode_zap_dir(ri, dip); 686 break; 687 } 688 } 689 690 /* Fix extent size hints. */ 691 STATIC void 692 xrep_dinode_extsize_hints( 693 struct xfs_scrub *sc, 694 struct xfs_dinode *dip) 695 { 696 struct xfs_mount *mp = sc->mp; 697 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 698 uint16_t flags = be16_to_cpu(dip->di_flags); 699 uint16_t mode = be16_to_cpu(dip->di_mode); 700 701 xfs_failaddr_t fa; 702 703 trace_xrep_dinode_extsize_hints(sc, dip); 704 705 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 706 mode, flags); 707 if (fa) { 708 dip->di_extsize = 0; 709 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE | 710 XFS_DIFLAG_EXTSZINHERIT); 711 } 712 713 if (dip->di_version < 3 || 714 (xfs_has_zoned(sc->mp) && 715 dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP))) 716 return; 717 718 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 719 mode, flags, flags2); 720 if (fa) { 721 dip->di_cowextsize = 0; 722 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE); 723 } 724 } 725 726 /* Count extents and blocks for an inode given an rmap. */ 727 STATIC int 728 xrep_dinode_walk_rmap( 729 struct xfs_btree_cur *cur, 730 const struct xfs_rmap_irec *rec, 731 void *priv) 732 { 733 struct xrep_inode *ri = priv; 734 int error = 0; 735 736 if (xchk_should_terminate(ri->sc, &error)) 737 return error; 738 739 /* We only care about this inode. */ 740 if (rec->rm_owner != ri->sc->sm->sm_ino) 741 return 0; 742 743 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) { 744 ri->attr_blocks += rec->rm_blockcount; 745 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 746 ri->attr_extents++; 747 748 return 0; 749 } 750 751 ri->data_blocks += rec->rm_blockcount; 752 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 753 ri->data_extents++; 754 755 return 0; 756 } 757 758 /* Count extents and blocks for an inode from all AG rmap data. 
*/ 759 STATIC int 760 xrep_dinode_count_ag_rmaps( 761 struct xrep_inode *ri, 762 struct xfs_perag *pag) 763 { 764 struct xfs_btree_cur *cur; 765 struct xfs_buf *agf; 766 int error; 767 768 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); 769 if (error) 770 return error; 771 772 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag); 773 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri); 774 xfs_btree_del_cursor(cur, error); 775 xfs_trans_brelse(ri->sc->tp, agf); 776 return error; 777 } 778 779 /* Count extents and blocks for an inode given an rt rmap. */ 780 STATIC int 781 xrep_dinode_walk_rtrmap( 782 struct xfs_btree_cur *cur, 783 const struct xfs_rmap_irec *rec, 784 void *priv) 785 { 786 struct xrep_inode *ri = priv; 787 int error = 0; 788 789 if (xchk_should_terminate(ri->sc, &error)) 790 return error; 791 792 /* We only care about this inode. */ 793 if (rec->rm_owner != ri->sc->sm->sm_ino) 794 return 0; 795 796 if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) 797 return -EFSCORRUPTED; 798 799 ri->rt_blocks += rec->rm_blockcount; 800 ri->rt_extents++; 801 return 0; 802 } 803 804 /* Count extents and blocks for an inode from all realtime rmap data. */ 805 STATIC int 806 xrep_dinode_count_rtgroup_rmaps( 807 struct xrep_inode *ri, 808 struct xfs_rtgroup *rtg) 809 { 810 struct xfs_scrub *sc = ri->sc; 811 int error; 812 813 error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP); 814 if (error) 815 return error; 816 817 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap, 818 ri); 819 xchk_rtgroup_btcur_free(&sc->sr); 820 xchk_rtgroup_free(sc, &sc->sr); 821 return error; 822 } 823 824 /* Count extents and blocks for a given inode from all rmap data. 
*/ 825 STATIC int 826 xrep_dinode_count_rmaps( 827 struct xrep_inode *ri) 828 { 829 struct xfs_perag *pag = NULL; 830 struct xfs_rtgroup *rtg = NULL; 831 int error; 832 833 if (!xfs_has_rmapbt(ri->sc->mp)) 834 return -EOPNOTSUPP; 835 836 while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) { 837 error = xrep_dinode_count_rtgroup_rmaps(ri, rtg); 838 if (error) { 839 xfs_rtgroup_rele(rtg); 840 return error; 841 } 842 } 843 844 while ((pag = xfs_perag_next(ri->sc->mp, pag))) { 845 error = xrep_dinode_count_ag_rmaps(ri, pag); 846 if (error) { 847 xfs_perag_rele(pag); 848 return error; 849 } 850 } 851 852 /* Can't have extents on both the rt and the data device. */ 853 if (ri->data_extents && ri->rt_extents) 854 return -EFSCORRUPTED; 855 856 trace_xrep_dinode_count_rmaps(ri->sc, 857 ri->data_blocks, ri->rt_blocks, ri->attr_blocks, 858 ri->data_extents, ri->rt_extents, ri->attr_extents); 859 return 0; 860 } 861 862 /* Return true if this extents-format ifork looks like garbage. */ 863 STATIC bool 864 xrep_dinode_bad_extents_fork( 865 struct xfs_scrub *sc, 866 struct xfs_dinode *dip, 867 unsigned int dfork_size, 868 int whichfork) 869 { 870 struct xfs_bmbt_irec new; 871 struct xfs_bmbt_rec *dp; 872 xfs_extnum_t nex; 873 bool isrt; 874 unsigned int i; 875 876 nex = xfs_dfork_nextents(dip, whichfork); 877 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec)) 878 return true; 879 880 dp = XFS_DFORK_PTR(dip, whichfork); 881 882 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); 883 for (i = 0; i < nex; i++, dp++) { 884 xfs_failaddr_t fa; 885 886 xfs_bmbt_disk_get_all(dp, &new); 887 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork, 888 &new); 889 if (fa) 890 return true; 891 } 892 893 return false; 894 } 895 896 /* Return true if this btree-format ifork looks like garbage. 
*/
STATIC bool
xrep_dinode_bad_bmbt_fork(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	unsigned int		dfork_size,
	int			whichfork)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_bmdr_block	*root;
	xfs_extnum_t		nex;
	unsigned int		maxrecs;
	unsigned int		nrecs;
	unsigned int		level;
	unsigned int		i;

	/*
	 * If the extent records would have fit in the fork area, this fork
	 * is not accepted in btree format.
	 */
	nex = xfs_dfork_nextents(dip, whichfork);
	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
		return true;

	/* The fork area has to be large enough to hold the root header. */
	if (dfork_size < sizeof(struct xfs_bmdr_block))
		return true;

	root = XFS_DFORK_PTR(dip, whichfork);
	nrecs = be16_to_cpu(root->bb_numrecs);
	level = be16_to_cpu(root->bb_level);

	/* The root needs at least one record and must fit in the fork. */
	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
		return true;

	/* The root level must be nonzero and below the maximum height. */
	if (level == 0 || level >= XFS_BM_MAXLEVELS(mp, whichfork))
		return true;

	/* Check each key's file offset and each pointer's block number. */
	maxrecs = xfs_bmdr_maxrecs(dfork_size, 0);
	for (i = 1; i <= nrecs; i++) {
		struct xfs_bmbt_key	*key = xfs_bmdr_key_addr(root, i);
		xfs_bmbt_ptr_t		*ptr;

		if (!xfs_verify_fileoff(mp, be64_to_cpu(key->br_startoff)))
			return true;

		ptr = xfs_bmdr_ptr_addr(root, i, maxrecs);
		if (!xfs_verify_fsbno(mp, be64_to_cpu(*ptr)))
			return true;
	}

	return false;
}

/* Return true if this rmap-format ifork looks like garbage.
*/ 949 STATIC bool 950 xrep_dinode_bad_rtrmapbt_fork( 951 struct xfs_scrub *sc, 952 struct xfs_dinode *dip, 953 unsigned int dfork_size) 954 { 955 struct xfs_rtrmap_root *dfp; 956 unsigned int nrecs; 957 unsigned int level; 958 959 if (dfork_size < sizeof(struct xfs_rtrmap_root)) 960 return true; 961 962 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 963 nrecs = be16_to_cpu(dfp->bb_numrecs); 964 level = be16_to_cpu(dfp->bb_level); 965 966 if (level > sc->mp->m_rtrmap_maxlevels) 967 return true; 968 if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size) 969 return true; 970 if (level > 0 && nrecs == 0) 971 return true; 972 973 return false; 974 } 975 976 /* Return true if this refcount-format ifork looks like garbage. */ 977 STATIC bool 978 xrep_dinode_bad_rtrefcountbt_fork( 979 struct xfs_scrub *sc, 980 struct xfs_dinode *dip, 981 unsigned int dfork_size) 982 { 983 struct xfs_rtrefcount_root *dfp; 984 unsigned int nrecs; 985 unsigned int level; 986 987 if (dfork_size < sizeof(struct xfs_rtrefcount_root)) 988 return true; 989 990 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 991 nrecs = be16_to_cpu(dfp->bb_numrecs); 992 level = be16_to_cpu(dfp->bb_level); 993 994 if (level > sc->mp->m_rtrefc_maxlevels) 995 return true; 996 if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size) 997 return true; 998 if (level > 0 && nrecs == 0) 999 return true; 1000 1001 return false; 1002 } 1003 1004 /* Check a metadata-btree fork. 
*/ 1005 STATIC bool 1006 xrep_dinode_bad_metabt_fork( 1007 struct xfs_scrub *sc, 1008 struct xfs_dinode *dip, 1009 unsigned int dfork_size, 1010 int whichfork) 1011 { 1012 if (whichfork != XFS_DATA_FORK) 1013 return true; 1014 1015 switch (be16_to_cpu(dip->di_metatype)) { 1016 case XFS_METAFILE_RTRMAP: 1017 return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size); 1018 case XFS_METAFILE_RTREFCOUNT: 1019 return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size); 1020 default: 1021 return true; 1022 } 1023 1024 return false; 1025 } 1026 1027 /* 1028 * Check the data fork for things that will fail the ifork verifiers or the 1029 * ifork formatters. 1030 */ 1031 STATIC bool 1032 xrep_dinode_check_dfork( 1033 struct xfs_scrub *sc, 1034 struct xfs_dinode *dip, 1035 uint16_t mode) 1036 { 1037 void *dfork_ptr; 1038 int64_t data_size; 1039 unsigned int fmt; 1040 unsigned int dfork_size; 1041 1042 /* 1043 * Verifier functions take signed int64_t, so check for bogus negative 1044 * values first. 
1045 */ 1046 data_size = be64_to_cpu(dip->di_size); 1047 if (data_size < 0) 1048 return true; 1049 1050 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); 1051 switch (mode & S_IFMT) { 1052 case S_IFIFO: 1053 case S_IFCHR: 1054 case S_IFBLK: 1055 case S_IFSOCK: 1056 if (fmt != XFS_DINODE_FMT_DEV) 1057 return true; 1058 break; 1059 case S_IFREG: 1060 switch (fmt) { 1061 case XFS_DINODE_FMT_LOCAL: 1062 return true; 1063 case XFS_DINODE_FMT_EXTENTS: 1064 case XFS_DINODE_FMT_BTREE: 1065 case XFS_DINODE_FMT_META_BTREE: 1066 break; 1067 default: 1068 return true; 1069 } 1070 break; 1071 case S_IFLNK: 1072 case S_IFDIR: 1073 switch (fmt) { 1074 case XFS_DINODE_FMT_LOCAL: 1075 case XFS_DINODE_FMT_EXTENTS: 1076 case XFS_DINODE_FMT_BTREE: 1077 break; 1078 default: 1079 return true; 1080 } 1081 break; 1082 default: 1083 return true; 1084 } 1085 1086 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK); 1087 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1088 1089 switch (fmt) { 1090 case XFS_DINODE_FMT_DEV: 1091 break; 1092 case XFS_DINODE_FMT_LOCAL: 1093 /* dir/symlink structure cannot be larger than the fork */ 1094 if (data_size > dfork_size) 1095 return true; 1096 /* directory structure must pass verification. */ 1097 if (S_ISDIR(mode) && 1098 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL) 1099 return true; 1100 /* symlink structure must pass verification. 
*/ 1101 if (S_ISLNK(mode) && 1102 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL) 1103 return true; 1104 break; 1105 case XFS_DINODE_FMT_EXTENTS: 1106 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size, 1107 XFS_DATA_FORK)) 1108 return true; 1109 break; 1110 case XFS_DINODE_FMT_BTREE: 1111 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size, 1112 XFS_DATA_FORK)) 1113 return true; 1114 break; 1115 case XFS_DINODE_FMT_META_BTREE: 1116 if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size, 1117 XFS_DATA_FORK)) 1118 return true; 1119 break; 1120 default: 1121 return true; 1122 } 1123 1124 return false; 1125 } 1126 1127 static void 1128 xrep_dinode_set_data_nextents( 1129 struct xfs_dinode *dip, 1130 xfs_extnum_t nextents) 1131 { 1132 if (xfs_dinode_has_large_extent_counts(dip)) 1133 dip->di_big_nextents = cpu_to_be64(nextents); 1134 else 1135 dip->di_nextents = cpu_to_be32(nextents); 1136 } 1137 1138 static void 1139 xrep_dinode_set_attr_nextents( 1140 struct xfs_dinode *dip, 1141 xfs_extnum_t nextents) 1142 { 1143 if (xfs_dinode_has_large_extent_counts(dip)) 1144 dip->di_big_anextents = cpu_to_be32(nextents); 1145 else 1146 dip->di_anextents = cpu_to_be16(nextents); 1147 } 1148 1149 /* Reset the data fork to something sane. */ 1150 STATIC void 1151 xrep_dinode_zap_dfork( 1152 struct xrep_inode *ri, 1153 struct xfs_dinode *dip, 1154 uint16_t mode) 1155 { 1156 struct xfs_scrub *sc = ri->sc; 1157 1158 trace_xrep_dinode_zap_dfork(sc, dip); 1159 1160 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED; 1161 1162 xrep_dinode_set_data_nextents(dip, 0); 1163 ri->data_blocks = 0; 1164 ri->rt_blocks = 0; 1165 1166 /* Special files always get reset to DEV */ 1167 switch (mode & S_IFMT) { 1168 case S_IFIFO: 1169 case S_IFCHR: 1170 case S_IFBLK: 1171 case S_IFSOCK: 1172 dip->di_format = XFS_DINODE_FMT_DEV; 1173 dip->di_size = 0; 1174 return; 1175 } 1176 1177 /* 1178 * If we have data extents, reset to an empty map and hope the user 1179 * will run the bmapbtd checker next. 
1180 */ 1181 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) { 1182 dip->di_format = XFS_DINODE_FMT_EXTENTS; 1183 return; 1184 } 1185 1186 /* Otherwise, reset the local format to the minimum. */ 1187 switch (mode & S_IFMT) { 1188 case S_IFLNK: 1189 xrep_dinode_zap_symlink(ri, dip); 1190 break; 1191 case S_IFDIR: 1192 xrep_dinode_zap_dir(ri, dip); 1193 break; 1194 } 1195 } 1196 1197 /* 1198 * Check the attr fork for things that will fail the ifork verifiers or the 1199 * ifork formatters. 1200 */ 1201 STATIC bool 1202 xrep_dinode_check_afork( 1203 struct xfs_scrub *sc, 1204 struct xfs_dinode *dip) 1205 { 1206 struct xfs_attr_sf_hdr *afork_ptr; 1207 size_t attr_size; 1208 unsigned int afork_size; 1209 1210 if (XFS_DFORK_BOFF(dip) == 0) 1211 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS || 1212 xfs_dfork_attr_extents(dip) != 0; 1213 1214 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 1215 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 1216 1217 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { 1218 case XFS_DINODE_FMT_LOCAL: 1219 /* Fork has to be large enough to extract the xattr size. */ 1220 if (afork_size < sizeof(struct xfs_attr_sf_hdr)) 1221 return true; 1222 1223 /* xattr structure cannot be larger than the fork */ 1224 attr_size = be16_to_cpu(afork_ptr->totsize); 1225 if (attr_size > afork_size) 1226 return true; 1227 1228 /* xattr structure must pass verification. 
*/ 1229 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; 1230 case XFS_DINODE_FMT_EXTENTS: 1231 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size, 1232 XFS_ATTR_FORK)) 1233 return true; 1234 break; 1235 case XFS_DINODE_FMT_BTREE: 1236 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size, 1237 XFS_ATTR_FORK)) 1238 return true; 1239 break; 1240 case XFS_DINODE_FMT_META_BTREE: 1241 if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size, 1242 XFS_ATTR_FORK)) 1243 return true; 1244 break; 1245 default: 1246 return true; 1247 } 1248 1249 return false; 1250 } 1251 1252 /* 1253 * Reset the attr fork to empty. Since the attr fork could have contained 1254 * ACLs, make the file readable only by root. 1255 */ 1256 STATIC void 1257 xrep_dinode_zap_afork( 1258 struct xrep_inode *ri, 1259 struct xfs_dinode *dip, 1260 uint16_t mode) 1261 { 1262 struct xfs_scrub *sc = ri->sc; 1263 1264 trace_xrep_dinode_zap_afork(sc, dip); 1265 1266 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED; 1267 1268 dip->di_aformat = XFS_DINODE_FMT_EXTENTS; 1269 xrep_dinode_set_attr_nextents(dip, 0); 1270 ri->attr_blocks = 0; 1271 1272 /* 1273 * If the data fork is in btree format, removing the attr fork entirely 1274 * might cause verifier failures if the next level down in the bmbt 1275 * could now fit in the data fork area. 1276 */ 1277 if (dip->di_format != XFS_DINODE_FMT_BTREE) 1278 dip->di_forkoff = 0; 1279 dip->di_mode = cpu_to_be16(mode & ~0777); 1280 dip->di_uid = 0; 1281 dip->di_gid = 0; 1282 } 1283 1284 /* Make sure the fork offset is a sensible value. 
*/ 1285 STATIC void 1286 xrep_dinode_ensure_forkoff( 1287 struct xrep_inode *ri, 1288 struct xfs_dinode *dip, 1289 uint16_t mode) 1290 { 1291 struct xfs_bmdr_block *bmdr; 1292 struct xfs_rtrmap_root *rmdr; 1293 struct xfs_rtrefcount_root *rcdr; 1294 struct xfs_scrub *sc = ri->sc; 1295 xfs_extnum_t attr_extents, data_extents; 1296 size_t bmdr_minsz = xfs_bmdr_space_calc(1); 1297 unsigned int lit_sz = XFS_LITINO(sc->mp); 1298 unsigned int afork_min, dfork_min; 1299 1300 trace_xrep_dinode_ensure_forkoff(sc, dip); 1301 1302 /* 1303 * Before calling this function, xrep_dinode_core ensured that both 1304 * forks actually fit inside their respective literal areas. If this 1305 * was not the case, the fork was reset to FMT_EXTENTS with zero 1306 * records. If the rmapbt scan found attr or data fork blocks, this 1307 * will be noted in the dinode_stats, and we must leave enough room 1308 * for the bmap repair code to reconstruct the mapping structure. 1309 * 1310 * First, compute the minimum space required for the attr fork. 1311 */ 1312 switch (dip->di_aformat) { 1313 case XFS_DINODE_FMT_LOCAL: 1314 /* 1315 * If we still have a shortform xattr structure at all, that 1316 * means the attr fork area was exactly large enough to fit 1317 * the sf structure. 1318 */ 1319 afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 1320 break; 1321 case XFS_DINODE_FMT_EXTENTS: 1322 attr_extents = xfs_dfork_attr_extents(dip); 1323 if (attr_extents) { 1324 /* 1325 * We must maintain sufficient space to hold the entire 1326 * extent map array in the data fork. Note that we 1327 * previously zapped the fork if it had no chance of 1328 * fitting in the inode. 1329 */ 1330 afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents; 1331 } else if (ri->attr_extents > 0) { 1332 /* 1333 * The attr fork thinks it has zero extents, but we 1334 * found some xattr extents. 
We need to leave enough 1335 * empty space here so that the incore attr fork will 1336 * get created (and hence trigger the attr fork bmap 1337 * repairer). 1338 */ 1339 afork_min = bmdr_minsz; 1340 } else { 1341 /* No extents on disk or found in rmapbt. */ 1342 afork_min = 0; 1343 } 1344 break; 1345 case XFS_DINODE_FMT_BTREE: 1346 /* Must have space for btree header and key/pointers. */ 1347 bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 1348 afork_min = xfs_bmap_broot_space(sc->mp, bmdr); 1349 break; 1350 default: 1351 /* We should never see any other formats. */ 1352 afork_min = 0; 1353 break; 1354 } 1355 1356 /* Compute the minimum space required for the data fork. */ 1357 switch (dip->di_format) { 1358 case XFS_DINODE_FMT_DEV: 1359 dfork_min = sizeof(__be32); 1360 break; 1361 case XFS_DINODE_FMT_UUID: 1362 dfork_min = sizeof(uuid_t); 1363 break; 1364 case XFS_DINODE_FMT_LOCAL: 1365 /* 1366 * If we still have a shortform data fork at all, that means 1367 * the data fork area was large enough to fit whatever was in 1368 * there. 1369 */ 1370 dfork_min = be64_to_cpu(dip->di_size); 1371 break; 1372 case XFS_DINODE_FMT_EXTENTS: 1373 data_extents = xfs_dfork_data_extents(dip); 1374 if (data_extents) { 1375 /* 1376 * We must maintain sufficient space to hold the entire 1377 * extent map array in the data fork. Note that we 1378 * previously zapped the fork if it had no chance of 1379 * fitting in the inode. 1380 */ 1381 dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents; 1382 } else if (ri->data_extents > 0 || ri->rt_extents > 0) { 1383 /* 1384 * The data fork thinks it has zero extents, but we 1385 * found some data extents. We need to leave enough 1386 * empty space here so that the data fork bmap repair 1387 * will recover the mappings. 1388 */ 1389 dfork_min = bmdr_minsz; 1390 } else { 1391 /* No extents on disk or found in rmapbt. */ 1392 dfork_min = 0; 1393 } 1394 break; 1395 case XFS_DINODE_FMT_BTREE: 1396 /* Must have space for btree header and key/pointers. 
*/ 1397 bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1398 dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); 1399 break; 1400 case XFS_DINODE_FMT_META_BTREE: 1401 switch (be16_to_cpu(dip->di_metatype)) { 1402 case XFS_METAFILE_RTRMAP: 1403 rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1404 dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr); 1405 break; 1406 case XFS_METAFILE_RTREFCOUNT: 1407 rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1408 dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr); 1409 break; 1410 default: 1411 dfork_min = 0; 1412 break; 1413 } 1414 break; 1415 default: 1416 dfork_min = 0; 1417 break; 1418 } 1419 1420 /* 1421 * Round all values up to the nearest 8 bytes, because that is the 1422 * precision of di_forkoff. 1423 */ 1424 afork_min = roundup(afork_min, 8); 1425 dfork_min = roundup(dfork_min, 8); 1426 bmdr_minsz = roundup(bmdr_minsz, 8); 1427 1428 ASSERT(dfork_min <= lit_sz); 1429 ASSERT(afork_min <= lit_sz); 1430 1431 /* 1432 * If the data fork was zapped and we don't have enough space for the 1433 * recovery fork, move the attr fork up. 1434 */ 1435 if (dip->di_format == XFS_DINODE_FMT_EXTENTS && 1436 xfs_dfork_data_extents(dip) == 0 && 1437 (ri->data_extents > 0 || ri->rt_extents > 0) && 1438 bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) { 1439 if (bmdr_minsz + afork_min > lit_sz) { 1440 /* 1441 * The attr for and the stub fork we need to recover 1442 * the data fork won't both fit. Zap the attr fork. 1443 */ 1444 xrep_dinode_zap_afork(ri, dip, mode); 1445 afork_min = bmdr_minsz; 1446 } else { 1447 void *before, *after; 1448 1449 /* Otherwise, just slide the attr fork up. */ 1450 before = XFS_DFORK_APTR(dip); 1451 dip->di_forkoff = bmdr_minsz >> 3; 1452 after = XFS_DFORK_APTR(dip); 1453 memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp)); 1454 } 1455 } 1456 1457 /* 1458 * If the attr fork was zapped and we don't have enough space for the 1459 * recovery fork, move the attr fork down. 
1460 */ 1461 if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS && 1462 xfs_dfork_attr_extents(dip) == 0 && 1463 ri->attr_extents > 0 && 1464 bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) { 1465 if (dip->di_format == XFS_DINODE_FMT_BTREE) { 1466 /* 1467 * If the data fork is in btree format then we can't 1468 * adjust forkoff because that runs the risk of 1469 * violating the extents/btree format transition rules. 1470 */ 1471 } else if (bmdr_minsz + dfork_min > lit_sz) { 1472 /* 1473 * If we can't move the attr fork, too bad, we lose the 1474 * attr fork and leak its blocks. 1475 */ 1476 xrep_dinode_zap_afork(ri, dip, mode); 1477 } else { 1478 /* 1479 * Otherwise, just slide the attr fork down. The attr 1480 * fork is empty, so we don't have any old contents to 1481 * move here. 1482 */ 1483 dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3; 1484 } 1485 } 1486 } 1487 1488 /* 1489 * Zap the data/attr forks if we spot anything that isn't going to pass the 1490 * ifork verifiers or the ifork formatters, because we need to get the inode 1491 * into good enough shape that the higher level repair functions can run. 1492 */ 1493 STATIC void 1494 xrep_dinode_zap_forks( 1495 struct xrep_inode *ri, 1496 struct xfs_dinode *dip) 1497 { 1498 struct xfs_scrub *sc = ri->sc; 1499 xfs_extnum_t data_extents; 1500 xfs_extnum_t attr_extents; 1501 xfs_filblks_t nblocks; 1502 uint16_t mode; 1503 bool zap_datafork = false; 1504 bool zap_attrfork = ri->zap_acls; 1505 1506 trace_xrep_dinode_zap_forks(sc, dip); 1507 1508 mode = be16_to_cpu(dip->di_mode); 1509 1510 data_extents = xfs_dfork_data_extents(dip); 1511 attr_extents = xfs_dfork_attr_extents(dip); 1512 nblocks = be64_to_cpu(dip->di_nblocks); 1513 1514 /* Inode counters don't make sense? 
*/ 1515 if (data_extents > nblocks) 1516 zap_datafork = true; 1517 if (attr_extents > nblocks) 1518 zap_attrfork = true; 1519 if (data_extents + attr_extents > nblocks) 1520 zap_datafork = zap_attrfork = true; 1521 1522 if (!zap_datafork) 1523 zap_datafork = xrep_dinode_check_dfork(sc, dip, mode); 1524 if (!zap_attrfork) 1525 zap_attrfork = xrep_dinode_check_afork(sc, dip); 1526 1527 /* Zap whatever's bad. */ 1528 if (zap_attrfork) 1529 xrep_dinode_zap_afork(ri, dip, mode); 1530 if (zap_datafork) 1531 xrep_dinode_zap_dfork(ri, dip, mode); 1532 xrep_dinode_ensure_forkoff(ri, dip, mode); 1533 1534 /* 1535 * Zero di_nblocks if we don't have any extents at all to satisfy the 1536 * buffer verifier. 1537 */ 1538 data_extents = xfs_dfork_data_extents(dip); 1539 attr_extents = xfs_dfork_attr_extents(dip); 1540 if (data_extents + attr_extents == 0) 1541 dip->di_nblocks = 0; 1542 } 1543 1544 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */ 1545 STATIC int 1546 xrep_dinode_core( 1547 struct xrep_inode *ri) 1548 { 1549 struct xfs_scrub *sc = ri->sc; 1550 struct xfs_buf *bp; 1551 struct xfs_dinode *dip; 1552 xfs_ino_t ino = sc->sm->sm_ino; 1553 int error; 1554 int iget_error; 1555 1556 /* Figure out what this inode had mapped in both forks. */ 1557 error = xrep_dinode_count_rmaps(ri); 1558 if (error) 1559 return error; 1560 1561 /* Read the inode cluster buffer. */ 1562 error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp, 1563 ri->imap.im_blkno, ri->imap.im_len, 0, &bp, NULL); 1564 if (error) 1565 return error; 1566 1567 /* Make sure we can pass the inode buffer verifier. */ 1568 xrep_dinode_buf(sc, bp); 1569 bp->b_ops = &xfs_inode_buf_ops; 1570 1571 /* Fix everything the verifier will complain about. 
*/ 1572 dip = xfs_buf_offset(bp, ri->imap.im_boffset); 1573 xrep_dinode_header(sc, dip); 1574 iget_error = xrep_dinode_mode(ri, dip); 1575 if (iget_error) 1576 goto write; 1577 xrep_dinode_nlinks(dip); 1578 xrep_dinode_flags(sc, dip, ri->rt_extents > 0); 1579 xrep_dinode_size(ri, dip); 1580 xrep_dinode_extsize_hints(sc, dip); 1581 xrep_dinode_zap_forks(ri, dip); 1582 1583 write: 1584 /* Write out the inode. */ 1585 trace_xrep_dinode_fixed(sc, dip); 1586 xfs_dinode_calc_crc(sc->mp, dip); 1587 xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF); 1588 xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset, 1589 ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1); 1590 1591 /* 1592 * In theory, we've fixed the ondisk inode record enough that we should 1593 * be able to load the inode into the cache. Try to iget that inode 1594 * now while we hold the AGI and the inode cluster buffer and take the 1595 * IOLOCK so that we can continue with repairs without anyone else 1596 * accessing the inode. If iget fails, we still need to commit the 1597 * changes. 1598 */ 1599 if (!iget_error) 1600 iget_error = xchk_iget(sc, ino, &sc->ip); 1601 if (!iget_error) 1602 xchk_ilock(sc, XFS_IOLOCK_EXCL); 1603 1604 /* 1605 * Commit the inode cluster buffer updates and drop the AGI buffer that 1606 * we've been holding since scrub setup. From here on out, repairs 1607 * deal only with the cached inode. 1608 */ 1609 error = xrep_trans_commit(sc); 1610 if (error) 1611 return error; 1612 1613 if (iget_error) 1614 return iget_error; 1615 1616 error = xchk_trans_alloc(sc, 0); 1617 if (error) 1618 return error; 1619 1620 error = xrep_ino_dqattach(sc); 1621 if (error) 1622 return error; 1623 1624 xchk_ilock(sc, XFS_ILOCK_EXCL); 1625 if (ri->ino_sick_mask) 1626 xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask); 1627 return 0; 1628 } 1629 1630 /* Fix everything xfs_dinode_verify cares about. 
*/ 1631 STATIC int 1632 xrep_dinode_problems( 1633 struct xrep_inode *ri) 1634 { 1635 struct xfs_scrub *sc = ri->sc; 1636 int error; 1637 1638 error = xrep_dinode_core(ri); 1639 if (error) 1640 return error; 1641 1642 /* We had to fix a totally busted inode, schedule quotacheck. */ 1643 if (XFS_IS_UQUOTA_ON(sc->mp)) 1644 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1645 if (XFS_IS_GQUOTA_ON(sc->mp)) 1646 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1647 if (XFS_IS_PQUOTA_ON(sc->mp)) 1648 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1649 1650 return 0; 1651 } 1652 1653 /* 1654 * Fix problems that the verifiers don't care about. In general these are 1655 * errors that don't cause problems elsewhere in the kernel that we can easily 1656 * detect, so we don't check them all that rigorously. 1657 */ 1658 1659 /* Make sure block and extent counts are ok. */ 1660 STATIC int 1661 xrep_inode_blockcounts( 1662 struct xfs_scrub *sc) 1663 { 1664 struct xfs_ifork *ifp; 1665 xfs_filblks_t count; 1666 xfs_filblks_t acount; 1667 xfs_extnum_t nextents; 1668 int error; 1669 1670 trace_xrep_inode_blockcounts(sc); 1671 1672 /* Set data fork counters from the data fork mappings. */ 1673 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); 1674 if (error) 1675 return error; 1676 if (xfs_is_reflink_inode(sc->ip)) { 1677 /* 1678 * data fork blockcount can exceed physical storage if a user 1679 * reflinks the same block over and over again. 1680 */ 1681 ; 1682 } else if (XFS_IS_REALTIME_INODE(sc->ip)) { 1683 if (count >= sc->mp->m_sb.sb_rblocks) 1684 return -EFSCORRUPTED; 1685 } else { 1686 if (count >= sc->mp->m_sb.sb_dblocks) 1687 return -EFSCORRUPTED; 1688 } 1689 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); 1690 if (error) 1691 return error; 1692 sc->ip->i_df.if_nextents = nextents; 1693 1694 /* Set attr fork counters from the attr fork mappings. 
*/ 1695 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); 1696 if (ifp) { 1697 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, 1698 &acount); 1699 if (error) 1700 return error; 1701 if (count >= sc->mp->m_sb.sb_dblocks) 1702 return -EFSCORRUPTED; 1703 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK, 1704 nextents); 1705 if (error) 1706 return error; 1707 ifp->if_nextents = nextents; 1708 } else { 1709 acount = 0; 1710 } 1711 1712 sc->ip->i_nblocks = count + acount; 1713 return 0; 1714 } 1715 1716 /* Check for invalid uid/gid/prid. */ 1717 STATIC void 1718 xrep_inode_ids( 1719 struct xfs_scrub *sc) 1720 { 1721 bool dirty = false; 1722 1723 trace_xrep_inode_ids(sc); 1724 1725 if (!uid_valid(VFS_I(sc->ip)->i_uid)) { 1726 i_uid_write(VFS_I(sc->ip), 0); 1727 dirty = true; 1728 if (XFS_IS_UQUOTA_ON(sc->mp)) 1729 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1730 } 1731 1732 if (!gid_valid(VFS_I(sc->ip)->i_gid)) { 1733 i_gid_write(VFS_I(sc->ip), 0); 1734 dirty = true; 1735 if (XFS_IS_GQUOTA_ON(sc->mp)) 1736 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1737 } 1738 1739 if (sc->ip->i_projid == -1U) { 1740 sc->ip->i_projid = 0; 1741 dirty = true; 1742 if (XFS_IS_PQUOTA_ON(sc->mp)) 1743 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1744 } 1745 1746 /* strip setuid/setgid if we touched any of the ids */ 1747 if (dirty) 1748 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID); 1749 } 1750 1751 static inline void 1752 xrep_clamp_timestamp( 1753 struct xfs_inode *ip, 1754 struct timespec64 *ts) 1755 { 1756 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC); 1757 *ts = timestamp_truncate(*ts, VFS_I(ip)); 1758 } 1759 1760 /* Nanosecond counters can't have more than 1 billion. 
*/ 1761 STATIC void 1762 xrep_inode_timestamps( 1763 struct xfs_inode *ip) 1764 { 1765 struct timespec64 tstamp; 1766 struct inode *inode = VFS_I(ip); 1767 1768 tstamp = inode_get_atime(inode); 1769 xrep_clamp_timestamp(ip, &tstamp); 1770 inode_set_atime_to_ts(inode, tstamp); 1771 1772 tstamp = inode_get_mtime(inode); 1773 xrep_clamp_timestamp(ip, &tstamp); 1774 inode_set_mtime_to_ts(inode, tstamp); 1775 1776 tstamp = inode_get_ctime(inode); 1777 xrep_clamp_timestamp(ip, &tstamp); 1778 inode_set_ctime_to_ts(inode, tstamp); 1779 1780 xrep_clamp_timestamp(ip, &ip->i_crtime); 1781 } 1782 1783 /* Fix inode flags that don't make sense together. */ 1784 STATIC void 1785 xrep_inode_flags( 1786 struct xfs_scrub *sc) 1787 { 1788 uint16_t mode; 1789 1790 trace_xrep_inode_flags(sc); 1791 1792 mode = VFS_I(sc->ip)->i_mode; 1793 1794 /* Clear junk flags */ 1795 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY) 1796 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY; 1797 1798 /* NEWRTBM only applies to realtime bitmaps */ 1799 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino) 1800 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; 1801 else 1802 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM; 1803 1804 /* These only make sense for directories. */ 1805 if (!S_ISDIR(mode)) 1806 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT | 1807 XFS_DIFLAG_EXTSZINHERIT | 1808 XFS_DIFLAG_PROJINHERIT | 1809 XFS_DIFLAG_NOSYMLINKS); 1810 1811 /* These only make sense for files. */ 1812 if (!S_ISREG(mode)) 1813 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME | 1814 XFS_DIFLAG_EXTSIZE); 1815 1816 /* These only make sense for non-rt files. */ 1817 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) 1818 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM; 1819 1820 /* Immutable and append only? Drop the append. */ 1821 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) && 1822 (sc->ip->i_diflags & XFS_DIFLAG_APPEND)) 1823 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND; 1824 1825 /* Clear junk flags. 
*/ 1826 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY) 1827 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY; 1828 1829 /* No reflink flag unless we support it and it's a file. */ 1830 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode)) 1831 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 1832 1833 /* DAX only applies to files and dirs. */ 1834 if (!(S_ISREG(mode) || S_ISDIR(mode))) 1835 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; 1836 } 1837 1838 /* 1839 * Fix size problems with block/node format directories. If we fail to find 1840 * the extent list, just bail out and let the bmapbtd repair functions clean 1841 * up that mess. 1842 */ 1843 STATIC void 1844 xrep_inode_blockdir_size( 1845 struct xfs_scrub *sc) 1846 { 1847 struct xfs_iext_cursor icur; 1848 struct xfs_bmbt_irec got; 1849 struct xfs_ifork *ifp; 1850 xfs_fileoff_t off; 1851 int error; 1852 1853 trace_xrep_inode_blockdir_size(sc); 1854 1855 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); 1856 if (error) 1857 return; 1858 1859 /* Find the last block before 32G; this is the dir size. */ 1860 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1861 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); 1862 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { 1863 /* zero-extents directory? */ 1864 return; 1865 } 1866 1867 off = got.br_startoff + got.br_blockcount; 1868 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE, 1869 XFS_FSB_TO_B(sc->mp, off)); 1870 } 1871 1872 /* Fix size problems with short format directories. */ 1873 STATIC void 1874 xrep_inode_sfdir_size( 1875 struct xfs_scrub *sc) 1876 { 1877 struct xfs_ifork *ifp; 1878 1879 trace_xrep_inode_sfdir_size(sc); 1880 1881 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1882 sc->ip->i_disk_size = ifp->if_bytes; 1883 } 1884 1885 /* 1886 * Fix any irregularities in a directory inode's size now that we can iterate 1887 * extent maps and access other regular inode data. 
1888 */ 1889 STATIC void 1890 xrep_inode_dir_size( 1891 struct xfs_scrub *sc) 1892 { 1893 trace_xrep_inode_dir_size(sc); 1894 1895 switch (sc->ip->i_df.if_format) { 1896 case XFS_DINODE_FMT_EXTENTS: 1897 case XFS_DINODE_FMT_BTREE: 1898 xrep_inode_blockdir_size(sc); 1899 break; 1900 case XFS_DINODE_FMT_LOCAL: 1901 xrep_inode_sfdir_size(sc); 1902 break; 1903 } 1904 } 1905 1906 /* Fix extent size hint problems. */ 1907 STATIC void 1908 xrep_inode_extsize( 1909 struct xfs_scrub *sc) 1910 { 1911 /* Fix misaligned extent size hints on a directory. */ 1912 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1913 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 1914 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) { 1915 sc->ip->i_extsize = 0; 1916 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT; 1917 } 1918 } 1919 1920 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */ 1921 STATIC int 1922 xrep_inode_pptr( 1923 struct xfs_scrub *sc) 1924 { 1925 struct xfs_mount *mp = sc->mp; 1926 struct xfs_inode *ip = sc->ip; 1927 struct inode *inode = VFS_I(ip); 1928 1929 if (!xfs_has_parent(mp)) 1930 return 0; 1931 1932 /* 1933 * Unlinked inodes that cannot be added to the directory tree will not 1934 * have a parent pointer. 1935 */ 1936 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) 1937 return 0; 1938 1939 /* Children of the superblock do not have parent pointers. */ 1940 if (xchk_inode_is_sb_rooted(ip)) 1941 return 0; 1942 1943 /* Inode already has an attr fork; no further work possible here. */ 1944 if (xfs_inode_has_attr_fork(ip)) 1945 return 0; 1946 1947 return xfs_bmap_add_attrfork(sc->tp, ip, 1948 sizeof(struct xfs_attr_sf_hdr), true); 1949 } 1950 1951 /* Fix COW extent size hint problems. */ 1952 STATIC void 1953 xrep_inode_cowextsize( 1954 struct xfs_scrub *sc) 1955 { 1956 /* Fix misaligned CoW extent size hints on a directory. 
*/ 1957 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1958 (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) && 1959 sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) { 1960 sc->ip->i_cowextsize = 0; 1961 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 1962 } 1963 } 1964 1965 /* Fix any irregularities in an inode that the verifiers don't catch. */ 1966 STATIC int 1967 xrep_inode_problems( 1968 struct xfs_scrub *sc) 1969 { 1970 int error; 1971 1972 error = xrep_inode_blockcounts(sc); 1973 if (error) 1974 return error; 1975 error = xrep_inode_pptr(sc); 1976 if (error) 1977 return error; 1978 xrep_inode_timestamps(sc->ip); 1979 xrep_inode_flags(sc); 1980 xrep_inode_ids(sc); 1981 /* 1982 * We can now do a better job fixing the size of a directory now that 1983 * we can scan the data fork extents than we could in xrep_dinode_size. 1984 */ 1985 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 1986 xrep_inode_dir_size(sc); 1987 xrep_inode_extsize(sc); 1988 xrep_inode_cowextsize(sc); 1989 1990 trace_xrep_inode_fixed(sc); 1991 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); 1992 return xrep_roll_trans(sc); 1993 } 1994 1995 /* 1996 * Make sure this inode's unlinked list pointers are consistent with its 1997 * link count. 1998 */ 1999 STATIC int 2000 xrep_inode_unlinked( 2001 struct xfs_scrub *sc) 2002 { 2003 unsigned int nlink = VFS_I(sc->ip)->i_nlink; 2004 int error; 2005 2006 /* 2007 * If this inode is linked from the directory tree and on the unlinked 2008 * list, remove it from the unlinked list. 2009 */ 2010 if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) { 2011 struct xfs_perag *pag; 2012 int error; 2013 2014 pag = xfs_perag_get(sc->mp, 2015 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino)); 2016 error = xfs_iunlink_remove(sc->tp, pag, sc->ip); 2017 xfs_perag_put(pag); 2018 if (error) 2019 return error; 2020 } 2021 2022 /* 2023 * If this inode is not linked from the directory tree yet not on the 2024 * unlinked list, put it on the unlinked list. 
2025 */ 2026 if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) { 2027 error = xfs_iunlink(sc->tp, sc->ip); 2028 if (error) 2029 return error; 2030 } 2031 2032 return 0; 2033 } 2034 2035 /* Repair an inode's fields. */ 2036 int 2037 xrep_inode( 2038 struct xfs_scrub *sc) 2039 { 2040 int error = 0; 2041 2042 /* 2043 * No inode? That means we failed the _iget verifiers. Repair all 2044 * the things that the inode verifiers care about, then retry _iget. 2045 */ 2046 if (!sc->ip) { 2047 struct xrep_inode *ri = sc->buf; 2048 2049 ASSERT(ri != NULL); 2050 2051 error = xrep_dinode_problems(ri); 2052 if (error == -EBUSY) { 2053 /* 2054 * Directory scan to recover inode mode encountered a 2055 * busy inode, so we did not continue repairing things. 2056 */ 2057 return 0; 2058 } 2059 if (error) 2060 return error; 2061 2062 /* By this point we had better have a working incore inode. */ 2063 if (!sc->ip) 2064 return -EFSCORRUPTED; 2065 } 2066 2067 xfs_trans_ijoin(sc->tp, sc->ip, 0); 2068 2069 /* If we found corruption of any kind, try to fix it. */ 2070 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) || 2071 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) { 2072 error = xrep_inode_problems(sc); 2073 if (error) 2074 return error; 2075 } 2076 2077 /* See if we can clear the reflink flag. */ 2078 if (xfs_is_reflink_inode(sc->ip)) { 2079 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); 2080 if (error) 2081 return error; 2082 } 2083 2084 /* Reconnect incore unlinked list */ 2085 error = xrep_inode_unlinked(sc); 2086 if (error) 2087 return error; 2088 2089 return xrep_defer_finish(sc); 2090 } 2091