1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 #include "xfs_metafile.h" 23 24 #include <linux/iversion.h> 25 26 /* 27 * If we are doing readahead on an inode buffer, we might be in log recovery 28 * reading an inode allocation buffer that hasn't yet been replayed, and hence 29 * has not had the inode cores stamped into it. Hence for readahead, the buffer 30 * may be potentially invalid. 31 * 32 * If the readahead buffer is invalid, we need to mark it with an error and 33 * clear the DONE status of the buffer so that a followup read will re-read it 34 * from disk. We don't report the error otherwise to avoid warnings during log 35 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 36 * because all we want to do is say readahead failed; there is no-one to report 37 * the error to, so this will distinguish it from a non-ra verifier failure. 38 * Changes to this readahead error behaviour also need to be reflected in 39 * xfs_dquot_buf_readahead_verify(). 
40 */ 41 static void 42 xfs_inode_buf_verify( 43 struct xfs_buf *bp, 44 bool readahead) 45 { 46 struct xfs_mount *mp = bp->b_mount; 47 int i; 48 int ni; 49 50 /* 51 * Validate the magic number and version of every inode in the buffer 52 */ 53 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 54 for (i = 0; i < ni; i++) { 55 struct xfs_dinode *dip; 56 xfs_agino_t unlinked_ino; 57 int di_ok; 58 59 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 60 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 61 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 62 xfs_dinode_good_version(mp, dip->di_version) && 63 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 64 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 65 XFS_ERRTAG_ITOBP_INOTOBP))) { 66 if (readahead) { 67 bp->b_flags &= ~XBF_DONE; 68 xfs_buf_ioerror(bp, -EIO); 69 return; 70 } 71 72 #ifdef DEBUG 73 xfs_alert(mp, 74 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 75 (unsigned long long)xfs_buf_daddr(bp), i, 76 be16_to_cpu(dip->di_magic)); 77 #endif 78 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 79 __func__, dip, sizeof(*dip), 80 NULL); 81 return; 82 } 83 } 84 } 85 86 87 static void 88 xfs_inode_buf_read_verify( 89 struct xfs_buf *bp) 90 { 91 xfs_inode_buf_verify(bp, false); 92 } 93 94 static void 95 xfs_inode_buf_readahead_verify( 96 struct xfs_buf *bp) 97 { 98 xfs_inode_buf_verify(bp, true); 99 } 100 101 static void 102 xfs_inode_buf_write_verify( 103 struct xfs_buf *bp) 104 { 105 xfs_inode_buf_verify(bp, false); 106 } 107 108 const struct xfs_buf_ops xfs_inode_buf_ops = { 109 .name = "xfs_inode", 110 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 111 cpu_to_be16(XFS_DINODE_MAGIC) }, 112 .verify_read = xfs_inode_buf_read_verify, 113 .verify_write = xfs_inode_buf_write_verify, 114 }; 115 116 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 117 .name = "xfs_inode_ra", 118 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 119 cpu_to_be16(XFS_DINODE_MAGIC) }, 120 .verify_read = xfs_inode_buf_readahead_verify, 121 
.verify_write = xfs_inode_buf_write_verify, 122 }; 123 124 125 /* 126 * This routine is called to map an inode to the buffer containing the on-disk 127 * version of the inode. It returns a pointer to the buffer containing the 128 * on-disk inode in the bpp parameter. 129 */ 130 int 131 xfs_imap_to_bp( 132 struct xfs_mount *mp, 133 struct xfs_trans *tp, 134 struct xfs_imap *imap, 135 struct xfs_buf **bpp) 136 { 137 int error; 138 139 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 140 imap->im_len, 0, bpp, &xfs_inode_buf_ops); 141 if (xfs_metadata_is_sick(error)) 142 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 143 XFS_SICK_AG_INODES); 144 return error; 145 } 146 147 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 148 { 149 struct timespec64 tv; 150 uint32_t n; 151 152 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 153 tv.tv_nsec = n; 154 155 return tv; 156 } 157 158 /* Convert an ondisk timestamp to an incore timestamp. */ 159 struct timespec64 160 xfs_inode_from_disk_ts( 161 struct xfs_dinode *dip, 162 const xfs_timestamp_t ts) 163 { 164 struct timespec64 tv; 165 struct xfs_legacy_timestamp *lts; 166 167 if (xfs_dinode_has_bigtime(dip)) 168 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 169 170 lts = (struct xfs_legacy_timestamp *)&ts; 171 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 172 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 173 174 return tv; 175 } 176 177 int 178 xfs_inode_from_disk( 179 struct xfs_inode *ip, 180 struct xfs_dinode *from) 181 { 182 struct inode *inode = VFS_I(ip); 183 int error; 184 xfs_failaddr_t fa; 185 186 ASSERT(ip->i_cowfp == NULL); 187 188 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 189 if (fa) { 190 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 191 sizeof(*from), fa); 192 return -EFSCORRUPTED; 193 } 194 195 /* 196 * First get the permanent information that is needed to allocate an 197 * inode. 
If the inode is unused, mode is zero and we shouldn't mess 198 * with the uninitialized part of it. 199 */ 200 if (!xfs_has_v3inodes(ip->i_mount)) 201 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 202 inode->i_generation = be32_to_cpu(from->di_gen); 203 inode->i_mode = be16_to_cpu(from->di_mode); 204 if (!inode->i_mode) 205 return 0; 206 207 /* 208 * Convert v1 inodes immediately to v2 inode format as this is the 209 * minimum inode version format we support in the rest of the code. 210 * They will also be unconditionally written back to disk as v2 inodes. 211 */ 212 if (unlikely(from->di_version == 1)) { 213 /* di_metatype used to be di_onlink */ 214 set_nlink(inode, be16_to_cpu(from->di_metatype)); 215 ip->i_projid = 0; 216 } else { 217 set_nlink(inode, be32_to_cpu(from->di_nlink)); 218 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 219 be16_to_cpu(from->di_projid_lo); 220 if (xfs_dinode_is_metadir(from)) 221 ip->i_metatype = be16_to_cpu(from->di_metatype); 222 } 223 224 i_uid_write(inode, be32_to_cpu(from->di_uid)); 225 i_gid_write(inode, be32_to_cpu(from->di_gid)); 226 227 /* 228 * Time is signed, so need to convert to signed 32 bit before 229 * storing in inode timestamp which may be 64 bit. Otherwise 230 * a time before epoch is converted to a time long after epoch 231 * on 64 bit systems. 
232 */ 233 inode_set_atime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_atime)); 235 inode_set_mtime_to_ts(inode, 236 xfs_inode_from_disk_ts(from, from->di_mtime)); 237 inode_set_ctime_to_ts(inode, 238 xfs_inode_from_disk_ts(from, from->di_ctime)); 239 240 ip->i_disk_size = be64_to_cpu(from->di_size); 241 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 242 ip->i_extsize = be32_to_cpu(from->di_extsize); 243 ip->i_forkoff = from->di_forkoff; 244 ip->i_diflags = be16_to_cpu(from->di_flags); 245 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 246 247 if (from->di_dmevmask || from->di_dmstate) 248 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 249 250 if (xfs_has_v3inodes(ip->i_mount)) { 251 inode_set_iversion_queried(inode, 252 be64_to_cpu(from->di_changecount)); 253 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 254 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 255 /* also covers the di_used_blocks union arm: */ 256 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 257 BUILD_BUG_ON(sizeof(from->di_cowextsize) != 258 sizeof(from->di_used_blocks)); 259 } 260 261 error = xfs_iformat_data_fork(ip, from); 262 if (error) 263 return error; 264 if (from->di_forkoff) { 265 error = xfs_iformat_attr_fork(ip, from); 266 if (error) 267 goto out_destroy_data_fork; 268 } 269 if (xfs_is_reflink_inode(ip)) 270 xfs_ifork_init_cow(ip); 271 return 0; 272 273 out_destroy_data_fork: 274 xfs_idestroy_fork(&ip->i_df); 275 return error; 276 } 277 278 /* Convert an incore timestamp to an ondisk timestamp. 
 */
static inline xfs_timestamp_t
xfs_inode_to_disk_ts(
	struct xfs_inode		*ip,
	const struct timespec64		tv)
{
	struct xfs_legacy_timestamp	*lts;
	xfs_timestamp_t			ts;

	/* Bigtime inodes store the encoded time as one big-endian 64-bit value. */
	if (xfs_inode_has_bigtime(ip))
		return cpu_to_be64(xfs_inode_encode_bigtime(tv));

	/* Otherwise fill the same storage as two big-endian 32-bit fields. */
	lts = (struct xfs_legacy_timestamp *)&ts;
	lts->t_sec = cpu_to_be32(tv.tv_sec);
	lts->t_nsec = cpu_to_be32(tv.tv_nsec);

	return ts;
}

/*
 * Store the data and attr fork extent counts of @ip in the ondisk counter
 * fields of @to that match the inode's extent counter width.
 */
static inline void
xfs_inode_to_disk_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to)
{
	if (xfs_inode_has_large_extent_counts(ip)) {
		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
		/*
		 * We might be upgrading the inode to use larger extent counters
		 * than was previously used. Hence zero the unused field.
		 */
		to->di_nrext64_pad = cpu_to_be16(0);
	} else {
		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
	}
}

/*
 * Fill out the ondisk inode @to from the incore inode @ip, converting the
 * multi-byte fields to big-endian.  The ondisk version is always written as
 * 3 (v3 inode filesystems) or 2; @lsn is only stored, in di_lsn, for v3.
 */
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	/* Only metadir inodes carry a metatype; everything else stores zero. */
	if (xfs_is_metadir_inode(ip))
		to->di_metatype = cpu_to_be16(ip->i_metatype);
	else
		to->di_metatype = 0;

	to->di_format = xfs_ifork_format(&ip->i_df);
	to->di_uid = cpu_to_be32(i_uid_read(inode));
	to->di_gid = cpu_to_be32(i_gid_read(inode));
	/* The project id is split into two 16-bit halves on disk. */
	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);

	to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode));
	to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode));
	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(ip->i_disk_size);
	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
	to->di_extsize = cpu_to_be32(ip->i_extsize);
	to->di_forkoff = ip->i_forkoff;
	to->di_aformat = xfs_ifork_format(&ip->i_af);
	to->di_flags = cpu_to_be16(ip->i_diflags);

	if (xfs_has_v3inodes(ip->i_mount)) {
		to->di_version = 3;
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
		to->di_flags2 = cpu_to_be64(ip->i_diflags2);
		/* also covers the di_used_blocks union arm: */
		to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_v3_pad = 0;
	} else {
		to->di_version = 2;
		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
	}

	xfs_inode_to_disk_iext_counters(ip, to);
}

/*
 * Check that the format and extent count of one fork (@whichfork) of @dip are
 * consistent with the file mode, the inode size and the fork size computed by
 * XFS_DFORK_SIZE().  Returns NULL if the fork passes every check, otherwise a
 * non-NULL failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	xfs_extnum_t		di_nextents;
	xfs_extnum_t		max_extents;
	mode_t			mode = be16_to_cpu(dip->di_mode);
	uint32_t		fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
	uint32_t		fork_format = XFS_DFORK_FORMAT(dip, whichfork);

	di_nextents = xfs_dfork_nextents(dip, whichfork);

	/*
	 * For fork types that can contain local data, check that the fork
	 * format matches the size of local data contained within the fork.
	 */
	if (whichfork == XFS_DATA_FORK) {
		/*
		 * A directory small enough to fit in the inode must be stored
		 * in local format. The directory sf <-> extents conversion
		 * code updates the directory size accordingly. Directories
		 * being truncated have zero size and are not subject to this
		 * check.
		 */
		if (S_ISDIR(mode)) {
			if (dip->di_size &&
			    be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * A symlink with a target small enough to fit in the inode can
		 * be stored in extents format if xattrs were added (thus
		 * converting the data fork from shortform to remote format)
		 * and then removed.
		 */
		if (S_ISLNK(mode)) {
			if (be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_EXTENTS &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * For all types, check that when the size says the fork should
		 * be in extent or btree format, the inode isn't claiming to be
		 * in local format.
		 */
		if (be64_to_cpu(dip->di_size) > fork_size &&
		    fork_format == XFS_DINODE_FMT_LOCAL)
			return __this_address;
	}

	/* Per-format checks; any format not listed here is rejected. */
	switch (fork_format) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * No local regular files yet.
		 */
		if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
			return __this_address;
		/* A local format fork must not claim any extents. */
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		max_extents = xfs_iext_max_nextents(
					xfs_dinode_has_large_extent_counts(dip),
					whichfork);
		if (di_nextents > max_extents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_META_BTREE:
		/* Needs the metadir feature and the METADATA inode flag. */
		if (!xfs_has_metadir(mp))
			return __this_address;
		if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)))
			return __this_address;
		switch (be16_to_cpu(dip->di_metatype)) {
		case XFS_METAFILE_RTRMAP:
			/*
			 * growfs must create the rtrmap inodes before adding a
			 * realtime volume to the filesystem, so we cannot use
			 * the rtrmapbt predicate here.
			 */
			if (!xfs_has_rmapbt(mp))
				return __this_address;
			break;
		case XFS_METAFILE_RTREFCOUNT:
			/* same comment about growfs and rmap inodes applies */
			if (!xfs_has_reflink(mp))
				return __this_address;
			break;
		default:
			return __this_address;
		}
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check that di_forkoff is plausible for the data fork format.  A zero fork
 * offset is always accepted.  Returns NULL on success, otherwise a non-NULL
 * failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!dip->di_forkoff)
		return NULL;

	switch (dip->di_format)  {
	case XFS_DINODE_FMT_DEV:
		/* Only one fork offset is accepted for device inodes. */
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_META_BTREE:
		/*
		 * A nonzero fork offset on a metadata btree inode is only
		 * accepted when both metadir and parent pointers are enabled.
		 */
		if (!xfs_has_metadir(mp) || !xfs_has_parent(mp))
			return __this_address;
		fallthrough;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		/* di_forkoff is compared against XFS_LITINO() >> 3. */
		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check the padding field that goes with the inode's extent counter layout.
 * Large extent count inodes need the matching filesystem feature and a zero
 * di_nrext64_pad; other inodes of version 3 or later need a zero di_v3_pad.
 * Returns NULL on success, otherwise a non-NULL failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_nrext64(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	if (xfs_dinode_has_large_extent_counts(dip)) {
		if (!xfs_has_large_extent_counts(mp))
			return __this_address;
		if (dip->di_nrext64_pad != 0)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (dip->di_v3_pad != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Validate all the picky requirements we have for a file that claims to be
 * filesystem metadata.
 */
xfs_failaddr_t
xfs_dinode_verify_metadir(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip,
	uint16_t		mode,
	uint16_t		flags,
	uint64_t		flags2)
{
	/* @mode, @flags and @flags2 are compared as CPU-endian values here. */
	if (!xfs_has_metadir(mp))
		return __this_address;

	/* V5 filesystem only */
	if (dip->di_version < 3)
		return __this_address;

	if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
		return __this_address;

	/* V3 inode fields that are always zero */
	if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
		return __this_address;
	if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
		return __this_address;

	/* Metadata files can only be directories or regular files */
	if (!S_ISDIR(mode) && !S_ISREG(mode))
		return __this_address;

	/* They must have zero access permissions */
	if (mode & 0777)
		return __this_address;

	/* DMAPI event and state masks are zero */
	if (dip->di_dmevmask || dip->di_dmstate)
		return __this_address;

	/*
	 * User and group IDs must be zero. The project ID is used for
	 * grouping inodes. Metadata inodes are never accounted to quotas.
	 */
	if (dip->di_uid || dip->di_gid)
		return __this_address;

	/* Mandatory inode flags must be set */
	if (S_ISDIR(mode)) {
		if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
			return __this_address;
	} else {
		if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
			return __this_address;
	}

	/* dax flags2 must not be set */
	if (flags2 & XFS_DIFLAG2_DAX)
		return __this_address;

	return NULL;
}

/*
 * Verify the ondisk inode @dip, which is expected to be inode number @ino.
 *
 * Returns NULL if every check passes.  Otherwise returns a non-NULL failure
 * address: either __this_address at the check that rejected the inode, or
 * whatever one of the helper verifiers returned.  Inodes older than version 3
 * return early and skip the flags2-based checks at the end.
 */
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;
	xfs_extnum_t		nextents;
	xfs_extnum_t		naextents;
	xfs_filblks_t		nblocks;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_has_v3inodes(mp))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/*
	 * Historical note: xfsprogs in the 3.2 era set up its incore inodes to
	 * have di_nlink track the link count, even if the actual filesystem
	 * only supported V1 inodes (i.e. di_onlink). When writing out the
	 * ondisk inode, it would set both the ondisk di_nlink and di_onlink to
	 * the incore di_nlink value, which is why we cannot check for
	 * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with
	 * di_onlink==0, so we can check that.
	 */
	if (dip->di_version == 2) {
		if (dip->di_metatype)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	/* A nonzero mode must map to a known directory entry file type. */
	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/*
	 * No zero-length symlinks/dirs unless they're unlinked and hence being
	 * inactivated.
	 */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
		if (dip->di_version > 1) {
			if (dip->di_nlink)
				return __this_address;
		} else {
			/* di_metatype used to be di_onlink */
			if (dip->di_metatype)
				return __this_address;
		}
	}

	fa = xfs_dinode_verify_nrext64(mp, dip);
	if (fa)
		return fa;

	nextents = xfs_dfork_data_extents(dip);
	naextents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode && nextents + naextents > nblocks)
		return __this_address;

	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	/* A realtime inode needs a realtime device target on the mount. */
	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (dip->di_forkoff) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		if (naextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_has_reflink(mp))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME) &&
	    !xfs_has_rtreflink(mp))
		return __this_address;

	/*
	 * On zoned filesystems an inode with the RTRMAP metatype has its
	 * di_used_blocks field range-checked; every other inode has the same
	 * bytes validated as a CoW extent size hint.
	 */
	if (xfs_has_zoned(mp) &&
	    dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)) {
		if (be32_to_cpu(dip->di_used_blocks) > mp->m_sb.sb_rgextents)
			return __this_address;
	} else {
		/* COW extent size hint validation */
		fa = xfs_inode_validate_cowextsize(mp,
				be32_to_cpu(dip->di_cowextsize),
				mode, flags, flags2);
		if (fa)
			return fa;
	}

	/* bigtime iflag can only happen on bigtime filesystems */
	if (xfs_dinode_has_bigtime(dip) &&
	    !xfs_has_bigtime(mp))
		return __this_address;

	if (flags2 & XFS_DIFLAG2_METADATA) {
		fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
		if (fa)
			return fa;
	}

	/* metadata inodes containing btrees always have zero extent count */
	if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_META_BTREE) {
		if (nextents + naextents == 0 && nblocks != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Recompute and store di_crc for the ondisk inode @dip.  Inodes older than
 * version 3 are left untouched.
 */
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_has_crc(mp));
	/* The checksum covers sb_inodesize bytes starting at @dip. */
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Validate di_extsize hint.
 *
 * 1. Extent size hint is only valid for directories and regular files.
 * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 * 4. Hint cannot be larger than MAXTEXTLEN.
 * 5. Can be changed on directories at any time.
 * 6. Hint value of 0 turns off hints, clears inode flags.
 * 7. Extent size must be a multiple of the appropriate block size.
 *    For realtime files, this is the rt extent size.
 * 8. For non-realtime files, the extent size hint must be limited
 *    to half the AG size to avoid alignment extending the extent beyond the
 *    limits of the AG.
822 */ 823 xfs_failaddr_t 824 xfs_inode_validate_extsize( 825 struct xfs_mount *mp, 826 uint32_t extsize, 827 uint16_t mode, 828 uint16_t flags) 829 { 830 bool rt_flag; 831 bool hint_flag; 832 bool inherit_flag; 833 uint32_t extsize_bytes; 834 uint32_t blocksize_bytes; 835 836 rt_flag = (flags & XFS_DIFLAG_REALTIME); 837 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 838 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 839 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 840 841 /* 842 * This comment describes a historic gap in this verifier function. 843 * 844 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 845 * function has never checked that the extent size hint is an integer 846 * multiple of the realtime extent size. Since we allow users to set 847 * this combination on non-rt filesystems /and/ to change the rt 848 * extent size when adding a rt device to a filesystem, the net effect 849 * is that users can configure a filesystem anticipating one rt 850 * geometry and change their minds later. Directories do not use the 851 * extent size hint, so this is harmless for them. 852 * 853 * If a directory with a misaligned extent size hint is allowed to 854 * propagate that hint into a new regular realtime file, the result 855 * is that the inode cluster buffer verifier will trigger a corruption 856 * shutdown the next time it is run, because the verifier has always 857 * enforced the alignment rule for regular files. 858 * 859 * Because we allow administrators to set a new rt extent size when 860 * adding a rt section, we cannot add a check to this verifier because 861 * that will result a new source of directory corruption errors when 862 * reading an existing filesystem. Instead, we rely on callers to 863 * decide when alignment checks are appropriate, and fix things up as 864 * needed. 
865 */ 866 867 if (rt_flag) 868 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 869 else 870 blocksize_bytes = mp->m_sb.sb_blocksize; 871 872 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 873 return __this_address; 874 875 if (hint_flag && !S_ISREG(mode)) 876 return __this_address; 877 878 if (inherit_flag && !S_ISDIR(mode)) 879 return __this_address; 880 881 if ((hint_flag || inherit_flag) && extsize == 0) 882 return __this_address; 883 884 /* free inodes get flags set to zero but extsize remains */ 885 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 886 return __this_address; 887 888 if (extsize_bytes % blocksize_bytes) 889 return __this_address; 890 891 if (extsize > XFS_MAX_BMBT_EXTLEN) 892 return __this_address; 893 894 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 895 return __this_address; 896 897 return NULL; 898 } 899 900 /* 901 * Validate di_cowextsize hint. 902 * 903 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 904 * The inode does not have to have any shared blocks, but it must be a v3. 905 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 906 * for a directory, the hint is propagated to new files. 907 * 3. Can be changed on files & directories at any time. 908 * 4. Hint value of 0 turns off hints, clears inode flags. 909 * 5. Extent size must be a multiple of the appropriate block size. 910 * 6. The extent size hint must be limited to half the AG size to avoid 911 * alignment extending the extent beyond the limits of the AG. 
912 */ 913 xfs_failaddr_t 914 xfs_inode_validate_cowextsize( 915 struct xfs_mount *mp, 916 uint32_t cowextsize, 917 uint16_t mode, 918 uint16_t flags, 919 uint64_t flags2) 920 { 921 bool rt_flag; 922 bool hint_flag; 923 uint32_t cowextsize_bytes; 924 uint32_t blocksize_bytes; 925 926 rt_flag = (flags & XFS_DIFLAG_REALTIME); 927 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 928 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 929 930 /* 931 * Similar to extent size hints, a directory can be configured to 932 * propagate realtime status and a CoW extent size hint to newly 933 * created files even if there is no realtime device, and the hints on 934 * disk can become misaligned if the sysadmin changes the rt extent 935 * size while adding the realtime device. 936 * 937 * Therefore, we can only enforce the rextsize alignment check against 938 * regular realtime files, and rely on callers to decide when alignment 939 * checks are appropriate, and fix things up as needed. 940 */ 941 942 if (rt_flag) 943 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 944 else 945 blocksize_bytes = mp->m_sb.sb_blocksize; 946 947 if (hint_flag && !xfs_has_reflink(mp)) 948 return __this_address; 949 950 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 951 return __this_address; 952 953 if (hint_flag && cowextsize == 0) 954 return __this_address; 955 956 /* free inodes get flags set to zero but cowextsize remains */ 957 if (mode && !hint_flag && cowextsize != 0) 958 return __this_address; 959 960 if (cowextsize_bytes % blocksize_bytes) 961 return __this_address; 962 963 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 964 return __this_address; 965 966 if (!rt_flag && cowextsize > mp->m_sb.sb_agblocks / 2) 967 return __this_address; 968 969 return NULL; 970 } 971