1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include <linux/iversion.h> 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_inode.h" 16 #include "xfs_inode_util.h" 17 #include "xfs_trans.h" 18 #include "xfs_ialloc.h" 19 #include "xfs_health.h" 20 #include "xfs_bmap.h" 21 #include "xfs_error.h" 22 #include "xfs_trace.h" 23 #include "xfs_ag.h" 24 #include "xfs_iunlink_item.h" 25 #include "xfs_inode_item.h" 26 27 uint16_t 28 xfs_flags2diflags( 29 struct xfs_inode *ip, 30 unsigned int xflags) 31 { 32 /* can't set PREALLOC this way, just preserve it */ 33 uint16_t di_flags = 34 (ip->i_diflags & XFS_DIFLAG_PREALLOC); 35 36 if (xflags & FS_XFLAG_IMMUTABLE) 37 di_flags |= XFS_DIFLAG_IMMUTABLE; 38 if (xflags & FS_XFLAG_APPEND) 39 di_flags |= XFS_DIFLAG_APPEND; 40 if (xflags & FS_XFLAG_SYNC) 41 di_flags |= XFS_DIFLAG_SYNC; 42 if (xflags & FS_XFLAG_NOATIME) 43 di_flags |= XFS_DIFLAG_NOATIME; 44 if (xflags & FS_XFLAG_NODUMP) 45 di_flags |= XFS_DIFLAG_NODUMP; 46 if (xflags & FS_XFLAG_NODEFRAG) 47 di_flags |= XFS_DIFLAG_NODEFRAG; 48 if (xflags & FS_XFLAG_FILESTREAM) 49 di_flags |= XFS_DIFLAG_FILESTREAM; 50 if (S_ISDIR(VFS_I(ip)->i_mode)) { 51 if (xflags & FS_XFLAG_RTINHERIT) 52 di_flags |= XFS_DIFLAG_RTINHERIT; 53 if (xflags & FS_XFLAG_NOSYMLINKS) 54 di_flags |= XFS_DIFLAG_NOSYMLINKS; 55 if (xflags & FS_XFLAG_EXTSZINHERIT) 56 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 57 if (xflags & FS_XFLAG_PROJINHERIT) 58 di_flags |= XFS_DIFLAG_PROJINHERIT; 59 } else if (S_ISREG(VFS_I(ip)->i_mode)) { 60 if (xflags & FS_XFLAG_REALTIME) 61 di_flags |= XFS_DIFLAG_REALTIME; 62 if (xflags & FS_XFLAG_EXTSIZE) 63 di_flags |= XFS_DIFLAG_EXTSIZE; 64 } 65 66 return di_flags; 67 } 68 69 uint64_t 70 xfs_flags2diflags2( 71 struct xfs_inode *ip, 72 unsigned int 
xflags) 73 { 74 uint64_t di_flags2 = 75 (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | 76 XFS_DIFLAG2_BIGTIME | 77 XFS_DIFLAG2_NREXT64)); 78 79 if (xflags & FS_XFLAG_DAX) 80 di_flags2 |= XFS_DIFLAG2_DAX; 81 if (xflags & FS_XFLAG_COWEXTSIZE) 82 di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 83 84 return di_flags2; 85 } 86 87 uint32_t 88 xfs_ip2xflags( 89 struct xfs_inode *ip) 90 { 91 uint32_t flags = 0; 92 93 if (ip->i_diflags & XFS_DIFLAG_ANY) { 94 if (ip->i_diflags & XFS_DIFLAG_REALTIME) 95 flags |= FS_XFLAG_REALTIME; 96 if (ip->i_diflags & XFS_DIFLAG_PREALLOC) 97 flags |= FS_XFLAG_PREALLOC; 98 if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) 99 flags |= FS_XFLAG_IMMUTABLE; 100 if (ip->i_diflags & XFS_DIFLAG_APPEND) 101 flags |= FS_XFLAG_APPEND; 102 if (ip->i_diflags & XFS_DIFLAG_SYNC) 103 flags |= FS_XFLAG_SYNC; 104 if (ip->i_diflags & XFS_DIFLAG_NOATIME) 105 flags |= FS_XFLAG_NOATIME; 106 if (ip->i_diflags & XFS_DIFLAG_NODUMP) 107 flags |= FS_XFLAG_NODUMP; 108 if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) 109 flags |= FS_XFLAG_RTINHERIT; 110 if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) 111 flags |= FS_XFLAG_PROJINHERIT; 112 if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) 113 flags |= FS_XFLAG_NOSYMLINKS; 114 if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) 115 flags |= FS_XFLAG_EXTSIZE; 116 if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) 117 flags |= FS_XFLAG_EXTSZINHERIT; 118 if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) 119 flags |= FS_XFLAG_NODEFRAG; 120 if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) 121 flags |= FS_XFLAG_FILESTREAM; 122 } 123 124 if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { 125 if (ip->i_diflags2 & XFS_DIFLAG2_DAX) 126 flags |= FS_XFLAG_DAX; 127 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 128 flags |= FS_XFLAG_COWEXTSIZE; 129 } 130 131 if (xfs_inode_has_attr_fork(ip)) 132 flags |= FS_XFLAG_HASATTR; 133 return flags; 134 } 135 136 prid_t 137 xfs_get_initial_prid(struct xfs_inode *dp) 138 { 139 if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) 140 return dp->i_projid; 141 142 /* Assign to the root project 
by default. */ 143 return 0; 144 } 145 146 /* Propagate di_flags from a parent inode to a child inode. */ 147 static inline void 148 xfs_inode_inherit_flags( 149 struct xfs_inode *ip, 150 const struct xfs_inode *pip) 151 { 152 unsigned int di_flags = 0; 153 xfs_failaddr_t failaddr; 154 umode_t mode = VFS_I(ip)->i_mode; 155 156 if (S_ISDIR(mode)) { 157 if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) 158 di_flags |= XFS_DIFLAG_RTINHERIT; 159 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 160 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 161 ip->i_extsize = pip->i_extsize; 162 } 163 if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) 164 di_flags |= XFS_DIFLAG_PROJINHERIT; 165 } else if (S_ISREG(mode)) { 166 if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && 167 xfs_has_realtime(ip->i_mount)) 168 di_flags |= XFS_DIFLAG_REALTIME; 169 if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 170 di_flags |= XFS_DIFLAG_EXTSIZE; 171 ip->i_extsize = pip->i_extsize; 172 } 173 } 174 if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && 175 xfs_inherit_noatime) 176 di_flags |= XFS_DIFLAG_NOATIME; 177 if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && 178 xfs_inherit_nodump) 179 di_flags |= XFS_DIFLAG_NODUMP; 180 if ((pip->i_diflags & XFS_DIFLAG_SYNC) && 181 xfs_inherit_sync) 182 di_flags |= XFS_DIFLAG_SYNC; 183 if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && 184 xfs_inherit_nosymlinks) 185 di_flags |= XFS_DIFLAG_NOSYMLINKS; 186 if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && 187 xfs_inherit_nodefrag) 188 di_flags |= XFS_DIFLAG_NODEFRAG; 189 if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) 190 di_flags |= XFS_DIFLAG_FILESTREAM; 191 192 ip->i_diflags |= di_flags; 193 194 /* 195 * Inode verifiers on older kernels only check that the extent size 196 * hint is an integer multiple of the rt extent size on realtime files. 197 * They did not check the hint alignment on a directory with both 198 * rtinherit and extszinherit flags set. 
If the misaligned hint is 199 * propagated from a directory into a new realtime file, new file 200 * allocations will fail due to math errors in the rt allocator and/or 201 * trip the verifiers. Validate the hint settings in the new file so 202 * that we don't let broken hints propagate. 203 */ 204 failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, 205 VFS_I(ip)->i_mode, ip->i_diflags); 206 if (failaddr) { 207 ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 208 XFS_DIFLAG_EXTSZINHERIT); 209 ip->i_extsize = 0; 210 } 211 } 212 213 /* Propagate di_flags2 from a parent inode to a child inode. */ 214 static inline void 215 xfs_inode_inherit_flags2( 216 struct xfs_inode *ip, 217 const struct xfs_inode *pip) 218 { 219 xfs_failaddr_t failaddr; 220 221 if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { 222 ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; 223 ip->i_cowextsize = pip->i_cowextsize; 224 } 225 if (pip->i_diflags2 & XFS_DIFLAG2_DAX) 226 ip->i_diflags2 |= XFS_DIFLAG2_DAX; 227 if (xfs_is_metadir_inode(pip)) 228 ip->i_diflags2 |= XFS_DIFLAG2_METADATA; 229 230 /* Don't let invalid cowextsize hints propagate. */ 231 failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, 232 VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); 233 if (failaddr) { 234 ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 235 ip->i_cowextsize = 0; 236 } 237 } 238 239 /* 240 * If we need to create attributes immediately after allocating the inode, 241 * initialise an empty attribute fork right now. We use the default fork offset 242 * for attributes here as we don't know exactly what size or how many 243 * attributes we might be adding. We can do this safely here because we know 244 * the data fork is completely empty and this saves us from needing to run a 245 * separate transaction to set the fork offset in the immediate future. 
246 * 247 * If we have parent pointers and the caller hasn't told us that the file will 248 * never be linked into a directory tree, we /must/ create the attr fork. 249 */ 250 static inline bool 251 xfs_icreate_want_attrfork( 252 struct xfs_mount *mp, 253 const struct xfs_icreate_args *args) 254 { 255 if (args->flags & XFS_ICREATE_INIT_XATTRS) 256 return true; 257 258 if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp)) 259 return true; 260 261 return false; 262 } 263 264 /* Initialise an inode's attributes. */ 265 void 266 xfs_inode_init( 267 struct xfs_trans *tp, 268 const struct xfs_icreate_args *args, 269 struct xfs_inode *ip) 270 { 271 struct xfs_inode *pip = args->pip; 272 struct inode *dir = pip ? VFS_I(pip) : NULL; 273 struct xfs_mount *mp = tp->t_mountp; 274 struct inode *inode = VFS_I(ip); 275 unsigned int flags; 276 int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG | 277 XFS_ICHGTIME_ACCESS; 278 279 if (args->flags & XFS_ICREATE_TMPFILE) 280 set_nlink(inode, 0); 281 else if (S_ISDIR(args->mode)) 282 set_nlink(inode, 2); 283 else 284 set_nlink(inode, 1); 285 inode->i_rdev = args->rdev; 286 287 if (!args->idmap || pip == NULL) { 288 /* creating a tree root, sb rooted, or detached file */ 289 inode->i_uid = GLOBAL_ROOT_UID; 290 inode->i_gid = GLOBAL_ROOT_GID; 291 ip->i_projid = 0; 292 inode->i_mode = args->mode; 293 } else { 294 /* creating a child in the directory tree */ 295 if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { 296 inode_fsuid_set(inode, args->idmap); 297 inode->i_gid = dir->i_gid; 298 inode->i_mode = args->mode; 299 } else { 300 inode_init_owner(args->idmap, inode, dir, args->mode); 301 } 302 303 /* 304 * If the group ID of the new file does not match the effective 305 * group ID or one of the supplementary group IDs, the S_ISGID 306 * bit is cleared (and only if the irix_sgid_inherit 307 * compatibility variable is set). 
308 */ 309 if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && 310 !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode))) 311 inode->i_mode &= ~S_ISGID; 312 313 ip->i_projid = xfs_get_initial_prid(pip); 314 } 315 316 ip->i_disk_size = 0; 317 ip->i_df.if_nextents = 0; 318 ASSERT(ip->i_nblocks == 0); 319 320 ip->i_extsize = 0; 321 ip->i_diflags = 0; 322 323 if (xfs_has_v3inodes(mp)) { 324 inode_set_iversion(inode, 1); 325 /* also covers the di_used_blocks union arm: */ 326 ip->i_cowextsize = 0; 327 times |= XFS_ICHGTIME_CREATE; 328 } 329 330 xfs_trans_ichgtime(tp, ip, times); 331 332 flags = XFS_ILOG_CORE; 333 switch (args->mode & S_IFMT) { 334 case S_IFIFO: 335 case S_IFCHR: 336 case S_IFBLK: 337 case S_IFSOCK: 338 ip->i_df.if_format = XFS_DINODE_FMT_DEV; 339 flags |= XFS_ILOG_DEV; 340 break; 341 case S_IFREG: 342 case S_IFDIR: 343 if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) 344 xfs_inode_inherit_flags(ip, pip); 345 if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) 346 xfs_inode_inherit_flags2(ip, pip); 347 fallthrough; 348 case S_IFLNK: 349 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 350 ip->i_df.if_bytes = 0; 351 ip->i_df.if_data = NULL; 352 break; 353 default: 354 ASSERT(0); 355 } 356 357 if (xfs_icreate_want_attrfork(mp, args)) { 358 ip->i_forkoff = xfs_default_attroffset(ip) >> 3; 359 xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); 360 361 if (!xfs_has_attr(mp)) { 362 spin_lock(&mp->m_sb_lock); 363 xfs_add_attr(mp); 364 spin_unlock(&mp->m_sb_lock); 365 xfs_log_sb(tp); 366 } 367 } 368 369 xfs_trans_log_inode(tp, ip, flags); 370 } 371 372 /* 373 * In-Core Unlinked List Lookups 374 * ============================= 375 * 376 * Every inode is supposed to be reachable from some other piece of metadata 377 * with the exception of the root directory. 
 * Inodes with a connection to a
 * file descriptor but not linked from anywhere in the on-disk directory tree
 * are collectively known as unlinked inodes, though the filesystem itself
 * maintains links to these inodes so that on-disk metadata are consistent.
 *
 * XFS implements a per-AG on-disk hash table of unlinked inodes.  The AGI
 * header contains a number of buckets that point to an inode, and each inode
 * record has a pointer to the next inode in the hash chain.  This
 * singly-linked list causes scaling problems in the iunlink remove function
 * because we must walk that list to find the inode that points to the inode
 * being removed from the unlinked hash bucket list.
 *
 * Hence we keep an in-memory doubly linked list to link each inode on an
 * unlinked list.  Because there are 64 unlinked lists per AGI, keeping pointer
 * based lists would require having 64 list heads in the perag, one for each
 * list.  This is expensive in terms of memory (think millions of AGs) and cache
 * misses on lookups.  Instead, use the fact that inodes on the unlinked list
 * must be referenced at the VFS level to keep them on the list and hence we
 * have an existence guarantee for inodes on the unlinked list.
 *
 * Given we have an existence guarantee, we can use lockless inode cache lookups
 * to resolve aginos to xfs inodes.  This means we only need 8 bytes per inode
 * for the doubly linked unlinked list, and we don't need any extra locking to
 * keep the list safe as all manipulations are done under the AGI buffer lock.
 * Keeping the list up to date does not require memory allocation, just finding
 * the XFS inode and updating the next/prev unlinked list aginos.
 */

/*
 * Update the prev pointer of the next agino.  Returns -ENOLINK if the inode
 * is not in cache.
408 */ 409 static int 410 xfs_iunlink_update_backref( 411 struct xfs_perag *pag, 412 xfs_agino_t prev_agino, 413 xfs_agino_t next_agino) 414 { 415 struct xfs_inode *ip; 416 417 /* No update necessary if we are at the end of the list. */ 418 if (next_agino == NULLAGINO) 419 return 0; 420 421 ip = xfs_iunlink_lookup(pag, next_agino); 422 if (!ip) 423 return -ENOLINK; 424 425 ip->i_prev_unlinked = prev_agino; 426 return 0; 427 } 428 429 /* 430 * Point the AGI unlinked bucket at an inode and log the results. The caller 431 * is responsible for validating the old value. 432 */ 433 STATIC int 434 xfs_iunlink_update_bucket( 435 struct xfs_trans *tp, 436 struct xfs_perag *pag, 437 struct xfs_buf *agibp, 438 unsigned int bucket_index, 439 xfs_agino_t new_agino) 440 { 441 struct xfs_agi *agi = agibp->b_addr; 442 xfs_agino_t old_value; 443 int offset; 444 445 ASSERT(xfs_verify_agino_or_null(pag, new_agino)); 446 447 old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); 448 trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value, 449 new_agino); 450 451 /* 452 * We should never find the head of the list already set to the value 453 * passed in because either we're adding or removing ourselves from the 454 * head of the list. 
455 */ 456 if (old_value == new_agino) { 457 xfs_buf_mark_corrupt(agibp); 458 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 459 return -EFSCORRUPTED; 460 } 461 462 agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); 463 offset = offsetof(struct xfs_agi, agi_unlinked) + 464 (sizeof(xfs_agino_t) * bucket_index); 465 xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); 466 return 0; 467 } 468 469 static int 470 xfs_iunlink_insert_inode( 471 struct xfs_trans *tp, 472 struct xfs_perag *pag, 473 struct xfs_buf *agibp, 474 struct xfs_inode *ip) 475 { 476 struct xfs_mount *mp = tp->t_mountp; 477 struct xfs_agi *agi = agibp->b_addr; 478 xfs_agino_t next_agino; 479 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 480 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 481 int error; 482 483 /* 484 * Get the index into the agi hash table for the list this inode will 485 * go on. Make sure the pointer isn't garbage and that this inode 486 * isn't already on the list. 487 */ 488 next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 489 if (next_agino == agino || 490 !xfs_verify_agino_or_null(pag, next_agino)) { 491 xfs_buf_mark_corrupt(agibp); 492 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 493 return -EFSCORRUPTED; 494 } 495 496 /* 497 * Update the prev pointer in the next inode to point back to this 498 * inode. 499 */ 500 error = xfs_iunlink_update_backref(pag, agino, next_agino); 501 if (error == -ENOLINK) 502 error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); 503 if (error) 504 return error; 505 506 if (next_agino != NULLAGINO) { 507 /* 508 * There is already another inode in the bucket, so point this 509 * inode to the current head of the list. 510 */ 511 error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); 512 if (error) 513 return error; 514 ip->i_next_unlinked = next_agino; 515 } 516 517 /* Point the head of the list to point to this inode. 
*/ 518 ip->i_prev_unlinked = NULLAGINO; 519 return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); 520 } 521 522 /* 523 * This is called when the inode's link count has gone to 0 or we are creating 524 * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. 525 * 526 * We place the on-disk inode on a list in the AGI. It will be pulled from this 527 * list when the inode is freed. 528 */ 529 int 530 xfs_iunlink( 531 struct xfs_trans *tp, 532 struct xfs_inode *ip) 533 { 534 struct xfs_mount *mp = tp->t_mountp; 535 struct xfs_perag *pag; 536 struct xfs_buf *agibp; 537 int error; 538 539 ASSERT(VFS_I(ip)->i_nlink == 0); 540 ASSERT(VFS_I(ip)->i_mode != 0); 541 trace_xfs_iunlink(ip); 542 543 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 544 545 /* Get the agi buffer first. It ensures lock ordering on the list. */ 546 error = xfs_read_agi(pag, tp, 0, &agibp); 547 if (error) 548 goto out; 549 550 error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); 551 out: 552 xfs_perag_put(pag); 553 return error; 554 } 555 556 static int 557 xfs_iunlink_remove_inode( 558 struct xfs_trans *tp, 559 struct xfs_perag *pag, 560 struct xfs_buf *agibp, 561 struct xfs_inode *ip) 562 { 563 struct xfs_mount *mp = tp->t_mountp; 564 struct xfs_agi *agi = agibp->b_addr; 565 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 566 xfs_agino_t head_agino; 567 short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 568 int error; 569 570 trace_xfs_iunlink_remove(ip); 571 572 /* 573 * Get the index into the agi hash table for the list this inode will 574 * go on. Make sure the head pointer isn't garbage. 
575 */ 576 head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 577 if (!xfs_verify_agino(pag, head_agino)) { 578 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 579 agi, sizeof(*agi)); 580 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 581 return -EFSCORRUPTED; 582 } 583 584 /* 585 * Set our inode's next_unlinked pointer to NULL and then return 586 * the old pointer value so that we can update whatever was previous 587 * to us in the list to point to whatever was next in the list. 588 */ 589 error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); 590 if (error) 591 return error; 592 593 /* 594 * Update the prev pointer in the next inode to point back to previous 595 * inode in the chain. 596 */ 597 error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, 598 ip->i_next_unlinked); 599 if (error == -ENOLINK) 600 error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, 601 ip->i_next_unlinked); 602 if (error) 603 return error; 604 605 if (head_agino != agino) { 606 struct xfs_inode *prev_ip; 607 608 prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); 609 if (!prev_ip) { 610 xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); 611 return -EFSCORRUPTED; 612 } 613 614 error = xfs_iunlink_log_inode(tp, prev_ip, pag, 615 ip->i_next_unlinked); 616 prev_ip->i_next_unlinked = ip->i_next_unlinked; 617 } else { 618 /* Point the head of the list to the next unlinked inode. */ 619 error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, 620 ip->i_next_unlinked); 621 } 622 623 ip->i_next_unlinked = NULLAGINO; 624 ip->i_prev_unlinked = 0; 625 return error; 626 } 627 628 /* 629 * Pull the on-disk inode from the AGI unlinked list. 630 */ 631 int 632 xfs_iunlink_remove( 633 struct xfs_trans *tp, 634 struct xfs_perag *pag, 635 struct xfs_inode *ip) 636 { 637 struct xfs_buf *agibp; 638 int error; 639 640 trace_xfs_iunlink_remove(ip); 641 642 /* Get the agi buffer first. It ensures lock ordering on the list. 
*/ 643 error = xfs_read_agi(pag, tp, 0, &agibp); 644 if (error) 645 return error; 646 647 return xfs_iunlink_remove_inode(tp, pag, agibp, ip); 648 } 649 650 /* 651 * Decrement the link count on an inode & log the change. If this causes the 652 * link count to go to zero, move the inode to AGI unlinked list so that it can 653 * be freed when the last active reference goes away via xfs_inactive(). 654 */ 655 int 656 xfs_droplink( 657 struct xfs_trans *tp, 658 struct xfs_inode *ip) 659 { 660 struct inode *inode = VFS_I(ip); 661 662 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 663 664 if (inode->i_nlink == 0) { 665 xfs_info_ratelimited(tp->t_mountp, 666 "Inode 0x%llx link count dropped below zero. Pinning link count.", 667 ip->i_ino); 668 set_nlink(inode, XFS_NLINK_PINNED); 669 } 670 if (inode->i_nlink != XFS_NLINK_PINNED) 671 drop_nlink(inode); 672 673 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 674 675 if (inode->i_nlink) 676 return 0; 677 678 return xfs_iunlink(tp, ip); 679 } 680 681 /* 682 * Increment the link count on an inode & log the change. 683 */ 684 void 685 xfs_bumplink( 686 struct xfs_trans *tp, 687 struct xfs_inode *ip) 688 { 689 struct inode *inode = VFS_I(ip); 690 691 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 692 693 if (inode->i_nlink == XFS_NLINK_PINNED - 1) 694 xfs_info_ratelimited(tp->t_mountp, 695 "Inode 0x%llx link count exceeded maximum. Pinning link count.", 696 ip->i_ino); 697 if (inode->i_nlink != XFS_NLINK_PINNED) 698 inc_nlink(inode); 699 700 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 701 } 702 703 /* Free an inode in the ondisk index and zero it out. */ 704 int 705 xfs_inode_uninit( 706 struct xfs_trans *tp, 707 struct xfs_perag *pag, 708 struct xfs_inode *ip, 709 struct xfs_icluster *xic) 710 { 711 struct xfs_mount *mp = ip->i_mount; 712 int error; 713 714 /* 715 * Free the inode first so that we guarantee that the AGI lock is going 716 * to be taken before we remove the inode from the unlinked list. 
This 717 * makes the AGI lock -> unlinked list modification order the same as 718 * used in O_TMPFILE creation. 719 */ 720 error = xfs_difree(tp, pag, ip->i_ino, xic); 721 if (error) 722 return error; 723 724 error = xfs_iunlink_remove(tp, pag, ip); 725 if (error) 726 return error; 727 728 /* 729 * Free any local-format data sitting around before we reset the 730 * data fork to extents format. Note that the attr fork data has 731 * already been freed by xfs_attr_inactive. 732 */ 733 if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { 734 kfree(ip->i_df.if_data); 735 ip->i_df.if_data = NULL; 736 ip->i_df.if_bytes = 0; 737 } 738 739 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 740 ip->i_diflags = 0; 741 ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 742 ip->i_forkoff = 0; /* mark the attr fork not in use */ 743 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 744 745 /* 746 * Bump the generation count so no one will be confused 747 * by reincarnations of this inode. 748 */ 749 VFS_I(ip)->i_generation++; 750 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 751 return 0; 752 } 753