// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * Copyright (C) 2010 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap_btree.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_rtbitmap.h"
#include "xfs_attr_item.h"
#include "xfs_log.h"
#include "xfs_defer.h"
#include "xfs_bmap_item.h"
#include "xfs_extfree_item.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_trace.h"

#define _ALLOC	true
#define _FREE	false

/*
 * A buffer has a format structure overhead in the log in addition
 * to the data, so we need to take this into account when reserving
 * space in a transaction for a buffer.  Round the space required up
 * to a multiple of 128 bytes so that we don't change the historical
 * reservation that has been used for this overhead.
 */
STATIC uint
xfs_buf_log_overhead(void)
{
	return round_up(sizeof(struct xlog_op_header) +
			sizeof(struct xfs_buf_log_format), 128);
}

/*
 * Calculate the transaction log reservation per item in bytes.
 *
 * The nbufs argument is used to indicate the number of items that
 * will be changed in a transaction.  size is used to tell how many
 * bytes should be reserved per item.
 */
STATIC uint
xfs_calc_buf_res(
	uint		nbufs,
	uint		size)
{
	return nbufs * (size + xfs_buf_log_overhead());
}

/*
 * Per-extent log reservation for the btree changes involved in freeing or
 * allocating an extent.  In classic XFS there are two trees that will be
 * modified (bnobt + cntbt).  With rmap enabled, there are three trees
 * (rmapbt).  The number of blocks reserved is based on the formula:
 *
 * num trees * ((2 blocks/level * max depth) - 1)
 *
 * Keep in mind that max depth is calculated separately for each type of tree.
 */
uint
xfs_allocfree_block_count(
	struct xfs_mount *mp,
	uint		num_ops)
{
	uint		blocks;

	blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
	if (xfs_has_rmapbt(mp))
		blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);

	return blocks;
}

/*
 * Per-extent log reservation for refcount btree changes.  These are never done
 * in the same transaction as an allocation or a free, so we compute them
 * separately.
 */
static unsigned int
xfs_refcountbt_block_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	return num_ops * (2 * mp->m_refc_maxlevels - 1);
}

static unsigned int
xfs_rtrefcountbt_block_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
}

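/*
 * Illustrative arithmetic only, for a hypothetical geometry with
 * m_alloc_maxlevels = 5 and rmap enabled with m_rmap_maxlevels = 5: one
 * allocation or free reserves
 *
 *	2 * (2 * 5 - 1) + (2 * 5 - 1) = 18 + 9 = 27 blocks
 *
 * per operation, each of which is then charged at block size plus the
 * 128-byte-rounded buffer log overhead via xfs_calc_buf_res().
 */
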
/*
 * Logging inodes is really tricksy. They are logged in memory format,
 * which means that what we write into the log doesn't directly translate into
 * the amount of space they use on disk.
 *
 * Case in point - btree format forks in memory format use more space than the
 * on-disk format. In memory, the buffer contains a normal btree block header so
 * the btree code can treat it as though it is just another generic buffer.
 * However, when we write it to the inode fork, we don't write all of this
 * header as it isn't needed. e.g. the root is only ever in the inode, so
 * there's no need for sibling pointers which would waste 16 bytes of space.
 *
 * Hence when we have an inode with a maximally sized btree format fork, the
 * amount of information we actually log is greater than the size of the inode
 * on disk. Hence we need an inode reservation function that calculates all this
 * correctly. So, we log:
 *
 * - 4 log op headers for object
 *   - for the ilf, the inode core and 2 forks
 * - inode log format object
 * - the inode core
 * - two inode forks containing bmap btree root blocks.
 *   - the btree data contained by both forks will fit into the inode size,
 *     hence when combined with the inode core above, we have a total of the
 *     actual inode size.
 *   - the BMBT headers need to be accounted separately, as they are
 *     additional to the records and pointers that fit inside the inode
 *     forks.
 */
STATIC uint
xfs_calc_inode_res(
	struct xfs_mount	*mp,
	uint			ninodes)
{
	return ninodes *
		(4 * sizeof(struct xlog_op_header) +
		 sizeof(struct xfs_inode_log_format) +
		 mp->m_sb.sb_inodesize +
		 2 * xfs_bmbt_block_len(mp));
}

/*
 * Inode btree record insertion/removal modifies the inode btree and free space
 * btrees (since the inobt does not use the agfl). This requires the following
 * reservation:
 *
 * the inode btree: max depth * blocksize
 * the allocation btrees: 2 trees * (max depth - 1) * block size
 *
 * The caller must account for SB and AG header modifications, etc.
 */
STATIC uint
xfs_calc_inobt_res(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
			XFS_FSB_TO_B(mp, 1)) +
	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
			XFS_FSB_TO_B(mp, 1));
}

/*
 * The free inode btree is a conditional feature. The behavior differs slightly
 * from that of the traditional inode btree in that the finobt tracks records
 * for inode chunks with at least one free inode. A record can be removed from
 * the tree during individual inode allocation. Therefore the finobt
 * reservation is unconditional for both the inode chunk allocation and
 * individual inode allocation (modify) cases.
 *
 * Behavior aside, the reservation for finobt modification is equivalent to the
 * traditional inobt: cover a full finobt shape change plus block allocation.
 */
STATIC uint
xfs_calc_finobt_res(
	struct xfs_mount	*mp)
{
	if (!xfs_has_finobt(mp))
		return 0;

	return xfs_calc_inobt_res(mp);
}

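/*
 * Illustrative arithmetic only, assuming a hypothetical inobt_maxlevels = 3,
 * m_alloc_maxlevels = 5 and no rmapbt: one inobt change covers 3 inode btree
 * blocks plus 2 * (2 * 5 - 1) = 18 allocation btree blocks, each charged at
 * block size plus buffer log overhead; a finobt change, when the feature is
 * enabled, costs the same again.
 */
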
/*
 * Calculate the reservation required to allocate or free an inode chunk. This
 * includes:
 *
 * the allocation btrees: 2 trees * (max depth - 1) * block size
 * the inode chunk: m_ino_geo.ialloc_blks * N
 *
 * The size N of the inode chunk reservation depends on whether it is for
 * allocation or free and which type of create transaction is in use. An inode
 * chunk free always invalidates the buffers and only requires reservation for
 * headers (N == 0). An inode chunk allocation requires a chunk sized
 * reservation on v4 and older superblocks to initialize the chunk. No chunk
 * reservation is required for allocation on v5 supers, which use ordered
 * buffers to initialize.
 */
STATIC uint
xfs_calc_inode_chunk_res(
	struct xfs_mount	*mp,
	bool			alloc)
{
	uint			res, size = 0;

	res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
			       XFS_FSB_TO_B(mp, 1));
	if (alloc) {
		/* icreate tx uses ordered buffers */
		if (xfs_has_v3inodes(mp))
			return res;
		size = XFS_FSB_TO_B(mp, 1);
	}

	res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
	return res;
}

/*
 * Per-extent log reservation for the btree changes involved in freeing or
 * allocating a realtime extent.  We have to be able to log as many rtbitmap
 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
 * extents, as well as the realtime summary block (t1).  Realtime rmap btree
 * operations happen in a second transaction, so factor in a couple of rtrmapbt
 * splits (t2).
 */
static unsigned int
xfs_rtalloc_block_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	unsigned int		rtbmp_blocks;
	xfs_rtxlen_t		rtxlen;
	unsigned int		t1, t2 = 0;

	rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
	rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
	t1 = (rtbmp_blocks + 1) * num_ops;

	if (xfs_has_rmapbt(mp))
		t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);

	return max(t1, t2);
}

/*
 * Various log reservation values.
 *
 * These are based on the size of the file system block because that is what
 * most transactions manipulate.  Each adds in an additional 128 bytes per
 * item logged to try to account for the overhead of the transaction mechanism.
 *
 * Note:  Most of the reservations underestimate the number of allocation
 * groups into which they could free extents in the xfs_defer_finish() call.
 * This is because the number in the worst case is quite high and quite
 * unusual.  In order to fix this we need to change xfs_defer_finish() to free
 * extents in only a single AG at a time.  This will require changes to the
 * EFI code as well, however, so that the EFI for the extents not freed is
 * logged again in each transaction.  See SGI PV #261917.
 *
 * Reservation functions here avoid a huge stack in xfs_trans_init due to
 * register overflow from temporaries in the calculations.
 */

/*
 * Finishing data device refcount updates (t1):
 *    the agfs of the ags containing the blocks: nr_ops * sector size
 *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
 */
inline unsigned int
xfs_calc_finish_cui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	if (!xfs_has_reflink(mp))
		return 0;

	return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
			       mp->m_sb.sb_blocksize);
}

/*
 * Realtime refcount updates (t2):
 *    the rt refcount inode
 *    the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
 */
inline unsigned int
xfs_calc_finish_rt_cui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	if (!xfs_has_rtreflink(mp))
		return 0;

	return xfs_calc_inode_res(mp, 1) +
	       xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
			       mp->m_sb.sb_blocksize);
}

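/*
 * Rough worked example, assuming a hypothetical geometry with 512-byte
 * sectors, 4k blocks and m_refc_maxlevels = 5: finishing one data device
 * refcount update reserves one sector-sized AGF buffer plus
 * (2 * 5 - 1) = 9 refcount btree blocks, each carried at 4k plus the
 * rounded-up buffer log overhead.
 */
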
/*
 * Compute the log reservation required to handle the refcount update
 * transaction. Refcount updates are always done via deferred log items.
 *
 * This is calculated as the max of:
 * Data device refcount updates (t1):
 *    the agfs of the ags containing the blocks: nr_ops * sector size
 *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
 * Realtime refcount updates (t2):
 *    the rt refcount inode
 *    the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
 */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	unsigned int		t1, t2;

	t1 = xfs_calc_finish_cui_reservation(mp, nr_ops);
	t2 = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);

	return max(t1, t2);
}

/*
 * In a write transaction we can allocate a maximum of 2
 * extents.  This gives (t1):
 *    the inode getting the new extents: inode size
 *    the inode's bmap btree: max depth * block size
 *    the agfs of the ags from which the extents are allocated: 2 * sector
 *    the superblock free block counter: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 * Or, if we're writing to a realtime file (t2):
 *    the inode getting the new extents: inode size
 *    the inode's bmap btree: max depth * block size
 *    the agfs of the ags from which the extents are allocated: 2 * sector
 *    the superblock free block counter: sector size
 *    the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
 *    the realtime summary: 1 block
 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 * And the bmap_finish transaction can free bmap blocks in a join (t3):
 *    the agfs of the ags containing the blocks: 2 * sector size
 *    the agfls of the ags containing the blocks: 2 * sector size
 *    the super block free block counter: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 * And any refcount updates that happen in a separate transaction (t4).
 */
STATIC uint
xfs_calc_write_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	if (xfs_has_realtime(mp)) {
		t2 = xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				     blksz) +
		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
	} else {
		t2 = 0;
	}

	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * two refcountbt splits for each transaction.  The codebase runs
	 * refcountbt updates in separate transactions now, so to compute the
	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
	 * do not account them separately as t4.  Reflink did not support
	 * realtime when the reservations were established, so no adjustment to
	 * t2 is needed.
	 */
	if (for_minlogsize) {
		unsigned int	adj = 0;

		if (xfs_has_reflink(mp))
			adj = xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 2),
					blksz);
		t1 += adj;
		t3 += adj;
		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	t4 = xfs_calc_refcountbt_reservation(mp, 1);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}

unsigned int
xfs_calc_write_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_write_reservation(mp, true);
}

/*
 * Finishing an EFI can free the blocks and bmap blocks (t2):
 *    the agf for each of the ags: nr * sector size
 *    the agfl for each of the ags: nr * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming nr extents:
 *		nr exts * 2 trees * (2 * max depth - 1) * block size
 */
inline unsigned int
xfs_calc_finish_efi_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
			       mp->m_sb.sb_blocksize);
}

/*
 * Or, if it's a realtime file (t3):
 *    the agf for each of the ags: 2 * sector size
 *    the agfl for each of the ags: 2 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    the realtime bitmap:
 *		2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
 *    the realtime summary: 2 exts * 1 block
 *    worst case split in allocation btrees per extent assuming 2 extents:
 *		2 exts * 2 trees * (2 * max depth - 1) * block size
 */
inline unsigned int
xfs_calc_finish_rt_efi_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (!xfs_has_realtime(mp))
		return 0;

	return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
			       mp->m_sb.sb_blocksize) +
	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
			       mp->m_sb.sb_blocksize);
}

/*
 * Finishing an RUI is the same as an EFI.  We can split the rmap btree twice
 * on each end of the record, and that can cause the AGFL to be refilled or
 * emptied out.
 */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	return xfs_calc_finish_efi_reservation(mp, nr);
}

/*
 * Finishing a realtime RUI is the same as a realtime EFI.  We can split the
 * rmap btree twice on each end of the record, and that can cause the AGFL to
 * be refilled or emptied out.
 */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (!xfs_has_rtrmapbt(mp))
		return 0;
	return xfs_calc_finish_rt_efi_reservation(mp, nr);
}

/*
 * In finishing a BUI, we can modify:
 *    the inode being truncated: inode size
 *    dquots
 *    the inode's bmap btree: (max depth + 1) * block size
 */
inline unsigned int
xfs_calc_finish_bui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
	       xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
			       mp->m_sb.sb_blocksize);
}

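/*
 * Illustrative arithmetic for the EFI-style finish helpers above: with
 * nr = 2 extents and a hypothetical geometry of 512-byte sectors, 4k blocks,
 * m_alloc_maxlevels = 5 and no rmapbt, xfs_calc_finish_efi_reservation()
 * covers (2 * 2) + 1 = 5 sector-sized headers (two AGFs, two AGFLs, the
 * superblock) plus 2 * 2 * (2 * 5 - 1) = 36 allocation btree blocks.
 */
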
/*
 * In truncating a file we free up to two extents at once.  We can modify (t1):
 *    the inode being truncated: inode size
 *    the inode's bmap btree: (max depth + 1) * block size
 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
 *    the agf for each of the ags: 4 * sector size
 *    the agfl for each of the ags: 4 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming 4 extents:
 *		4 exts * 2 trees * (2 * max depth - 1) * block size
 * Or, if it's a realtime file (t3):
 *    the agf for each of the ags: 2 * sector size
 *    the agfl for each of the ags: 2 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    the realtime bitmap:
 *		2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
 *    the realtime summary: 2 exts * 1 block
 *    worst case split in allocation btrees per extent assuming 2 extents:
 *		2 exts * 2 trees * (2 * max depth - 1) * block size
 * And any refcount updates that happen in a separate transaction (t4).
 */
STATIC uint
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

	t2 = xfs_calc_finish_efi_reservation(mp, 4);
	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * four refcountbt splits in the same transaction as bnobt/cntbt
	 * updates.  The codebase runs refcountbt updates in separate
	 * transactions now, so to compute the minimum log size, add the
	 * refcount btree splits back here and do not compute them separately
	 * as t4.  Reflink did not support realtime when the reservations were
	 * established, so do not adjust t3.
	 */
	if (for_minlogsize) {
		if (xfs_has_reflink(mp))
			t2 += xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 4),
					blksz);

		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	t4 = xfs_calc_refcountbt_reservation(mp, 2);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}

unsigned int
xfs_calc_itruncate_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_itruncate_reservation(mp, true);
}

static inline unsigned int xfs_calc_pptr_link_overhead(void)
{
	return sizeof(struct xfs_attri_log_format) +
	       xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
	       xlog_calc_iovec_len(MAXNAMELEN - 1);
}
static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
{
	return sizeof(struct xfs_attri_log_format) +
	       xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
	       xlog_calc_iovec_len(MAXNAMELEN - 1);
}
static inline unsigned int xfs_calc_pptr_replace_overhead(void)
{
	return sizeof(struct xfs_attri_log_format) +
	       xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
	       xlog_calc_iovec_len(MAXNAMELEN - 1) +
	       xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
	       xlog_calc_iovec_len(MAXNAMELEN - 1);
}

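/*
 * Illustrative only: each parent pointer update logs one attri log format
 * structure plus iovecs for a struct xfs_parent_rec and a name of up to
 * MAXNAMELEN - 1 bytes; a replace carries two rec/name pairs, which is why
 * its overhead is roughly double.  The exact byte counts depend on
 * xlog_calc_iovec_len() rounding.
 */
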
/*
 * In renaming files we can modify:
 *    the five inodes involved: 5 * inode size
 *    the two directory btrees: 2 * (max depth + v2) * dir block size
 *    the two directory bmap btrees: 2 * max depth * block size
 * And the bmap_finish transaction can free dir and bmap blocks (two sets
 *	of bmap blocks) giving (t2):
 *    the agf for the ags in which the blocks live: 3 * sector size
 *    the agfl for the ags in which the blocks live: 3 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
 * If parent pointers are enabled (t3), then each transaction in the chain
 *    must be capable of setting or removing the extended attribute
 *    containing the parent information.  It must also be able to handle
 *    the three xattr intent items that track the progress of the parent
 *    pointer update.
 */
STATIC uint
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	t1 = xfs_calc_inode_res(mp, 5) +
	     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			XFS_FSB_TO_B(mp, 1));

	t2 = xfs_calc_finish_efi_reservation(mp, 3);

	if (xfs_has_parent(mp)) {
		unsigned int	rename_overhead, exchange_overhead;

		t3 = max(resp->tr_attrsetm.tr_logres,
			 resp->tr_attrrm.tr_logres);

		/*
		 * For a standard rename, the three xattr intent log items
		 * are (1) replacing the pptr for the source file; (2)
		 * removing the pptr on the dest file; and (3) adding a
		 * pptr for the whiteout file in the src dir.
		 *
		 * For a RENAME_EXCHANGE, there are two xattr intent
		 * items to replace the pptr for both src and dest
		 * files.  Link counts don't change and there is no
		 * whiteout.
		 *
		 * In the worst case we can end up relogging all log
		 * intent items to allow the log tail to move ahead, so
		 * they become overhead added to each transaction in a
		 * processing chain.
		 */
		rename_overhead = xfs_calc_pptr_replace_overhead() +
				  xfs_calc_pptr_unlink_overhead() +
				  xfs_calc_pptr_link_overhead();
		exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();

		overhead += max(rename_overhead, exchange_overhead);
	}

	return overhead + max3(t1, t2, t3);
}

static inline unsigned int
xfs_rename_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* One for the rename, one more for freeing blocks */
	unsigned int		ret = XFS_RENAME_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to remove or add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += max(resp->tr_attrsetm.tr_logcount,
			   resp->tr_attrrm.tr_logcount);

	return ret;
}

/*
 * For removing an inode from unlinked list at first, we can modify:
 *    the agi hash list and counters: sector size
 *    the on disk inode before ours in the agi hash list: inode cluster size
 *    the on disk inode in the agi hash list: inode cluster size
 */
STATIC uint
xfs_calc_iunlink_remove_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
	       2 * M_IGEO(mp)->inode_cluster_size;
}

static inline unsigned int
xfs_link_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_LINK_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrsetm.tr_logcount;

	return ret;
}

/*
 * For creating a link to an inode:
 *    the parent directory inode: inode size
 *    the linked inode: inode size
 *    the directory btree could split: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free some bmap blocks giving:
 *    the agf for the ag in which the blocks live: sector size
 *    the agfl for the ag in which the blocks live: sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_link_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	overhead += xfs_calc_iunlink_remove_reservation(mp);
	t1 = xfs_calc_inode_res(mp, 2) +
	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
	t2 = xfs_calc_finish_efi_reservation(mp, 1);

	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrsetm.tr_logres;
		overhead += xfs_calc_pptr_link_overhead();
	}

	return overhead + max3(t1, t2, t3);
}

/*
 * For adding an inode to unlinked list we can modify:
 *    the agi hash list: sector size
 *    the on disk inode: inode cluster size
 */
STATIC uint
xfs_calc_iunlink_add_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
	       M_IGEO(mp)->inode_cluster_size;
}

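/*
 * Illustrative only: with hypothetical 512-byte sectors and a 16k inode
 * cluster, unlinked list removal reserves one AGI sector plus two inode
 * clusters (the predecessor in the hash chain and the inode itself), about
 * 33k before log overhead, while addition needs only one sector plus one
 * cluster.
 */
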
static inline unsigned int
xfs_remove_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_REMOVE_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrrm.tr_logcount;

	return ret;
}

/*
 * For removing a directory entry we can modify:
 *    the parent directory inode: inode size
 *    the removed inode: inode size
 *    the directory btree could join: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free the dir and bmap blocks giving:
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_remove_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	overhead += xfs_calc_iunlink_add_reservation(mp);

	t1 = xfs_calc_inode_res(mp, 2) +
	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
	t2 = xfs_calc_finish_efi_reservation(mp, 2);

	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrrm.tr_logres;
		overhead += xfs_calc_pptr_unlink_overhead();
	}

	return overhead + max3(t1, t2, t3);
}

/*
 * For create, break it into the two cases that the transaction
 * covers. We start with the modify case - allocation done by modification
 * of the state of existing inodes - and the allocation case.
 */

/*
 * For create we can modify:
 *    the parent directory inode: inode size
 *    the new inode: inode size
 *    the inode btree entry: block size
 *    the superblock for the nlink flag: sector size
 *    the directory btree: (max depth + v2) * dir block size
 *    the directory inode's bmap btree: (max depth + v2) * block size
 *    the finobt (record modification and allocation btrees)
 */
STATIC uint
xfs_calc_create_resv_modify(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 2) +
		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
		(uint)XFS_FSB_TO_B(mp, 1) +
		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
		xfs_calc_finobt_res(mp);
}

/*
 * For icreate we can allocate some inodes giving:
 *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
 *    the superblock for the nlink flag: sector size
 *    the inode chunk (allocation, optional init)
 *    the inobt (record insertion)
 *    the finobt (optional, record insertion)
 */
STATIC uint
xfs_calc_icreate_resv_alloc(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
		mp->m_sb.sb_sectsize +
		xfs_calc_inode_chunk_res(mp, _ALLOC) +
		xfs_calc_inobt_res(mp) +
		xfs_calc_finobt_res(mp);
}

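/*
 * Sketch of the allocation-side total above, under assumed defaults: two
 * sector-sized AG headers (AGI + AGF), one sector for the superblock nlink
 * update, the inode chunk reservation (header-only on v5 supers, which use
 * ordered buffers), plus a full inobt shape change and, when enabled, the
 * same again for the finobt.
 */
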
static inline unsigned int
xfs_icreate_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_CREATE_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrsetm.tr_logcount;

	return ret;
}

STATIC uint
xfs_calc_icreate_reservation(
	struct xfs_mount	*mp)
{
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	unsigned int		t1, t2, t3 = 0;

	t1 = xfs_calc_icreate_resv_alloc(mp);
	t2 = xfs_calc_create_resv_modify(mp);

	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrsetm.tr_logres;
		overhead += xfs_calc_pptr_link_overhead();
	}

	return overhead + max3(t1, t2, t3);
}

STATIC uint
xfs_calc_create_tmpfile_reservation(
	struct xfs_mount	*mp)
{
	uint			res = XFS_DQUOT_LOGRES;

	res += xfs_calc_icreate_resv_alloc(mp);
	return res + xfs_calc_iunlink_add_reservation(mp);
}

static inline unsigned int
xfs_mkdir_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_MKDIR_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrsetm.tr_logcount;

	return ret;
}

/*
 * Making a new directory is the same as creating a new file.
 */
STATIC uint
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_icreate_reservation(mp);
}

static inline unsigned int
xfs_symlink_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_SYMLINK_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrsetm.tr_logcount;

	return ret;
}

/*
 * Making a new symlink is the same as creating a new file, but
 * with the added blocks for remote symlink data which can be up to 1kB in
 * length (XFS_SYMLINK_MAXLEN).
 */
STATIC uint
xfs_calc_symlink_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_icreate_reservation(mp) +
	       xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}

/*
 * In freeing an inode we can modify:
 *    the inode being freed: inode size
 *    the super block free inode counter, AGF and AGFL: sector size
 *    the on disk inode (agi unlinked list removal)
 *    the inode chunk (invalidated, headers only)
 *    the inode btree
 *    the finobt (record insertion, removal or modification)
 *
 * Note that the inode chunk res. includes an allocfree res. for freeing of the
 * inode chunk. This is technically extraneous because the inode chunk free is
 * deferred (it occurs after a transaction roll). Include the extra reservation
 * anyway since we've had reports of ifree transaction overruns due to too many
 * agfl fixups during inode chunk frees.
 */
STATIC uint
xfs_calc_ifree_reservation(
	struct xfs_mount	*mp)
{
	return XFS_DQUOT_LOGRES +
		xfs_calc_inode_res(mp, 1) +
		xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		xfs_calc_iunlink_remove_reservation(mp) +
		xfs_calc_inode_chunk_res(mp, _FREE) +
		xfs_calc_inobt_res(mp) +
		xfs_calc_finobt_res(mp);
}

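/*
 * Rough composition of the ifree total, for illustration: one logged inode,
 * three sector buffers (superblock, AGF, AGFL), the unlinked list removal
 * term, the header-only chunk invalidation, and full inobt/finobt shape
 * changes.  The technically extraneous allocfree term noted above absorbs
 * the AGFL fixups seen in the overrun reports.
 */
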
/*
 * When only changing the inode we log the inode and possibly the superblock.
 * We also add a bit of slop for the transaction stuff.
 */
STATIC uint
xfs_calc_ichange_reservation(
	struct xfs_mount	*mp)
{
	return XFS_DQUOT_LOGRES +
		xfs_calc_inode_res(mp, 1) +
		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}

/*
 * Growing the data section of the filesystem.
 *	superblock
 *	agi and agf
 *	allocation btrees
 */
STATIC uint
xfs_calc_growdata_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
			       XFS_FSB_TO_B(mp, 1));
}

/*
 * Growing the rt section of the filesystem.
 * In the first set of transactions (ALLOC) we allocate space to the
 * bitmap or summary files.
 *	superblock: sector size
 *	agf of the ag from which the extent is allocated: sector size
 *	bmap btree for bitmap/summary inode: max depth * blocksize
 *	bitmap/summary inode: inode size
 *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
 */
STATIC uint
xfs_calc_growrtalloc_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
			       XFS_FSB_TO_B(mp, 1)) +
	       xfs_calc_inode_res(mp, 1) +
	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
			       XFS_FSB_TO_B(mp, 1));
}

/*
 * Growing the rt section of the filesystem.
 * In the second set of transactions (ZERO) we zero the new metadata blocks.
 *	one bitmap/summary block: blocksize
 */
STATIC uint
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}

/*
 * Growing the rt section of the filesystem.
 * In the third set of transactions (FREE) we update metadata without
 * allocating any new blocks.
 *	superblock: sector size
 *	bitmap inode: inode size
 *	summary inode: inode size
 *	one bitmap block: blocksize
 *	summary blocks: new summary size
 */
STATIC uint
xfs_calc_growrtfree_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
	       xfs_calc_inode_res(mp, 2) +
	       xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
	       xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
}

/*
 * Logging the inode modification timestamp on a synchronous write.
 *	inode
 */
STATIC uint
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}

/*
 * Logging the inode mode bits when writing a setuid/setgid file
 *	inode
 */
STATIC uint
xfs_calc_writeid_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}

/*
 * Converting the inode from non-attributed to attributed.
 *	the inode being converted: inode size
 *	agf block and superblock (for block allocation)
 *	the new block (directory sized)
 *	bmap blocks for the new directory block
 *	allocation btrees
 */
STATIC uint
xfs_calc_addafork_reservation(
	struct xfs_mount	*mp)
{
	return XFS_DQUOT_LOGRES +
		xfs_calc_inode_res(mp, 1) +
		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
				 XFS_FSB_TO_B(mp, 1)) +
		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
				 XFS_FSB_TO_B(mp, 1));
}

/*
 * Removing the attribute fork of a file
 *    the inode being truncated: inode size
 *    the inode's bmap btree: max depth * block size
 * And the bmap_finish transaction can free the blocks and bmap blocks:
 *    the agf for each of the ags: 4 * sector size
 *    the agfl for each of the ags: 4 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming 4 extents:
 *		4 exts * 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_attrinval_reservation(
	struct xfs_mount	*mp)
{
	return max((xfs_calc_inode_res(mp, 1) +
		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				     XFS_FSB_TO_B(mp, 1))),
		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
		    xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
				     XFS_FSB_TO_B(mp, 1))));
}

/*
 * Setting an attribute at mount time.
 *	the inode getting the attribute
 *	the superblock for allocations
 *	the agfs extents are allocated from
 *	the attribute btree * max depth
 *	the inode allocation btree
 * Since attribute transaction space is dependent on the size of the attribute,
 * the calculation is done partially at mount time and partially at runtime (see
 * below).
 */
STATIC uint
xfs_calc_attrsetm_reservation(
	struct xfs_mount	*mp)
{
	return XFS_DQUOT_LOGRES +
		xfs_calc_inode_res(mp, 1) +
		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
}

/*
 * Setting an attribute at runtime, transaction space unit per block.
 *	the superblock for allocations: sector size
 *	the inode bmap btree could join or split: max depth * block size
 * Since the runtime attribute transaction space is dependent on the total
 * blocks needed for the 1st bmap, here we calculate the space unit for
 * one block so that the caller could figure out the total space according
 * to the attribute extent length in blocks by:
 *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
 */
STATIC uint
xfs_calc_attrsetrt_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
	       xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
			       XFS_FSB_TO_B(mp, 1));
}

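/*
 * Illustrative use of the per-block unit above: a caller sizing a runtime
 * attr set for an ext-block attribute would combine the two pieces roughly
 * as
 *
 *	M_RES(mp)->tr_attrsetm.tr_logres +
 *		ext * M_RES(mp)->tr_attrsetrt.tr_logres
 *
 * pairing the fixed mount-time portion with ext copies of the per-block
 * unit; the exact pairing is up to the attr code.
 */
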
/*
 * Removing an attribute.
 *    the inode: inode size
 *    the attribute btree could join: max depth * block size
 *    the inode bmap btree could join or split: max depth * block size
 * And the bmap_finish transaction can free the attr blocks freed giving:
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_attrrm_reservation(
	struct xfs_mount	*mp)
{
	return XFS_DQUOT_LOGRES +
		max((xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
				      XFS_FSB_TO_B(mp, 1)) +
		     (uint)XFS_FSB_TO_B(mp,
					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
				      XFS_FSB_TO_B(mp, 1))));
}

/*
 * Clearing a bad agino number in an agi hash bucket.
 */
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}

/*
 * Adjusting quota limits.
 *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
 */
STATIC uint
xfs_calc_qm_setqlim_reservation(void)
{
	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
}

/*
 * Allocating quota on disk if needed.
 *	the write transaction log space for quota file extent allocation
 *	the unit of quota allocation: one system block size
 */
STATIC uint
xfs_calc_qm_dqalloc_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	return xfs_calc_write_reservation(mp, for_minlogsize) +
		xfs_calc_buf_res(1,
			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}

unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_qm_dqalloc_reservation(mp, true);
}

/*
 * Syncing the incore super block changes to disk.
 *	the super block to reflect the changes: sector size
 */
STATIC uint
xfs_calc_sb_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}

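/*
 * Worked term for xfs_calc_qm_dqalloc_reservation() above, for illustration:
 * the dquot cluster add-on is one buffer of
 * XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1 bytes on top of a full
 * write reservation, so with hypothetical 4k blocks and a one-fsb cluster
 * that is 4095 bytes plus the rounded buffer log overhead.
 */
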
/*
 * Namespace reservations.
 *
 * These get tricky when parent pointers are enabled as we have attribute
 * modifications occurring from within these transactions. Rather than confuse
 * each of these reservation calculations with the conditional attribute
 * reservations, add them here in a clear and concise manner. This requires that
 * the attribute reservations have already been calculated.
 *
 * Note that we only include the static attribute reservation here; the runtime
 * reservation will have to be modified by the size of the attributes being
 * added/removed/modified. See the comments on the attribute reservation
 * calculations for more details.
 */
STATIC void
xfs_calc_namespace_reservations(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	ASSERT(resp->tr_attrsetm.tr_logres > 0);

	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
	resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
	resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
	resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
	resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
	resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
	resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
	resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
	resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
	resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
	resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}

STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Pick a default that will scale reasonably for the log size. */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}

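/*
 * Consumption sketch, for illustration: once xfs_trans_resv_calc() below has
 * filled M_RES(mp), callers pass the precomputed entries straight to
 * transaction allocation, e.g.
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
 *			blocks, rtextents, 0, &tp);
 *
 * so no reservation arithmetic happens on the transaction allocation paths.
 */
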
void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	int			logcount_adj = 0;

	/*
	 * The following transactions are logged in physical format and
	 * require a permanent reservation on space.
	 */
	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create_tmpfile.tr_logres =
		xfs_calc_create_tmpfile_reservation(mp);
	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
			false);
	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	xfs_calc_namespace_reservations(mp, resp);

	/*
	 * The following transactions are logged in logical format with
	 * a default log count.
	 */
	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	/* growdata requires permanent res; it can free space to the last AG */
	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* The following transactions are logged in logical format */
	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);

	/*
	 * Add one logcount for BUI items that appear with rmap or reflink,
	 * one logcount for refcount intent items, and one logcount for rmap
	 * intent items.
	 */
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
		logcount_adj++;
	if (xfs_has_reflink(mp))
		logcount_adj++;
	if (xfs_has_rmapbt(mp))
		logcount_adj++;

	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}

/*
 * Return the per-extent and fixed transaction reservation sizes needed to
 * complete an atomic write.
 */
STATIC unsigned int
xfs_calc_atomic_write_ioend_geometry(
	struct xfs_mount	*mp,
	unsigned int		*step_size)
{
	const unsigned int	efi = xfs_efi_log_space(1);
	const unsigned int	efd = xfs_efd_log_space(1);
	const unsigned int	rui = xfs_rui_log_space(1);
	const unsigned int	rud = xfs_rud_log_space();
	const unsigned int	cui = xfs_cui_log_space(1);
	const unsigned int	cud = xfs_cud_log_space();
	const unsigned int	bui = xfs_bui_log_space(1);
	const unsigned int	bud = xfs_bud_log_space();

	/*
	 * Maximum overhead to complete an atomic write ioend in software:
	 * remove data fork extent + remove cow fork extent + map extent into
	 * data fork.
	 *
	 * tx0: Creates a BUI and a CUI and that's all it needs.
	 *
	 * tx1: Roll to finish the BUI.  Need space for the BUD, an RUI, and
	 * enough space to relog the CUI (== CUI + CUD).
	 *
	 * tx2: Roll again to finish the RUI.  Need space for the RUD and space
	 * to relog the CUI.
	 *
	 * tx3: Roll again, need space for the CUD and possibly a new EFI.
	 *
	 * tx4: Roll again, need space for an EFD.
	 *
	 * If the extent referenced by the pair of BUI/CUI items is not the one
	 * being currently processed, then we need to reserve space to relog
	 * both items.
	 */
	const unsigned int	tx0 = bui + cui;
	const unsigned int	tx1 = bud + rui + cui + cud;
	const unsigned int	tx2 = rud + cui + cud;
	const unsigned int	tx3 = cud + efi;
	const unsigned int	tx4 = efd;
	const unsigned int	relog = bui + bud + cui + cud;

	const unsigned int	per_intent = max(max3(tx0, tx1, tx2),
						 max3(tx3, tx4, relog));

	/* Overhead to finish one step of each intent item type */
	const unsigned int	f1 = xfs_calc_finish_efi_reservation(mp, 1);
	const unsigned int	f2 = xfs_calc_finish_rui_reservation(mp, 1);
	const unsigned int	f3 = xfs_calc_finish_cui_reservation(mp, 1);
	const unsigned int	f4 = xfs_calc_finish_bui_reservation(mp, 1);

	/* We only finish one item per transaction in a chain */
	*step_size = max(f4, max3(f1, f2, f3));

	return per_intent;
}

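/*
 * Shape of the result, for illustration: per_intent is the costliest single
 * roll in the chain (the max of tx0..tx4 and the relog case), while
 * step_size is the costliest single intent finish.  A reservation R can
 * therefore carry roughly (R - step_size) / per_intent blocks of atomic
 * write, which is exactly how xfs_calc_max_atomic_write_fsblocks() below
 * inverts the geometry.
 */
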
/*
 * Compute the maximum size (in fsblocks) of atomic writes that we can complete
 * given the existing log reservations.
 */
xfs_extlen_t
xfs_calc_max_atomic_write_fsblocks(
	struct xfs_mount	*mp)
{
	const struct xfs_trans_res	*resv = &M_RES(mp)->tr_atomic_ioend;
	unsigned int			per_intent = 0;
	unsigned int			step_size = 0;
	unsigned int			ret = 0;

	if (resv->tr_logres > 0) {
		per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
				&step_size);

		if (resv->tr_logres >= step_size)
			ret = (resv->tr_logres - step_size) / per_intent;
	}

	trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
			resv->tr_logres, ret);

	return ret;
}

/*
 * Compute the log blocks and transaction reservation needed to complete an
 * atomic write of a given number of blocks.  Worst case, each block requires
 * separate handling.  A return value of 0 means something went wrong.
 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount,
	unsigned int		*new_logres)
{
	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
	uint			old_logres = curr_res->tr_logres;
	unsigned int		per_intent, step_size;
	unsigned int		logres;
	xfs_extlen_t		min_logblocks;

	ASSERT(blockcount > 0);

	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));

	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);

	/* Check for overflows */
	if (check_mul_overflow(blockcount, per_intent, &logres) ||
	    check_add_overflow(logres, step_size, &logres))
		return 0;

	curr_res->tr_logres = logres;
	min_logblocks = xfs_log_calc_minimum_size(mp);
	curr_res->tr_logres = old_logres;

	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
			blockcount, min_logblocks, logres);

	*new_logres = logres;
	return min_logblocks;
}

/*
 * Compute the transaction reservation needed to complete an out of place
 * atomic write of a given number of blocks.
 */
int
xfs_calc_atomic_write_reservation(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount)
{
	unsigned int		new_logres;
	xfs_extlen_t		min_logblocks;

	/*
	 * If the caller doesn't ask for a specific atomic write size, then
	 * use the defaults.
	 */
	if (blockcount == 0) {
		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
		return 0;
	}

	min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
			&new_logres);
	if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
		return -EINVAL;

	M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
	return 0;
}