1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 * Copyright (C) 2010 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_da_format.h"
15 #include "xfs_da_btree.h"
16 #include "xfs_inode.h"
17 #include "xfs_bmap_btree.h"
18 #include "xfs_quota.h"
19 #include "xfs_trans.h"
20 #include "xfs_qm.h"
21 #include "xfs_trans_space.h"
22 #include "xfs_rtbitmap.h"
23 #include "xfs_attr_item.h"
24 #include "xfs_log.h"
25 #include "xfs_defer.h"
26 #include "xfs_bmap_item.h"
27 #include "xfs_extfree_item.h"
28 #include "xfs_rmap_item.h"
29 #include "xfs_refcount_item.h"
30 #include "xfs_trace.h"
31
32 #define _ALLOC true
33 #define _FREE false
34
35 /*
36 * A buffer has a format structure overhead in the log in addition
37 * to the data, so we need to take this into account when reserving
38 * space in a transaction for a buffer. Round the space required up
39 * to a multiple of 128 bytes so that we don't change the historical
40 * reservation that has been used for this overhead.
41 */
42 STATIC uint
xfs_buf_log_overhead(void)43 xfs_buf_log_overhead(void)
44 {
45 return round_up(sizeof(struct xlog_op_header) +
46 sizeof(struct xfs_buf_log_format), 128);
47 }
48
49 /*
 * Calculate our transaction log reservation per item in bytes.
51 *
52 * The nbufs argument is used to indicate the number of items that
53 * will be changed in a transaction. size is used to tell how many
54 * bytes should be reserved per item.
55 */
56 STATIC uint
xfs_calc_buf_res(uint nbufs,uint size)57 xfs_calc_buf_res(
58 uint nbufs,
59 uint size)
60 {
61 return nbufs * (size + xfs_buf_log_overhead());
62 }
63
64 /*
65 * Per-extent log reservation for the btree changes involved in freeing or
66 * allocating an extent. In classic XFS there were two trees that will be
67 * modified (bnobt + cntbt). With rmap enabled, there are three trees
68 * (rmapbt). The number of blocks reserved is based on the formula:
69 *
70 * num trees * ((2 blocks/level * max depth) - 1)
71 *
72 * Keep in mind that max depth is calculated separately for each type of tree.
73 */
74 uint
xfs_allocfree_block_count(struct xfs_mount * mp,uint num_ops)75 xfs_allocfree_block_count(
76 struct xfs_mount *mp,
77 uint num_ops)
78 {
79 uint blocks;
80
81 blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
82 if (xfs_has_rmapbt(mp))
83 blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
84
85 return blocks;
86 }
87
88 /*
89 * Per-extent log reservation for refcount btree changes. These are never done
90 * in the same transaction as an allocation or a free, so we compute them
91 * separately.
92 */
93 static unsigned int
xfs_refcountbt_block_count(struct xfs_mount * mp,unsigned int num_ops)94 xfs_refcountbt_block_count(
95 struct xfs_mount *mp,
96 unsigned int num_ops)
97 {
98 return num_ops * (2 * mp->m_refc_maxlevels - 1);
99 }
100
101 static unsigned int
xfs_rtrefcountbt_block_count(struct xfs_mount * mp,unsigned int num_ops)102 xfs_rtrefcountbt_block_count(
103 struct xfs_mount *mp,
104 unsigned int num_ops)
105 {
106 return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
107 }
108
109 /*
110 * Logging inodes is really tricksy. They are logged in memory format,
111 * which means that what we write into the log doesn't directly translate into
112 * the amount of space they use on disk.
113 *
114 * Case in point - btree format forks in memory format use more space than the
115 * on-disk format. In memory, the buffer contains a normal btree block header so
116 * the btree code can treat it as though it is just another generic buffer.
117 * However, when we write it to the inode fork, we don't write all of this
118 * header as it isn't needed. e.g. the root is only ever in the inode, so
119 * there's no need for sibling pointers which would waste 16 bytes of space.
120 *
121 * Hence when we have an inode with a maximally sized btree format fork, then
122 * amount of information we actually log is greater than the size of the inode
123 * on disk. Hence we need an inode reservation function that calculates all this
124 * correctly. So, we log:
125 *
126 * - 4 log op headers for object
127 * - for the ilf, the inode core and 2 forks
128 * - inode log format object
129 * - the inode core
130 * - two inode forks containing bmap btree root blocks.
131 * - the btree data contained by both forks will fit into the inode size,
132 * hence when combined with the inode core above, we have a total of the
133 * actual inode size.
134 * - the BMBT headers need to be accounted separately, as they are
135 * additional to the records and pointers that fit inside the inode
136 * forks.
137 */
138 STATIC uint
xfs_calc_inode_res(struct xfs_mount * mp,uint ninodes)139 xfs_calc_inode_res(
140 struct xfs_mount *mp,
141 uint ninodes)
142 {
143 return ninodes *
144 (4 * sizeof(struct xlog_op_header) +
145 sizeof(struct xfs_inode_log_format) +
146 mp->m_sb.sb_inodesize +
147 2 * xfs_bmbt_block_len(mp));
148 }
149
150 /*
151 * Inode btree record insertion/removal modifies the inode btree and free space
152 * btrees (since the inobt does not use the agfl). This requires the following
153 * reservation:
154 *
155 * the inode btree: max depth * blocksize
156 * the allocation btrees: 2 trees * (max depth - 1) * block size
157 *
158 * The caller must account for SB and AG header modifications, etc.
159 */
160 STATIC uint
xfs_calc_inobt_res(struct xfs_mount * mp)161 xfs_calc_inobt_res(
162 struct xfs_mount *mp)
163 {
164 return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
165 XFS_FSB_TO_B(mp, 1)) +
166 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
167 XFS_FSB_TO_B(mp, 1));
168 }
169
170 /*
171 * The free inode btree is a conditional feature. The behavior differs slightly
172 * from that of the traditional inode btree in that the finobt tracks records
173 * for inode chunks with at least one free inode. A record can be removed from
174 * the tree during individual inode allocation. Therefore the finobt
175 * reservation is unconditional for both the inode chunk allocation and
176 * individual inode allocation (modify) cases.
177 *
178 * Behavior aside, the reservation for finobt modification is equivalent to the
179 * traditional inobt: cover a full finobt shape change plus block allocation.
180 */
181 STATIC uint
xfs_calc_finobt_res(struct xfs_mount * mp)182 xfs_calc_finobt_res(
183 struct xfs_mount *mp)
184 {
185 if (!xfs_has_finobt(mp))
186 return 0;
187
188 return xfs_calc_inobt_res(mp);
189 }
190
191 /*
192 * Calculate the reservation required to allocate or free an inode chunk. This
193 * includes:
194 *
195 * the allocation btrees: 2 trees * (max depth - 1) * block size
196 * the inode chunk: m_ino_geo.ialloc_blks * N
197 *
198 * The size N of the inode chunk reservation depends on whether it is for
199 * allocation or free and which type of create transaction is in use. An inode
200 * chunk free always invalidates the buffers and only requires reservation for
201 * headers (N == 0). An inode chunk allocation requires a chunk sized
202 * reservation on v4 and older superblocks to initialize the chunk. No chunk
203 * reservation is required for allocation on v5 supers, which use ordered
204 * buffers to initialize.
205 */
206 STATIC uint
xfs_calc_inode_chunk_res(struct xfs_mount * mp,bool alloc)207 xfs_calc_inode_chunk_res(
208 struct xfs_mount *mp,
209 bool alloc)
210 {
211 uint res, size = 0;
212
213 res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
214 XFS_FSB_TO_B(mp, 1));
215 if (alloc) {
216 /* icreate tx uses ordered buffers */
217 if (xfs_has_v3inodes(mp))
218 return res;
219 size = XFS_FSB_TO_B(mp, 1);
220 }
221
222 res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
223 return res;
224 }
225
226 /*
227 * Per-extent log reservation for the btree changes involved in freeing or
228 * allocating a realtime extent. We have to be able to log as many rtbitmap
229 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
230 * extents, as well as the realtime summary block (t1). Realtime rmap btree
231 * operations happen in a second transaction, so factor in a couple of rtrmapbt
232 * splits (t2).
233 */
234 static unsigned int
xfs_rtalloc_block_count(struct xfs_mount * mp,unsigned int num_ops)235 xfs_rtalloc_block_count(
236 struct xfs_mount *mp,
237 unsigned int num_ops)
238 {
239 unsigned int rtbmp_blocks;
240 xfs_rtxlen_t rtxlen;
241 unsigned int t1, t2 = 0;
242
243 rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
244 rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
245 t1 = (rtbmp_blocks + 1) * num_ops;
246
247 if (xfs_has_rmapbt(mp))
248 t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
249
250 return max(t1, t2);
251 }
252
253 /*
254 * Various log reservation values.
255 *
256 * These are based on the size of the file system block because that is what
257 * most transactions manipulate. Each adds in an additional 128 bytes per
258 * item logged to try to account for the overhead of the transaction mechanism.
259 *
260 * Note: Most of the reservations underestimate the number of allocation
261 * groups into which they could free extents in the xfs_defer_finish() call.
262 * This is because the number in the worst case is quite high and quite
263 * unusual. In order to fix this we need to change xfs_defer_finish() to free
264 * extents in only a single AG at a time. This will require changes to the
265 * EFI code as well, however, so that the EFI for the extents not freed is
266 * logged again in each transaction. See SGI PV #261917.
267 *
268 * Reservation functions here avoid a huge stack in xfs_trans_init due to
269 * register overflow from temporaries in the calculations.
270 */
271
272 /*
273 * Finishing a data device refcount updates (t1):
274 * the agfs of the ags containing the blocks: nr_ops * sector size
275 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
276 */
277 inline unsigned int
xfs_calc_finish_cui_reservation(struct xfs_mount * mp,unsigned int nr_ops)278 xfs_calc_finish_cui_reservation(
279 struct xfs_mount *mp,
280 unsigned int nr_ops)
281 {
282 if (!xfs_has_reflink(mp))
283 return 0;
284
285 return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
286 xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
287 mp->m_sb.sb_blocksize);
288 }
289
290 /*
 * Realtime refcount updates (t2):
292 * the rt refcount inode
293 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
294 */
295 inline unsigned int
xfs_calc_finish_rt_cui_reservation(struct xfs_mount * mp,unsigned int nr_ops)296 xfs_calc_finish_rt_cui_reservation(
297 struct xfs_mount *mp,
298 unsigned int nr_ops)
299 {
300 if (!xfs_has_rtreflink(mp))
301 return 0;
302
303 return xfs_calc_inode_res(mp, 1) +
304 xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
305 mp->m_sb.sb_blocksize);
306 }
307
308 /*
309 * Compute the log reservation required to handle the refcount update
310 * transaction. Refcount updates are always done via deferred log items.
311 *
312 * This is calculated as the max of:
313 * Data device refcount updates (t1):
314 * the agfs of the ags containing the blocks: nr_ops * sector size
315 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
316 * Realtime refcount updates (t2);
317 * the rt refcount inode
318 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
319 */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	/* Worst case of the data device (t1) and realtime (t2) updates. */
	return max(xfs_calc_finish_cui_reservation(mp, nr_ops),
		   xfs_calc_finish_rt_cui_reservation(mp, nr_ops));
}
332
333 /*
334 * In a write transaction we can allocate a maximum of 2
335 * extents. This gives (t1):
336 * the inode getting the new extents: inode size
337 * the inode's bmap btree: max depth * block size
338 * the agfs of the ags from which the extents are allocated: 2 * sector
339 * the superblock free block counter: sector size
340 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
341 * Or, if we're writing to a realtime file (t2):
342 * the inode getting the new extents: inode size
343 * the inode's bmap btree: max depth * block size
344 * the agfs of the ags from which the extents are allocated: 2 * sector
345 * the superblock free block counter: sector size
346 * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
347 * the realtime summary: 1 block
348 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
349 * And the bmap_finish transaction can free bmap blocks in a join (t3):
350 * the agfs of the ags containing the blocks: 2 * sector size
351 * the agfls of the ags containing the blocks: 2 * sector size
352 * the super block free block counter: sector size
353 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
354 * And any refcount updates that happen in a separate transaction (t4).
355 */
STATIC uint
xfs_calc_write_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: data device allocation of up to two extents */
	t1 = xfs_calc_inode_res(mp, 1) +
		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
		xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/* t2: same write targeting the realtime device */
	if (xfs_has_realtime(mp)) {
		t2 = xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				     blksz) +
		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
	} else {
		t2 = 0;
	}

	/* t3: bmap_finish freeing bmap blocks */
	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * two refcountbt splits for each transaction.  The codebase runs
	 * refcountbt updates in separate transactions now, so to compute the
	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
	 * do not account them separately as t4.  Reflink did not support
	 * realtime when the reservations were established, so no adjustment to
	 * t2 is needed.
	 */
	if (for_minlogsize) {
		unsigned int	adj = 0;

		if (xfs_has_reflink(mp))
			adj = xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 2),
					blksz);
		t1 += adj;
		t3 += adj;
		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: refcount updates finished in their own transaction chain */
	t4 = xfs_calc_refcountbt_reservation(mp, 1);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
407
/* Write reservation as used for the minimum log size computation. */
unsigned int
xfs_calc_write_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_write_reservation(mp, true);
}
414
415 /*
416 * Finishing an EFI can free the blocks and bmap blocks (t2):
417 * the agf for each of the ags: nr * sector size
418 * the agfl for each of the ags: nr * sector size
419 * the super block to reflect the freed blocks: sector size
420 * worst case split in allocation btrees per extent assuming nr extents:
421 * nr exts * 2 trees * (2 * max depth - 1) * block size
422 */
423 inline unsigned int
xfs_calc_finish_efi_reservation(struct xfs_mount * mp,unsigned int nr)424 xfs_calc_finish_efi_reservation(
425 struct xfs_mount *mp,
426 unsigned int nr)
427 {
428 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
429 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
430 mp->m_sb.sb_blocksize);
431 }
432
433 /*
434 * Or, if it's a realtime file (t3):
435 * the agf for each of the ags: 2 * sector size
436 * the agfl for each of the ags: 2 * sector size
437 * the super block to reflect the freed blocks: sector size
438 * the realtime bitmap:
439 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
440 * the realtime summary: 2 exts * 1 block
441 * worst case split in allocation btrees per extent assuming 2 extents:
442 * 2 exts * 2 trees * (2 * max depth - 1) * block size
443 */
444 inline unsigned int
xfs_calc_finish_rt_efi_reservation(struct xfs_mount * mp,unsigned int nr)445 xfs_calc_finish_rt_efi_reservation(
446 struct xfs_mount *mp,
447 unsigned int nr)
448 {
449 if (!xfs_has_realtime(mp))
450 return 0;
451
452 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
453 xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
454 mp->m_sb.sb_blocksize) +
455 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
456 mp->m_sb.sb_blocksize);
457 }
458
459 /*
460 * Finishing an RUI is the same as an EFI. We can split the rmap btree twice
461 * on each end of the record, and that can cause the AGFL to be refilled or
462 * emptied out.
463 */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	/* Without an rmap btree there are no rmap intents to finish. */
	return xfs_has_rmapbt(mp) ?
		xfs_calc_finish_efi_reservation(mp, nr) : 0;
}
473
/*
 * Finishing an RT RUI is the same as an RT EFI. We can split the realtime
 * rmap btree twice on each end of the record, and that can cause the AGFL
 * to be refilled or emptied out.
 */
478 */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	/* Without an rt rmap btree there are no rt rmap intents to finish. */
	return xfs_has_rtrmapbt(mp) ?
		xfs_calc_finish_rt_efi_reservation(mp, nr) : 0;
}
488
489 /*
490 * In finishing a BUI, we can modify:
491 * the inode being truncated: inode size
492 * dquots
493 * the inode's bmap btree: (max depth + 1) * block size
494 */
495 inline unsigned int
xfs_calc_finish_bui_reservation(struct xfs_mount * mp,unsigned int nr)496 xfs_calc_finish_bui_reservation(
497 struct xfs_mount *mp,
498 unsigned int nr)
499 {
500 return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
501 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
502 mp->m_sb.sb_blocksize);
503 }
504
505 /*
506 * In truncating a file we free up to two extents at once. We can modify (t1):
507 * the inode being truncated: inode size
508 * the inode's bmap btree: (max depth + 1) * block size
509 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
510 * the agf for each of the ags: 4 * sector size
511 * the agfl for each of the ags: 4 * sector size
512 * the super block to reflect the freed blocks: sector size
513 * worst case split in allocation btrees per extent assuming 4 extents:
514 * 4 exts * 2 trees * (2 * max depth - 1) * block size
515 * Or, if it's a realtime file (t3):
516 * the agf for each of the ags: 2 * sector size
517 * the agfl for each of the ags: 2 * sector size
518 * the super block to reflect the freed blocks: sector size
519 * the realtime bitmap:
520 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
521 * the realtime summary: 2 exts * 1 block
522 * worst case split in allocation btrees per extent assuming 2 extents:
523 * 2 exts * 2 trees * (2 * max depth - 1) * block size
524 * And any refcount updates that happen in a separate transaction (t4).
525 */
STATIC uint
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: the inode being truncated and its bmap btree */
	t1 = xfs_calc_inode_res(mp, 1) +
	    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

	/* t2: bmap_finish freeing up to four extents */
	t2 = xfs_calc_finish_efi_reservation(mp, 4);
	/* t3: the realtime variant freeing two rt extents */
	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * four refcountbt splits in the same transaction as bnobt/cntbt
	 * updates.  The codebase runs refcountbt updates in separate
	 * transactions now, so to compute the minimum log size, add the
	 * refcount btree splits back here and do not compute them separately
	 * as t4.  Reflink did not support realtime when the reservations were
	 * established, so do not adjust t3.
	 */
	if (for_minlogsize) {
		if (xfs_has_reflink(mp))
			t2 += xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 4),
					blksz);

		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: refcount updates finished in their own transaction chain */
	t4 = xfs_calc_refcountbt_reservation(mp, 2);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
561
/* Truncate reservation as used for the minimum log size computation. */
unsigned int
xfs_calc_itruncate_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_itruncate_reservation(mp, true);
}
568
xfs_calc_pptr_link_overhead(void)569 static inline unsigned int xfs_calc_pptr_link_overhead(void)
570 {
571 return sizeof(struct xfs_attri_log_format) +
572 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
573 xlog_calc_iovec_len(MAXNAMELEN - 1);
574 }
xfs_calc_pptr_unlink_overhead(void)575 static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
576 {
577 return sizeof(struct xfs_attri_log_format) +
578 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
579 xlog_calc_iovec_len(MAXNAMELEN - 1);
580 }
xfs_calc_pptr_replace_overhead(void)581 static inline unsigned int xfs_calc_pptr_replace_overhead(void)
582 {
583 return sizeof(struct xfs_attri_log_format) +
584 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
585 xlog_calc_iovec_len(MAXNAMELEN - 1) +
586 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
587 xlog_calc_iovec_len(MAXNAMELEN - 1);
588 }
589
590 /*
591 * In renaming a files we can modify:
592 * the five inodes involved: 5 * inode size
593 * the two directory btrees: 2 * (max depth + v2) * dir block size
594 * the two directory bmap btrees: 2 * max depth * block size
595 * And the bmap_finish transaction can free dir and bmap blocks (two sets
596 * of bmap blocks) giving (t2):
597 * the agf for the ags in which the blocks live: 3 * sector size
598 * the agfl for the ags in which the blocks live: 3 * sector size
599 * the superblock for the free block count: sector size
600 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
601 * If parent pointers are enabled (t3), then each transaction in the chain
602 * must be capable of setting or removing the extended attribute
603 * containing the parent information. It must also be able to handle
604 * the three xattr intent items that track the progress of the parent
605 * pointer update.
606 */
STATIC uint
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/* t1: the five inodes plus both directories' btree/bmbt changes */
	t1 = xfs_calc_inode_res(mp, 5) +
	     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			XFS_FSB_TO_B(mp, 1));

	/* t2: bmap_finish freeing dir and bmap blocks (three extents) */
	t2 = xfs_calc_finish_efi_reservation(mp, 3);

	if (xfs_has_parent(mp)) {
		unsigned int	rename_overhead, exchange_overhead;

		/* t3: one xattr set or remove transaction in the chain */
		t3 = max(resp->tr_attrsetm.tr_logres,
			 resp->tr_attrrm.tr_logres);

		/*
		 * For a standard rename, the three xattr intent log items
		 * are (1) replacing the pptr for the source file; (2)
		 * removing the pptr on the dest file; and (3) adding a
		 * pptr for the whiteout file in the src dir.
		 *
		 * For an RENAME_EXCHANGE, there are two xattr intent
		 * items to replace the pptr for both src and dest
		 * files.  Link counts don't change and there is no
		 * whiteout.
		 *
		 * In the worst case we can end up relogging all log
		 * intent items to allow the log tail to move ahead, so
		 * they become overhead added to each transaction in a
		 * processing chain.
		 */
		rename_overhead = xfs_calc_pptr_replace_overhead() +
				  xfs_calc_pptr_unlink_overhead() +
				  xfs_calc_pptr_link_overhead();
		exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();

		overhead += max(rename_overhead, exchange_overhead);
	}

	return overhead + max3(t1, t2, t3);
}
653
654 static inline unsigned int
xfs_rename_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)655 xfs_rename_log_count(
656 struct xfs_mount *mp,
657 struct xfs_trans_resv *resp)
658 {
659 /* One for the rename, one more for freeing blocks */
660 unsigned int ret = XFS_RENAME_LOG_COUNT;
661
662 /*
663 * Pre-reserve enough log reservation to handle the transaction
664 * rolling needed to remove or add one parent pointer.
665 */
666 if (xfs_has_parent(mp))
667 ret += max(resp->tr_attrsetm.tr_logcount,
668 resp->tr_attrrm.tr_logcount);
669
670 return ret;
671 }
672
673 /*
674 * For removing an inode from unlinked list at first, we can modify:
675 * the agi hash list and counters: sector size
676 * the on disk inode before ours in the agi hash list: inode cluster size
677 * the on disk inode in the agi hash list: inode cluster size
678 */
679 STATIC uint
xfs_calc_iunlink_remove_reservation(struct xfs_mount * mp)680 xfs_calc_iunlink_remove_reservation(
681 struct xfs_mount *mp)
682 {
683 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
684 2 * M_IGEO(mp)->inode_cluster_size;
685 }
686
687 static inline unsigned int
xfs_link_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)688 xfs_link_log_count(
689 struct xfs_mount *mp,
690 struct xfs_trans_resv *resp)
691 {
692 unsigned int ret = XFS_LINK_LOG_COUNT;
693
694 /*
695 * Pre-reserve enough log reservation to handle the transaction
696 * rolling needed to add one parent pointer.
697 */
698 if (xfs_has_parent(mp))
699 ret += resp->tr_attrsetm.tr_logcount;
700
701 return ret;
702 }
703
704 /*
705 * For creating a link to an inode:
706 * the parent directory inode: inode size
707 * the linked inode: inode size
708 * the directory btree could split: (max depth + v2) * dir block size
709 * the directory bmap btree could join or split: (max depth + v2) * blocksize
710 * And the bmap_finish transaction can free some bmap blocks giving:
711 * the agf for the ag in which the blocks live: sector size
712 * the agfl for the ag in which the blocks live: sector size
713 * the superblock for the free block count: sector size
714 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
715 */
716 STATIC uint
xfs_calc_link_reservation(struct xfs_mount * mp)717 xfs_calc_link_reservation(
718 struct xfs_mount *mp)
719 {
720 unsigned int overhead = XFS_DQUOT_LOGRES;
721 struct xfs_trans_resv *resp = M_RES(mp);
722 unsigned int t1, t2, t3 = 0;
723
724 overhead += xfs_calc_iunlink_remove_reservation(mp);
725 t1 = xfs_calc_inode_res(mp, 2) +
726 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
727 t2 = xfs_calc_finish_efi_reservation(mp, 1);
728
729 if (xfs_has_parent(mp)) {
730 t3 = resp->tr_attrsetm.tr_logres;
731 overhead += xfs_calc_pptr_link_overhead();
732 }
733
734 return overhead + max3(t1, t2, t3);
735 }
736
737 /*
738 * For adding an inode to unlinked list we can modify:
739 * the agi hash list: sector size
740 * the on disk inode: inode cluster size
741 */
742 STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t * mp)743 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
744 {
745 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
746 M_IGEO(mp)->inode_cluster_size;
747 }
748
749 static inline unsigned int
xfs_remove_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)750 xfs_remove_log_count(
751 struct xfs_mount *mp,
752 struct xfs_trans_resv *resp)
753 {
754 unsigned int ret = XFS_REMOVE_LOG_COUNT;
755
756 /*
757 * Pre-reserve enough log reservation to handle the transaction
758 * rolling needed to add one parent pointer.
759 */
760 if (xfs_has_parent(mp))
761 ret += resp->tr_attrrm.tr_logcount;
762
763 return ret;
764 }
765
766 /*
767 * For removing a directory entry we can modify:
768 * the parent directory inode: inode size
769 * the removed inode: inode size
770 * the directory btree could join: (max depth + v2) * dir block size
771 * the directory bmap btree could join or split: (max depth + v2) * blocksize
772 * And the bmap_finish transaction can free the dir and bmap blocks giving:
773 * the agf for the ag in which the blocks live: 2 * sector size
774 * the agfl for the ag in which the blocks live: 2 * sector size
775 * the superblock for the free block count: sector size
776 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
777 */
778 STATIC uint
xfs_calc_remove_reservation(struct xfs_mount * mp)779 xfs_calc_remove_reservation(
780 struct xfs_mount *mp)
781 {
782 unsigned int overhead = XFS_DQUOT_LOGRES;
783 struct xfs_trans_resv *resp = M_RES(mp);
784 unsigned int t1, t2, t3 = 0;
785
786 overhead += xfs_calc_iunlink_add_reservation(mp);
787
788 t1 = xfs_calc_inode_res(mp, 2) +
789 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
790 t2 = xfs_calc_finish_efi_reservation(mp, 2);
791
792 if (xfs_has_parent(mp)) {
793 t3 = resp->tr_attrrm.tr_logres;
794 overhead += xfs_calc_pptr_unlink_overhead();
795 }
796
797 return overhead + max3(t1, t2, t3);
798 }
799
800 /*
801 * For create, break it in to the two cases that the transaction
802 * covers. We start with the modify case - allocation done by modification
803 * of the state of existing inodes - and the allocation case.
804 */
805
806 /*
807 * For create we can modify:
808 * the parent directory inode: inode size
809 * the new inode: inode size
810 * the inode btree entry: block size
811 * the superblock for the nlink flag: sector size
812 * the directory btree: (max depth + v2) * dir block size
813 * the directory inode's bmap btree: (max depth + v2) * block size
814 * the finobt (record modification and allocation btrees)
815 */
816 STATIC uint
xfs_calc_create_resv_modify(struct xfs_mount * mp)817 xfs_calc_create_resv_modify(
818 struct xfs_mount *mp)
819 {
820 return xfs_calc_inode_res(mp, 2) +
821 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
822 (uint)XFS_FSB_TO_B(mp, 1) +
823 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
824 xfs_calc_finobt_res(mp);
825 }
826
827 /*
828 * For icreate we can allocate some inodes giving:
829 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
830 * the superblock for the nlink flag: sector size
831 * the inode chunk (allocation, optional init)
832 * the inobt (record insertion)
833 * the finobt (optional, record insertion)
834 */
835 STATIC uint
xfs_calc_icreate_resv_alloc(struct xfs_mount * mp)836 xfs_calc_icreate_resv_alloc(
837 struct xfs_mount *mp)
838 {
839 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
840 mp->m_sb.sb_sectsize +
841 xfs_calc_inode_chunk_res(mp, _ALLOC) +
842 xfs_calc_inobt_res(mp) +
843 xfs_calc_finobt_res(mp);
844 }
845
846 static inline unsigned int
xfs_icreate_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)847 xfs_icreate_log_count(
848 struct xfs_mount *mp,
849 struct xfs_trans_resv *resp)
850 {
851 unsigned int ret = XFS_CREATE_LOG_COUNT;
852
853 /*
854 * Pre-reserve enough log reservation to handle the transaction
855 * rolling needed to add one parent pointer.
856 */
857 if (xfs_has_parent(mp))
858 ret += resp->tr_attrsetm.tr_logcount;
859
860 return ret;
861 }
862
STATIC uint
xfs_calc_icreate_reservation(
	struct xfs_mount	*mp)
{
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	unsigned int		t1, t2, t3 = 0;

	/* t1: allocate a new inode chunk; t2: modify existing metadata */
	t1 = xfs_calc_icreate_resv_alloc(mp);
	t2 = xfs_calc_create_resv_modify(mp);

	/*
	 * Parent pointers need an attr-set sized reservation plus the link
	 * overhead.  This reads tr_attrsetm, so that reservation must have
	 * been computed before this function runs.
	 */
	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrsetm.tr_logres;
		overhead += xfs_calc_pptr_link_overhead();
	}

	/* worst case of the three plus the fixed overhead */
	return overhead + max3(t1, t2, t3);
}
881
882 STATIC uint
xfs_calc_create_tmpfile_reservation(struct xfs_mount * mp)883 xfs_calc_create_tmpfile_reservation(
884 struct xfs_mount *mp)
885 {
886 uint res = XFS_DQUOT_LOGRES;
887
888 res += xfs_calc_icreate_resv_alloc(mp);
889 return res + xfs_calc_iunlink_add_reservation(mp);
890 }
891
892 static inline unsigned int
xfs_mkdir_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)893 xfs_mkdir_log_count(
894 struct xfs_mount *mp,
895 struct xfs_trans_resv *resp)
896 {
897 unsigned int ret = XFS_MKDIR_LOG_COUNT;
898
899 /*
900 * Pre-reserve enough log reservation to handle the transaction
901 * rolling needed to add one parent pointer.
902 */
903 if (xfs_has_parent(mp))
904 ret += resp->tr_attrsetm.tr_logcount;
905
906 return ret;
907 }
908
909 /*
910 * Making a new directory is the same as creating a new file.
911 */
STATIC uint
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
{
	/* Directory creation takes the same log space as file creation. */
	return xfs_calc_icreate_reservation(mp);
}
918
919 static inline unsigned int
xfs_symlink_log_count(struct xfs_mount * mp,struct xfs_trans_resv * resp)920 xfs_symlink_log_count(
921 struct xfs_mount *mp,
922 struct xfs_trans_resv *resp)
923 {
924 unsigned int ret = XFS_SYMLINK_LOG_COUNT;
925
926 /*
927 * Pre-reserve enough log reservation to handle the transaction
928 * rolling needed to add one parent pointer.
929 */
930 if (xfs_has_parent(mp))
931 ret += resp->tr_attrsetm.tr_logcount;
932
933 return ret;
934 }
935
936 /*
 * Making a new symlink is the same as creating a new file, but
938 * with the added blocks for remote symlink data which can be up to 1kB in
939 * length (XFS_SYMLINK_MAXLEN).
940 */
941 STATIC uint
xfs_calc_symlink_reservation(struct xfs_mount * mp)942 xfs_calc_symlink_reservation(
943 struct xfs_mount *mp)
944 {
945 return xfs_calc_icreate_reservation(mp) +
946 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
947 }
948
949 /*
950 * In freeing an inode we can modify:
951 * the inode being freed: inode size
952 * the super block free inode counter, AGF and AGFL: sector size
953 * the on disk inode (agi unlinked list removal)
954 * the inode chunk (invalidated, headers only)
955 * the inode btree
956 * the finobt (record insertion, removal or modification)
957 *
958 * Note that the inode chunk res. includes an allocfree res. for freeing of the
959 * inode chunk. This is technically extraneous because the inode chunk free is
960 * deferred (it occurs after a transaction roll). Include the extra reservation
961 * anyways since we've had reports of ifree transaction overruns due to too many
962 * agfl fixups during inode chunk frees.
963 */
964 STATIC uint
xfs_calc_ifree_reservation(struct xfs_mount * mp)965 xfs_calc_ifree_reservation(
966 struct xfs_mount *mp)
967 {
968 return XFS_DQUOT_LOGRES +
969 xfs_calc_inode_res(mp, 1) +
970 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
971 xfs_calc_iunlink_remove_reservation(mp) +
972 xfs_calc_inode_chunk_res(mp, _FREE) +
973 xfs_calc_inobt_res(mp) +
974 xfs_calc_finobt_res(mp);
975 }
976
977 /*
978 * When only changing the inode we log the inode and possibly the superblock
979 * We also add a bit of slop for the transaction stuff.
980 */
981 STATIC uint
xfs_calc_ichange_reservation(struct xfs_mount * mp)982 xfs_calc_ichange_reservation(
983 struct xfs_mount *mp)
984 {
985 return XFS_DQUOT_LOGRES +
986 xfs_calc_inode_res(mp, 1) +
987 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
988
989 }
990
991 /*
992 * Growing the data section of the filesystem.
993 * superblock
994 * agi and agf
995 * allocation btrees
996 */
997 STATIC uint
xfs_calc_growdata_reservation(struct xfs_mount * mp)998 xfs_calc_growdata_reservation(
999 struct xfs_mount *mp)
1000 {
1001 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
1002 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1003 XFS_FSB_TO_B(mp, 1));
1004 }
1005
1006 /*
1007 * Growing the rt section of the filesystem.
1008 * In the first set of transactions (ALLOC) we allocate space to the
1009 * bitmap or summary files.
1010 * superblock: sector size
1011 * agf of the ag from which the extent is allocated: sector size
1012 * bmap btree for bitmap/summary inode: max depth * blocksize
1013 * bitmap/summary inode: inode size
1014 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
1015 */
1016 STATIC uint
xfs_calc_growrtalloc_reservation(struct xfs_mount * mp)1017 xfs_calc_growrtalloc_reservation(
1018 struct xfs_mount *mp)
1019 {
1020 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1021 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
1022 XFS_FSB_TO_B(mp, 1)) +
1023 xfs_calc_inode_res(mp, 1) +
1024 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1025 XFS_FSB_TO_B(mp, 1));
1026 }
1027
1028 /*
1029 * Growing the rt section of the filesystem.
1030 * In the second set of transactions (ZERO) we zero the new metadata blocks.
1031 * one bitmap/summary block: blocksize
1032 */
STATIC uint
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
{
	/* one bitmap/summary block to be zeroed: blocksize */
	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}
1039
1040 /*
1041 * Growing the rt section of the filesystem.
1042 * In the third set of transactions (FREE) we update metadata without
1043 * allocating any new blocks.
1044 * superblock: sector size
1045 * bitmap inode: inode size
1046 * summary inode: inode size
1047 * one bitmap block: blocksize
1048 * summary blocks: new summary size
1049 */
1050 STATIC uint
xfs_calc_growrtfree_reservation(struct xfs_mount * mp)1051 xfs_calc_growrtfree_reservation(
1052 struct xfs_mount *mp)
1053 {
1054 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1055 xfs_calc_inode_res(mp, 2) +
1056 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
1057 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
1058 }
1059
1060 /*
1061 * Logging the inode modification timestamp on a synchronous write.
1062 * inode
1063 */
STATIC uint
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
{
	/* just the inode core for the timestamp update */
	return xfs_calc_inode_res(mp, 1);
}
1070
1071 /*
1072 * Logging the inode mode bits when writing a setuid/setgid file
1073 * inode
1074 */
STATIC uint
xfs_calc_writeid_reservation(
	struct xfs_mount	*mp)
{
	/* just the inode core for the mode bit update */
	return xfs_calc_inode_res(mp, 1);
}
1081
1082 /*
1083 * Converting the inode from non-attributed to attributed.
1084 * the inode being converted: inode size
1085 * agf block and superblock (for block allocation)
1086 * the new block (directory sized)
1087 * bmap blocks for the new directory block
1088 * allocation btrees
1089 */
1090 STATIC uint
xfs_calc_addafork_reservation(struct xfs_mount * mp)1091 xfs_calc_addafork_reservation(
1092 struct xfs_mount *mp)
1093 {
1094 return XFS_DQUOT_LOGRES +
1095 xfs_calc_inode_res(mp, 1) +
1096 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1097 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
1098 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
1099 XFS_FSB_TO_B(mp, 1)) +
1100 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1101 XFS_FSB_TO_B(mp, 1));
1102 }
1103
1104 /*
1105 * Removing the attribute fork of a file
1106 * the inode being truncated: inode size
1107 * the inode's bmap btree: max depth * block size
1108 * And the bmap_finish transaction can free the blocks and bmap blocks:
1109 * the agf for each of the ags: 4 * sector size
1110 * the agfl for each of the ags: 4 * sector size
1111 * the super block to reflect the freed blocks: sector size
1112 * worst case split in allocation btrees per extent assuming 4 extents:
1113 * 4 exts * 2 trees * (2 * max depth - 1) * block size
1114 */
STATIC uint
xfs_calc_attrinval_reservation(
	struct xfs_mount	*mp)
{
	/* max of the attr-fork truncate leg and the bmap_finish free leg */
	return max((xfs_calc_inode_res(mp, 1) +
		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				     XFS_FSB_TO_B(mp, 1))),
		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
		    xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
				     XFS_FSB_TO_B(mp, 1))));
}
1126
1127 /*
1128 * Setting an attribute at mount time.
1129 * the inode getting the attribute
1130 * the superblock for allocations
1131 * the agfs extents are allocated from
1132 * the attribute btree * max depth
1133 * the inode allocation btree
1134 * Since attribute transaction space is dependent on the size of the attribute,
1135 * the calculation is done partially at mount time and partially at runtime(see
1136 * below).
1137 */
1138 STATIC uint
xfs_calc_attrsetm_reservation(struct xfs_mount * mp)1139 xfs_calc_attrsetm_reservation(
1140 struct xfs_mount *mp)
1141 {
1142 return XFS_DQUOT_LOGRES +
1143 xfs_calc_inode_res(mp, 1) +
1144 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1145 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
1146 }
1147
1148 /*
1149 * Setting an attribute at runtime, transaction space unit per block.
1150 * the superblock for allocations: sector size
1151 * the inode bmap btree could join or split: max depth * block size
1152 * Since the runtime attribute transaction space is dependent on the total
1153 * blocks needed for the 1st bmap, here we calculate out the space unit for
1154 * one block so that the caller could figure out the total space according
 * to the attribute extent length in blocks by:
1156 * ext * M_RES(mp)->tr_attrsetrt.tr_logres
1157 */
1158 STATIC uint
xfs_calc_attrsetrt_reservation(struct xfs_mount * mp)1159 xfs_calc_attrsetrt_reservation(
1160 struct xfs_mount *mp)
1161 {
1162 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1163 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
1164 XFS_FSB_TO_B(mp, 1));
1165 }
1166
1167 /*
1168 * Removing an attribute.
1169 * the inode: inode size
1170 * the attribute btree could join: max depth * block size
1171 * the inode bmap btree could join or split: max depth * block size
1172 * And the bmap_finish transaction can free the attr blocks freed giving:
1173 * the agf for the ag in which the blocks live: 2 * sector size
1174 * the agfl for the ag in which the blocks live: 2 * sector size
1175 * the superblock for the free block count: sector size
1176 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
1177 */
STATIC uint
xfs_calc_attrrm_reservation(
	struct xfs_mount	*mp)
{
	/* max of the attr removal leg and the bmap_finish free leg */
	return XFS_DQUOT_LOGRES +
		max((xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
				      XFS_FSB_TO_B(mp, 1)) +
		     (uint)XFS_FSB_TO_B(mp,
					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
				      XFS_FSB_TO_B(mp, 1))));
}
1193
1194 /*
1195 * Clearing a bad agino number in an agi hash bucket.
1196 */
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
{
	/* the agi buffer: sector size */
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1203
1204 /*
1205 * Adjusting quota limits.
1206 * the disk quota buffer: sizeof(struct xfs_disk_dquot)
1207 */
STATIC uint
xfs_calc_qm_setqlim_reservation(void)
{
	/* the dquot buffer: one struct xfs_disk_dquot */
	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
}
1213
1214 /*
1215 * Allocating quota on disk if needed.
1216 * the write transaction log space for quota file extent allocation
1217 * the unit of quota allocation: one system block size
1218 */
STATIC uint
xfs_calc_qm_dqalloc_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	/*
	 * A full write reservation plus one dquot cluster.  NOTE(review):
	 * the "- 1" presumably keeps xfs_calc_buf_res()'s rounding from
	 * growing the historical reservation -- confirm before changing.
	 */
	return xfs_calc_write_reservation(mp, for_minlogsize) +
		xfs_calc_buf_res(1,
			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
1228
unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	/* minimum-log-size variant of the dquot allocation reservation */
	return xfs_calc_qm_dqalloc_reservation(mp, true);
}
1235
1236 /*
1237 * Syncing the incore super block changes to disk.
1238 * the super block to reflect the changes: sector size
1239 */
STATIC uint
xfs_calc_sb_reservation(
	struct xfs_mount	*mp)
{
	/* the superblock buffer: sector size */
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1246
1247 /*
1248 * Namespace reservations.
1249 *
1250 * These get tricky when parent pointers are enabled as we have attribute
1251 * modifications occurring from within these transactions. Rather than confuse
1252 * each of these reservation calculations with the conditional attribute
1253 * reservations, add them here in a clear and concise manner. This requires that
1254 * the attribute reservations have already been calculated.
1255 *
1256 * Note that we only include the static attribute reservation here; the runtime
1257 * reservation will have to be modified by the size of the attributes being
1258 * added/removed/modified. See the comments on the attribute reservation
1259 * calculations for more details.
1260 */
STATIC void
xfs_calc_namespace_reservations(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/*
	 * The log counts below read tr_attrsetm, so the attribute
	 * reservations must have been computed before we get here.
	 */
	ASSERT(resp->tr_attrsetm.tr_logres > 0);

	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
	resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
	resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
	resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
	resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
	resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
	resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
	resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
	resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
	resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
	resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}
1292
STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/*
	 * Pick a default that will scale reasonably for the log size.
	 * Reuses the itruncate reservation, which must already be computed.
	 */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}
1301
/*
 * Compute the log reservation sizes, counts and flags for every transaction
 * type on this filesystem.  Ordering matters: the attribute reservations
 * must be filled in before the namespace reservations, and tr_itruncate
 * before the default atomic ioend reservation.
 */
void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	int			logcount_adj = 0;

	/*
	 * The following transactions are logged in physical format and
	 * require a permanent reservation on space.
	 */
	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create_tmpfile.tr_logres =
		xfs_calc_create_tmpfile_reservation(mp);
	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
			false);
	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* Must run after tr_attrsetm above is filled in. */
	xfs_calc_namespace_reservations(mp, resp);

	/*
	 * The following transactions are logged in logical format with
	 * a default log count.
	 */
	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	/* growdata requires permanent res; it can free space to the last AG */
	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* The following transaction are logged in logical format */
	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);

	/*
	 * Add one logcount for BUI items that appear with rmap or reflink,
	 * one logcount for refcount intent items, and one logcount for rmap
	 * intent items.
	 */
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
		logcount_adj++;
	if (xfs_has_reflink(mp))
		logcount_adj++;
	if (xfs_has_rmapbt(mp))
		logcount_adj++;

	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
1403
1404 /*
1405 * Return the per-extent and fixed transaction reservation sizes needed to
1406 * complete an atomic write.
1407 */
STATIC unsigned int
xfs_calc_atomic_write_ioend_geometry(
	struct xfs_mount	*mp,
	unsigned int		*step_size)
{
	/* log space for one intent/done item of each type */
	const unsigned int	efi = xfs_efi_log_space(1);
	const unsigned int	efd = xfs_efd_log_space();
	const unsigned int	rui = xfs_rui_log_space(1);
	const unsigned int	rud = xfs_rud_log_space();
	const unsigned int	cui = xfs_cui_log_space(1);
	const unsigned int	cud = xfs_cud_log_space();
	const unsigned int	bui = xfs_bui_log_space(1);
	const unsigned int	bud = xfs_bud_log_space();

	/*
	 * Maximum overhead to complete an atomic write ioend in software:
	 * remove data fork extent + remove cow fork extent + map extent into
	 * data fork.
	 *
	 * tx0: Creates a BUI and a CUI and that's all it needs.
	 *
	 * tx1: Roll to finish the BUI.  Need space for the BUD, an RUI, and
	 * enough space to relog the CUI (== CUI + CUD).
	 *
	 * tx2: Roll again to finish the RUI.  Need space for the RUD and space
	 * to relog the CUI.
	 *
	 * tx3: Roll again, need space for the CUD and possibly a new EFI.
	 *
	 * tx4: Roll again, need space for an EFD.
	 *
	 * If the extent referenced by the pair of BUI/CUI items is not the one
	 * being currently processed, then we need to reserve space to relog
	 * both items.
	 */
	const unsigned int	tx0 = bui + cui;
	const unsigned int	tx1 = bud + rui + cui + cud;
	const unsigned int	tx2 = rud + cui + cud;
	const unsigned int	tx3 = cud + efi;
	const unsigned int	tx4 = efd;
	const unsigned int	relog = bui + bud + cui + cud;

	/* worst-case per-extent item overhead across all transactions */
	const unsigned int	per_intent = max(max3(tx0, tx1, tx2),
						 max3(tx3, tx4, relog));

	/* Overhead to finish one step of each intent item type */
	const unsigned int	f1 = xfs_calc_finish_efi_reservation(mp, 1);
	const unsigned int	f2 = xfs_calc_finish_rui_reservation(mp, 1);
	const unsigned int	f3 = xfs_calc_finish_cui_reservation(mp, 1);
	const unsigned int	f4 = xfs_calc_finish_bui_reservation(mp, 1);

	/* We only finish one item per transaction in a chain */
	*step_size = max(f4, max3(f1, f2, f3));

	return per_intent;
}
1464
1465 /*
1466 * Compute the maximum size (in fsblocks) of atomic writes that we can complete
1467 * given the existing log reservations.
1468 */
xfs_extlen_t
xfs_calc_max_atomic_write_fsblocks(
	struct xfs_mount	*mp)
{
	const struct xfs_trans_res	*resv = &M_RES(mp)->tr_atomic_ioend;
	unsigned int			per_intent = 0;
	unsigned int			step_size = 0;
	unsigned int			ret = 0;

	/* nothing to compute if no atomic ioend reservation has been set up */
	if (resv->tr_logres > 0) {
		per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
				&step_size);

		/* whatever remains after the fixed step cost bounds extents */
		if (resv->tr_logres >= step_size)
			ret = (resv->tr_logres - step_size) / per_intent;
	}

	trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
			resv->tr_logres, ret);

	return ret;
}
1491
1492 /*
1493 * Compute the log blocks and transaction reservation needed to complete an
1494 * atomic write of a given number of blocks. Worst case, each block requires
1495 * separate handling. A return value of 0 means something went wrong.
1496 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount,
	unsigned int		*new_logres)
{
	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
	uint			old_logres = curr_res->tr_logres;
	unsigned int		per_intent, step_size;
	unsigned int		logres;
	xfs_extlen_t		min_logblocks;

	ASSERT(blockcount > 0);

	/* reset tr_atomic_ioend to the default before computing */
	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));

	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);

	/* Check for overflows */
	if (check_mul_overflow(blockcount, per_intent, &logres) ||
	    check_add_overflow(logres, step_size, &logres))
		return 0;

	/*
	 * Temporarily substitute the proposed reservation so the minimum
	 * log size calculation sees it, then restore the old value.
	 */
	curr_res->tr_logres = logres;
	min_logblocks = xfs_log_calc_minimum_size(mp);
	curr_res->tr_logres = old_logres;

	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
			blockcount, min_logblocks, logres);

	*new_logres = logres;
	return min_logblocks;
}
1530
1531 /*
1532 * Compute the transaction reservation needed to complete an out of place
1533 * atomic write of a given number of blocks.
1534 */
int
xfs_calc_atomic_write_reservation(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount)
{
	unsigned int		new_logres;
	xfs_extlen_t		min_logblocks;

	/*
	 * If the caller doesn't ask for a specific atomic write size, then
	 * use the defaults.
	 */
	if (blockcount == 0) {
		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
		return 0;
	}

	/* reject sizes this log is too small to support */
	min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
			&new_logres);
	if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
		return -EINVAL;

	M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
	return 0;
}
1560