1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4  * Copyright (C) 2010 Red Hat, Inc.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_da_format.h"
15 #include "xfs_da_btree.h"
16 #include "xfs_inode.h"
17 #include "xfs_bmap_btree.h"
18 #include "xfs_quota.h"
19 #include "xfs_trans.h"
20 #include "xfs_qm.h"
21 #include "xfs_trans_space.h"
22 #include "xfs_rtbitmap.h"
23 #include "xfs_attr_item.h"
24 #include "xfs_log.h"
25 #include "xfs_defer.h"
26 #include "xfs_bmap_item.h"
27 #include "xfs_extfree_item.h"
28 #include "xfs_rmap_item.h"
29 #include "xfs_refcount_item.h"
30 #include "xfs_trace.h"
31 
32 #define _ALLOC	true
33 #define _FREE	false
34 
35 /*
36  * A buffer has a format structure overhead in the log in addition
37  * to the data, so we need to take this into account when reserving
38  * space in a transaction for a buffer.  Round the space required up
39  * to a multiple of 128 bytes so that we don't change the historical
40  * reservation that has been used for this overhead.
41  */
42 STATIC uint
43 xfs_buf_log_overhead(void)
44 {
45 	return round_up(sizeof(struct xlog_op_header) +
46 			sizeof(struct xfs_buf_log_format), 128);
47 }
48 
49 /*
 * Calculate the transaction log reservation per item in bytes.
51  *
52  * The nbufs argument is used to indicate the number of items that
53  * will be changed in a transaction.  size is used to tell how many
54  * bytes should be reserved per item.
55  */
56 STATIC uint
57 xfs_calc_buf_res(
58 	uint		nbufs,
59 	uint		size)
60 {
61 	return nbufs * (size + xfs_buf_log_overhead());
62 }
63 
64 /*
65  * Per-extent log reservation for the btree changes involved in freeing or
 * allocating an extent.  In classic XFS there are two trees that will be
67  * modified (bnobt + cntbt).  With rmap enabled, there are three trees
68  * (rmapbt).  The number of blocks reserved is based on the formula:
69  *
70  * num trees * ((2 blocks/level * max depth) - 1)
71  *
72  * Keep in mind that max depth is calculated separately for each type of tree.
73  */
74 uint
75 xfs_allocfree_block_count(
76 	struct xfs_mount *mp,
77 	uint		num_ops)
78 {
79 	uint		blocks;
80 
81 	blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
82 	if (xfs_has_rmapbt(mp))
83 		blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
84 
85 	return blocks;
86 }
87 
88 /*
89  * Per-extent log reservation for refcount btree changes.  These are never done
90  * in the same transaction as an allocation or a free, so we compute them
91  * separately.
92  */
93 static unsigned int
94 xfs_refcountbt_block_count(
95 	struct xfs_mount	*mp,
96 	unsigned int		num_ops)
97 {
98 	return num_ops * (2 * mp->m_refc_maxlevels - 1);
99 }
100 
101 static unsigned int
102 xfs_rtrefcountbt_block_count(
103 	struct xfs_mount	*mp,
104 	unsigned int		num_ops)
105 {
106 	return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
107 }
108 
109 /*
110  * Logging inodes is really tricksy. They are logged in memory format,
111  * which means that what we write into the log doesn't directly translate into
112  * the amount of space they use on disk.
113  *
114  * Case in point - btree format forks in memory format use more space than the
115  * on-disk format. In memory, the buffer contains a normal btree block header so
116  * the btree code can treat it as though it is just another generic buffer.
117  * However, when we write it to the inode fork, we don't write all of this
118  * header as it isn't needed. e.g. the root is only ever in the inode, so
119  * there's no need for sibling pointers which would waste 16 bytes of space.
120  *
121  * Hence when we have an inode with a maximally sized btree format fork, then
122  * amount of information we actually log is greater than the size of the inode
123  * on disk. Hence we need an inode reservation function that calculates all this
124  * correctly. So, we log:
125  *
126  * - 4 log op headers for object
127  *	- for the ilf, the inode core and 2 forks
128  * - inode log format object
129  * - the inode core
130  * - two inode forks containing bmap btree root blocks.
131  *	- the btree data contained by both forks will fit into the inode size,
132  *	  hence when combined with the inode core above, we have a total of the
133  *	  actual inode size.
134  *	- the BMBT headers need to be accounted separately, as they are
135  *	  additional to the records and pointers that fit inside the inode
136  *	  forks.
137  */
138 STATIC uint
139 xfs_calc_inode_res(
140 	struct xfs_mount	*mp,
141 	uint			ninodes)
142 {
143 	return ninodes *
144 		(4 * sizeof(struct xlog_op_header) +
145 		 sizeof(struct xfs_inode_log_format) +
146 		 mp->m_sb.sb_inodesize +
147 		 2 * xfs_bmbt_block_len(mp));
148 }
149 
150 /*
151  * Inode btree record insertion/removal modifies the inode btree and free space
152  * btrees (since the inobt does not use the agfl). This requires the following
153  * reservation:
154  *
155  * the inode btree: max depth * blocksize
156  * the allocation btrees: 2 trees * (max depth - 1) * block size
157  *
158  * The caller must account for SB and AG header modifications, etc.
159  */
160 STATIC uint
161 xfs_calc_inobt_res(
162 	struct xfs_mount	*mp)
163 {
164 	return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
165 			XFS_FSB_TO_B(mp, 1)) +
166 				xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
167 			XFS_FSB_TO_B(mp, 1));
168 }
169 
170 /*
171  * The free inode btree is a conditional feature. The behavior differs slightly
172  * from that of the traditional inode btree in that the finobt tracks records
173  * for inode chunks with at least one free inode. A record can be removed from
174  * the tree during individual inode allocation. Therefore the finobt
175  * reservation is unconditional for both the inode chunk allocation and
176  * individual inode allocation (modify) cases.
177  *
178  * Behavior aside, the reservation for finobt modification is equivalent to the
179  * traditional inobt: cover a full finobt shape change plus block allocation.
180  */
181 STATIC uint
182 xfs_calc_finobt_res(
183 	struct xfs_mount	*mp)
184 {
185 	if (!xfs_has_finobt(mp))
186 		return 0;
187 
188 	return xfs_calc_inobt_res(mp);
189 }
190 
191 /*
192  * Calculate the reservation required to allocate or free an inode chunk. This
193  * includes:
194  *
195  * the allocation btrees: 2 trees * (max depth - 1) * block size
196  * the inode chunk: m_ino_geo.ialloc_blks * N
197  *
198  * The size N of the inode chunk reservation depends on whether it is for
199  * allocation or free and which type of create transaction is in use. An inode
200  * chunk free always invalidates the buffers and only requires reservation for
201  * headers (N == 0). An inode chunk allocation requires a chunk sized
202  * reservation on v4 and older superblocks to initialize the chunk. No chunk
203  * reservation is required for allocation on v5 supers, which use ordered
204  * buffers to initialize.
205  */
206 STATIC uint
207 xfs_calc_inode_chunk_res(
208 	struct xfs_mount	*mp,
209 	bool			alloc)
210 {
211 	uint			res, size = 0;
212 
213 	res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
214 			       XFS_FSB_TO_B(mp, 1));
215 	if (alloc) {
216 		/* icreate tx uses ordered buffers */
217 		if (xfs_has_v3inodes(mp))
218 			return res;
219 		size = XFS_FSB_TO_B(mp, 1);
220 	}
221 
222 	res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
223 	return res;
224 }
225 
226 /*
227  * Per-extent log reservation for the btree changes involved in freeing or
228  * allocating a realtime extent.  We have to be able to log as many rtbitmap
229  * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
230  * extents, as well as the realtime summary block (t1).  Realtime rmap btree
231  * operations happen in a second transaction, so factor in a couple of rtrmapbt
232  * splits (t2).
233  */
234 static unsigned int
235 xfs_rtalloc_block_count(
236 	struct xfs_mount	*mp,
237 	unsigned int		num_ops)
238 {
239 	unsigned int		rtbmp_blocks;
240 	xfs_rtxlen_t		rtxlen;
241 	unsigned int		t1, t2 = 0;
242 
243 	rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
244 	rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
245 	t1 = (rtbmp_blocks + 1) * num_ops;
246 
247 	if (xfs_has_rmapbt(mp))
248 		t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
249 
250 	return max(t1, t2);
251 }
252 
253 /*
254  * Various log reservation values.
255  *
256  * These are based on the size of the file system block because that is what
257  * most transactions manipulate.  Each adds in an additional 128 bytes per
258  * item logged to try to account for the overhead of the transaction mechanism.
259  *
260  * Note:  Most of the reservations underestimate the number of allocation
261  * groups into which they could free extents in the xfs_defer_finish() call.
262  * This is because the number in the worst case is quite high and quite
263  * unusual.  In order to fix this we need to change xfs_defer_finish() to free
264  * extents in only a single AG at a time.  This will require changes to the
265  * EFI code as well, however, so that the EFI for the extents not freed is
266  * logged again in each transaction.  See SGI PV #261917.
267  *
268  * Reservation functions here avoid a huge stack in xfs_trans_init due to
269  * register overflow from temporaries in the calculations.
270  */
271 
272 /*
 * Finishing data device refcount updates (t1):
274  *    the agfs of the ags containing the blocks: nr_ops * sector size
275  *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
276  */
277 inline unsigned int
278 xfs_calc_finish_cui_reservation(
279 	struct xfs_mount	*mp,
280 	unsigned int		nr_ops)
281 {
282 	if (!xfs_has_reflink(mp))
283 		return 0;
284 
285 	return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
286 	       xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
287 			       mp->m_sb.sb_blocksize);
288 }
289 
290 /*
 * Realtime refcount updates (t2):
292  *    the rt refcount inode
293  *    the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
294  */
295 inline unsigned int
296 xfs_calc_finish_rt_cui_reservation(
297 	struct xfs_mount	*mp,
298 	unsigned int		nr_ops)
299 {
300 	if (!xfs_has_rtreflink(mp))
301 		return 0;
302 
303 	return xfs_calc_inode_res(mp, 1) +
304 	       xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
305 				     mp->m_sb.sb_blocksize);
306 }
307 
308 /*
309  * Compute the log reservation required to handle the refcount update
310  * transaction.  Refcount updates are always done via deferred log items.
311  *
312  * This is calculated as the max of:
313  * Data device refcount updates (t1):
314  *    the agfs of the ags containing the blocks: nr_ops * sector size
315  *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
316  * Realtime refcount updates (t2);
317  *    the rt refcount inode
318  *    the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
319  */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	/* Larger of the data device (t1) and realtime (t2) requirements. */
	return max(xfs_calc_finish_cui_reservation(mp, nr_ops),
		   xfs_calc_finish_rt_cui_reservation(mp, nr_ops));
}
332 
333 /*
334  * In a write transaction we can allocate a maximum of 2
335  * extents.  This gives (t1):
336  *    the inode getting the new extents: inode size
337  *    the inode's bmap btree: max depth * block size
338  *    the agfs of the ags from which the extents are allocated: 2 * sector
339  *    the superblock free block counter: sector size
340  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
341  * Or, if we're writing to a realtime file (t2):
342  *    the inode getting the new extents: inode size
343  *    the inode's bmap btree: max depth * block size
344  *    the agfs of the ags from which the extents are allocated: 2 * sector
345  *    the superblock free block counter: sector size
346  *    the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
347  *    the realtime summary: 1 block
348  *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
349  * And the bmap_finish transaction can free bmap blocks in a join (t3):
350  *    the agfs of the ags containing the blocks: 2 * sector size
351  *    the agfls of the ags containing the blocks: 2 * sector size
352  *    the super block free block counter: sector size
353  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
354  * And any refcount updates that happen in a separate transaction (t4).
355  */
STATIC uint
xfs_calc_write_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* data device allocation of two extents (t1) */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/* realtime allocation, including rtbitmap/rtsummary updates (t2) */
	if (xfs_has_realtime(mp)) {
		t2 = xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				     blksz) +
		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
	} else {
		t2 = 0;
	}

	/* bmap_finish freeing bmap blocks for two extents (t3) */
	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * two refcountbt splits for each transaction.  The codebase runs
	 * refcountbt updates in separate transactions now, so to compute the
	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
	 * do not account them separately as t4.  Reflink did not support
	 * realtime when the reservations were established, so no adjustment to
	 * t2 is needed.
	 */
	if (for_minlogsize) {
		unsigned int	adj = 0;

		if (xfs_has_reflink(mp))
			adj = xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 2),
					blksz);
		t1 += adj;
		t3 += adj;
		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* refcount updates run in their own transaction (t4) */
	t4 = xfs_calc_refcountbt_reservation(mp, 1);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
407 
408 unsigned int
409 xfs_calc_write_reservation_minlogsize(
410 	struct xfs_mount	*mp)
411 {
412 	return xfs_calc_write_reservation(mp, true);
413 }
414 
415 /*
416  * Finishing an EFI can free the blocks and bmap blocks (t2):
417  *    the agf for each of the ags: nr * sector size
418  *    the agfl for each of the ags: nr * sector size
419  *    the super block to reflect the freed blocks: sector size
420  *    worst case split in allocation btrees per extent assuming nr extents:
421  *		nr exts * 2 trees * (2 * max depth - 1) * block size
422  */
423 inline unsigned int
424 xfs_calc_finish_efi_reservation(
425 	struct xfs_mount	*mp,
426 	unsigned int		nr)
427 {
428 	return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
429 	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
430 			       mp->m_sb.sb_blocksize);
431 }
432 
433 /*
434  * Or, if it's a realtime file (t3):
435  *    the agf for each of the ags: 2 * sector size
436  *    the agfl for each of the ags: 2 * sector size
437  *    the super block to reflect the freed blocks: sector size
438  *    the realtime bitmap:
439  *		2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
440  *    the realtime summary: 2 exts * 1 block
441  *    worst case split in allocation btrees per extent assuming 2 extents:
442  *		2 exts * 2 trees * (2 * max depth - 1) * block size
443  */
444 inline unsigned int
445 xfs_calc_finish_rt_efi_reservation(
446 	struct xfs_mount	*mp,
447 	unsigned int		nr)
448 {
449 	if (!xfs_has_realtime(mp))
450 		return 0;
451 
452 	return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
453 	       xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
454 			       mp->m_sb.sb_blocksize) +
455 	       xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
456 			       mp->m_sb.sb_blocksize);
457 }
458 
459 /*
460  * Finishing an RUI is the same as an EFI.  We can split the rmap btree twice
461  * on each end of the record, and that can cause the AGFL to be refilled or
462  * emptied out.
463  */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (xfs_has_rmapbt(mp))
		return xfs_calc_finish_efi_reservation(mp, nr);
	return 0;
}
473 
474 /*
 * Finishing a realtime RUI is the same as a realtime EFI.  We can split the
 * realtime rmap btree twice on each end of the record, and that can cause
 * the AGFL to be refilled or emptied out.
478  */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (xfs_has_rtrmapbt(mp))
		return xfs_calc_finish_rt_efi_reservation(mp, nr);
	return 0;
}
488 
489 /*
490  * In finishing a BUI, we can modify:
491  *    the inode being truncated: inode size
492  *    dquots
493  *    the inode's bmap btree: (max depth + 1) * block size
494  */
495 inline unsigned int
496 xfs_calc_finish_bui_reservation(
497 	struct xfs_mount	*mp,
498 	unsigned int		nr)
499 {
500 	return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
501 	       xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
502 			       mp->m_sb.sb_blocksize);
503 }
504 
505 /*
506  * In truncating a file we free up to two extents at once.  We can modify (t1):
507  *    the inode being truncated: inode size
508  *    the inode's bmap btree: (max depth + 1) * block size
509  * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
510  *    the agf for each of the ags: 4 * sector size
511  *    the agfl for each of the ags: 4 * sector size
512  *    the super block to reflect the freed blocks: sector size
513  *    worst case split in allocation btrees per extent assuming 4 extents:
514  *		4 exts * 2 trees * (2 * max depth - 1) * block size
515  * Or, if it's a realtime file (t3):
516  *    the agf for each of the ags: 2 * sector size
517  *    the agfl for each of the ags: 2 * sector size
518  *    the super block to reflect the freed blocks: sector size
519  *    the realtime bitmap:
520  *		2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
521  *    the realtime summary: 2 exts * 1 block
522  *    worst case split in allocation btrees per extent assuming 2 extents:
523  *		2 exts * 2 trees * (2 * max depth - 1) * block size
524  * And any refcount updates that happen in a separate transaction (t4).
525  */
STATIC uint
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* the inode being truncated and its bmap btree (t1) */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

	/* bmap_finish freeing up to four extents (t2) */
	t2 = xfs_calc_finish_efi_reservation(mp, 4);
	/* realtime variant freeing up to two extents (t3) */
	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * four refcountbt splits in the same transaction as bnobt/cntbt
	 * updates.  The codebase runs refcountbt updates in separate
	 * transactions now, so to compute the minimum log size, add the
	 * refcount btree splits back here and do not compute them separately
	 * as t4.  Reflink did not support realtime when the reservations were
	 * established, so do not adjust t3.
	 */
	if (for_minlogsize) {
		if (xfs_has_reflink(mp))
			t2 += xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 4),
					blksz);

		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* refcount updates run in their own transaction (t4) */
	t4 = xfs_calc_refcountbt_reservation(mp, 2);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
561 
562 unsigned int
563 xfs_calc_itruncate_reservation_minlogsize(
564 	struct xfs_mount	*mp)
565 {
566 	return xfs_calc_itruncate_reservation(mp, true);
567 }
568 
569 static inline unsigned int xfs_calc_pptr_link_overhead(void)
570 {
571 	return sizeof(struct xfs_attri_log_format) +
572 			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
573 			xlog_calc_iovec_len(MAXNAMELEN - 1);
574 }
575 static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
576 {
577 	return sizeof(struct xfs_attri_log_format) +
578 			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
579 			xlog_calc_iovec_len(MAXNAMELEN - 1);
580 }
581 static inline unsigned int xfs_calc_pptr_replace_overhead(void)
582 {
583 	return sizeof(struct xfs_attri_log_format) +
584 			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
585 			xlog_calc_iovec_len(MAXNAMELEN - 1) +
586 			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
587 			xlog_calc_iovec_len(MAXNAMELEN - 1);
588 }
589 
590 /*
 * In renaming files we can modify:
592  *    the five inodes involved: 5 * inode size
593  *    the two directory btrees: 2 * (max depth + v2) * dir block size
594  *    the two directory bmap btrees: 2 * max depth * block size
595  * And the bmap_finish transaction can free dir and bmap blocks (two sets
596  *	of bmap blocks) giving (t2):
597  *    the agf for the ags in which the blocks live: 3 * sector size
598  *    the agfl for the ags in which the blocks live: 3 * sector size
599  *    the superblock for the free block count: sector size
600  *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
601  * If parent pointers are enabled (t3), then each transaction in the chain
602  *    must be capable of setting or removing the extended attribute
603  *    containing the parent information.  It must also be able to handle
604  *    the three xattr intent items that track the progress of the parent
605  *    pointer update.
606  */
STATIC uint
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/* the five inodes plus both directory structures (t1) */
	t1 = xfs_calc_inode_res(mp, 5) +
	     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			XFS_FSB_TO_B(mp, 1));

	/* bmap_finish freeing dir and bmap blocks, three extents (t2) */
	t2 = xfs_calc_finish_efi_reservation(mp, 3);

	if (xfs_has_parent(mp)) {
		unsigned int	rename_overhead, exchange_overhead;

		/* parent pointer xattr update transaction (t3) */
		t3 = max(resp->tr_attrsetm.tr_logres,
			 resp->tr_attrrm.tr_logres);

		/*
		 * For a standard rename, the three xattr intent log items
		 * are (1) replacing the pptr for the source file; (2)
		 * removing the pptr on the dest file; and (3) adding a
		 * pptr for the whiteout file in the src dir.
		 *
		 * For a RENAME_EXCHANGE, there are two xattr intent
		 * items to replace the pptr for both src and dest
		 * files.  Link counts don't change and there is no
		 * whiteout.
		 *
		 * In the worst case we can end up relogging all log
		 * intent items to allow the log tail to move ahead, so
		 * they become overhead added to each transaction in a
		 * processing chain.
		 */
		rename_overhead = xfs_calc_pptr_replace_overhead() +
				  xfs_calc_pptr_unlink_overhead() +
				  xfs_calc_pptr_link_overhead();
		exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();

		overhead += max(rename_overhead, exchange_overhead);
	}

	return overhead + max3(t1, t2, t3);
}
653 
654 static inline unsigned int
655 xfs_rename_log_count(
656 	struct xfs_mount	*mp,
657 	struct xfs_trans_resv	*resp)
658 {
659 	/* One for the rename, one more for freeing blocks */
660 	unsigned int		ret = XFS_RENAME_LOG_COUNT;
661 
662 	/*
663 	 * Pre-reserve enough log reservation to handle the transaction
664 	 * rolling needed to remove or add one parent pointer.
665 	 */
666 	if (xfs_has_parent(mp))
667 		ret += max(resp->tr_attrsetm.tr_logcount,
668 			   resp->tr_attrrm.tr_logcount);
669 
670 	return ret;
671 }
672 
673 /*
674  * For removing an inode from unlinked list at first, we can modify:
675  *    the agi hash list and counters: sector size
676  *    the on disk inode before ours in the agi hash list: inode cluster size
677  *    the on disk inode in the agi hash list: inode cluster size
678  */
679 STATIC uint
680 xfs_calc_iunlink_remove_reservation(
681 	struct xfs_mount        *mp)
682 {
683 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
684 	       2 * M_IGEO(mp)->inode_cluster_size;
685 }
686 
687 static inline unsigned int
688 xfs_link_log_count(
689 	struct xfs_mount	*mp,
690 	struct xfs_trans_resv	*resp)
691 {
692 	unsigned int		ret = XFS_LINK_LOG_COUNT;
693 
694 	/*
695 	 * Pre-reserve enough log reservation to handle the transaction
696 	 * rolling needed to add one parent pointer.
697 	 */
698 	if (xfs_has_parent(mp))
699 		ret += resp->tr_attrsetm.tr_logcount;
700 
701 	return ret;
702 }
703 
704 /*
705  * For creating a link to an inode:
706  *    the parent directory inode: inode size
707  *    the linked inode: inode size
708  *    the directory btree could split: (max depth + v2) * dir block size
709  *    the directory bmap btree could join or split: (max depth + v2) * blocksize
710  * And the bmap_finish transaction can free some bmap blocks giving:
711  *    the agf for the ag in which the blocks live: sector size
712  *    the agfl for the ag in which the blocks live: sector size
713  *    the superblock for the free block count: sector size
714  *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
715  */
716 STATIC uint
717 xfs_calc_link_reservation(
718 	struct xfs_mount	*mp)
719 {
720 	unsigned int		overhead = XFS_DQUOT_LOGRES;
721 	struct xfs_trans_resv	*resp = M_RES(mp);
722 	unsigned int		t1, t2, t3 = 0;
723 
724 	overhead += xfs_calc_iunlink_remove_reservation(mp);
725 	t1 = xfs_calc_inode_res(mp, 2) +
726 	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
727 	t2 = xfs_calc_finish_efi_reservation(mp, 1);
728 
729 	if (xfs_has_parent(mp)) {
730 		t3 = resp->tr_attrsetm.tr_logres;
731 		overhead += xfs_calc_pptr_link_overhead();
732 	}
733 
734 	return overhead + max3(t1, t2, t3);
735 }
736 
737 /*
738  * For adding an inode to unlinked list we can modify:
739  *    the agi hash list: sector size
740  *    the on disk inode: inode cluster size
741  */
742 STATIC uint
743 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
744 {
745 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
746 			M_IGEO(mp)->inode_cluster_size;
747 }
748 
749 static inline unsigned int
750 xfs_remove_log_count(
751 	struct xfs_mount	*mp,
752 	struct xfs_trans_resv	*resp)
753 {
754 	unsigned int		ret = XFS_REMOVE_LOG_COUNT;
755 
756 	/*
757 	 * Pre-reserve enough log reservation to handle the transaction
758 	 * rolling needed to add one parent pointer.
759 	 */
760 	if (xfs_has_parent(mp))
761 		ret += resp->tr_attrrm.tr_logcount;
762 
763 	return ret;
764 }
765 
766 /*
767  * For removing a directory entry we can modify:
768  *    the parent directory inode: inode size
769  *    the removed inode: inode size
770  *    the directory btree could join: (max depth + v2) * dir block size
771  *    the directory bmap btree could join or split: (max depth + v2) * blocksize
772  * And the bmap_finish transaction can free the dir and bmap blocks giving:
773  *    the agf for the ag in which the blocks live: 2 * sector size
774  *    the agfl for the ag in which the blocks live: 2 * sector size
775  *    the superblock for the free block count: sector size
776  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
777  */
778 STATIC uint
779 xfs_calc_remove_reservation(
780 	struct xfs_mount	*mp)
781 {
782 	unsigned int            overhead = XFS_DQUOT_LOGRES;
783 	struct xfs_trans_resv   *resp = M_RES(mp);
784 	unsigned int            t1, t2, t3 = 0;
785 
786 	overhead += xfs_calc_iunlink_add_reservation(mp);
787 
788 	t1 = xfs_calc_inode_res(mp, 2) +
789 	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
790 	t2 = xfs_calc_finish_efi_reservation(mp, 2);
791 
792 	if (xfs_has_parent(mp)) {
793 		t3 = resp->tr_attrrm.tr_logres;
794 		overhead += xfs_calc_pptr_unlink_overhead();
795 	}
796 
797 	return overhead + max3(t1, t2, t3);
798 }
799 
800 /*
801  * For create, break it in to the two cases that the transaction
802  * covers. We start with the modify case - allocation done by modification
803  * of the state of existing inodes - and the allocation case.
804  */
805 
806 /*
807  * For create we can modify:
808  *    the parent directory inode: inode size
809  *    the new inode: inode size
810  *    the inode btree entry: block size
811  *    the superblock for the nlink flag: sector size
812  *    the directory btree: (max depth + v2) * dir block size
813  *    the directory inode's bmap btree: (max depth + v2) * block size
814  *    the finobt (record modification and allocation btrees)
815  */
816 STATIC uint
817 xfs_calc_create_resv_modify(
818 	struct xfs_mount	*mp)
819 {
820 	return xfs_calc_inode_res(mp, 2) +
821 		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
822 		(uint)XFS_FSB_TO_B(mp, 1) +
823 		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
824 		xfs_calc_finobt_res(mp);
825 }
826 
827 /*
828  * For icreate we can allocate some inodes giving:
829  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
830  *    the superblock for the nlink flag: sector size
831  *    the inode chunk (allocation, optional init)
832  *    the inobt (record insertion)
833  *    the finobt (optional, record insertion)
834  */
835 STATIC uint
836 xfs_calc_icreate_resv_alloc(
837 	struct xfs_mount	*mp)
838 {
839 	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
840 		mp->m_sb.sb_sectsize +
841 		xfs_calc_inode_chunk_res(mp, _ALLOC) +
842 		xfs_calc_inobt_res(mp) +
843 		xfs_calc_finobt_res(mp);
844 }
845 
846 static inline unsigned int
847 xfs_icreate_log_count(
848 	struct xfs_mount	*mp,
849 	struct xfs_trans_resv	*resp)
850 {
851 	unsigned int		ret = XFS_CREATE_LOG_COUNT;
852 
853 	/*
854 	 * Pre-reserve enough log reservation to handle the transaction
855 	 * rolling needed to add one parent pointer.
856 	 */
857 	if (xfs_has_parent(mp))
858 		ret += resp->tr_attrsetm.tr_logcount;
859 
860 	return ret;
861 }
862 
863 STATIC uint
864 xfs_calc_icreate_reservation(
865 	struct xfs_mount	*mp)
866 {
867 	struct xfs_trans_resv	*resp = M_RES(mp);
868 	unsigned int		overhead = XFS_DQUOT_LOGRES;
869 	unsigned int		t1, t2, t3 = 0;
870 
871 	t1 = xfs_calc_icreate_resv_alloc(mp);
872 	t2 = xfs_calc_create_resv_modify(mp);
873 
874 	if (xfs_has_parent(mp)) {
875 		t3 = resp->tr_attrsetm.tr_logres;
876 		overhead += xfs_calc_pptr_link_overhead();
877 	}
878 
879 	return overhead + max3(t1, t2, t3);
880 }
881 
882 STATIC uint
883 xfs_calc_create_tmpfile_reservation(
884 	struct xfs_mount        *mp)
885 {
886 	uint	res = XFS_DQUOT_LOGRES;
887 
888 	res += xfs_calc_icreate_resv_alloc(mp);
889 	return res + xfs_calc_iunlink_add_reservation(mp);
890 }
891 
892 static inline unsigned int
893 xfs_mkdir_log_count(
894 	struct xfs_mount	*mp,
895 	struct xfs_trans_resv	*resp)
896 {
897 	unsigned int		ret = XFS_MKDIR_LOG_COUNT;
898 
899 	/*
900 	 * Pre-reserve enough log reservation to handle the transaction
901 	 * rolling needed to add one parent pointer.
902 	 */
903 	if (xfs_has_parent(mp))
904 		ret += resp->tr_attrsetm.tr_logcount;
905 
906 	return ret;
907 }
908 
909 /*
910  * Making a new directory is the same as creating a new file.
911  */
912 STATIC uint
913 xfs_calc_mkdir_reservation(
914 	struct xfs_mount	*mp)
915 {
916 	return xfs_calc_icreate_reservation(mp);
917 }
918 
919 static inline unsigned int
920 xfs_symlink_log_count(
921 	struct xfs_mount	*mp,
922 	struct xfs_trans_resv	*resp)
923 {
924 	unsigned int		ret = XFS_SYMLINK_LOG_COUNT;
925 
926 	/*
927 	 * Pre-reserve enough log reservation to handle the transaction
928 	 * rolling needed to add one parent pointer.
929 	 */
930 	if (xfs_has_parent(mp))
931 		ret += resp->tr_attrsetm.tr_logcount;
932 
933 	return ret;
934 }
935 
936 /*
937  * Making a new symplink is the same as creating a new file, but
938  * with the added blocks for remote symlink data which can be up to 1kB in
939  * length (XFS_SYMLINK_MAXLEN).
940  */
941 STATIC uint
942 xfs_calc_symlink_reservation(
943 	struct xfs_mount	*mp)
944 {
945 	return xfs_calc_icreate_reservation(mp) +
946 	       xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
947 }
948 
949 /*
950  * In freeing an inode we can modify:
951  *    the inode being freed: inode size
952  *    the super block free inode counter, AGF and AGFL: sector size
953  *    the on disk inode (agi unlinked list removal)
954  *    the inode chunk (invalidated, headers only)
955  *    the inode btree
956  *    the finobt (record insertion, removal or modification)
957  *
958  * Note that the inode chunk res. includes an allocfree res. for freeing of the
959  * inode chunk. This is technically extraneous because the inode chunk free is
960  * deferred (it occurs after a transaction roll). Include the extra reservation
961  * anyways since we've had reports of ifree transaction overruns due to too many
962  * agfl fixups during inode chunk frees.
963  */
964 STATIC uint
965 xfs_calc_ifree_reservation(
966 	struct xfs_mount	*mp)
967 {
968 	return XFS_DQUOT_LOGRES +
969 		xfs_calc_inode_res(mp, 1) +
970 		xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
971 		xfs_calc_iunlink_remove_reservation(mp) +
972 		xfs_calc_inode_chunk_res(mp, _FREE) +
973 		xfs_calc_inobt_res(mp) +
974 		xfs_calc_finobt_res(mp);
975 }
976 
977 /*
978  * When only changing the inode we log the inode and possibly the superblock
979  * We also add a bit of slop for the transaction stuff.
980  */
981 STATIC uint
982 xfs_calc_ichange_reservation(
983 	struct xfs_mount	*mp)
984 {
985 	return XFS_DQUOT_LOGRES +
986 		xfs_calc_inode_res(mp, 1) +
987 		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
988 
989 }
990 
991 /*
992  * Growing the data section of the filesystem.
993  *	superblock
994  *	agi and agf
995  *	allocation btrees
996  */
997 STATIC uint
998 xfs_calc_growdata_reservation(
999 	struct xfs_mount	*mp)
1000 {
1001 	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
1002 		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1003 				 XFS_FSB_TO_B(mp, 1));
1004 }
1005 
1006 /*
1007  * Growing the rt section of the filesystem.
1008  * In the first set of transactions (ALLOC) we allocate space to the
1009  * bitmap or summary files.
1010  *	superblock: sector size
1011  *	agf of the ag from which the extent is allocated: sector size
1012  *	bmap btree for bitmap/summary inode: max depth * blocksize
1013  *	bitmap/summary inode: inode size
1014  *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
1015  */
1016 STATIC uint
1017 xfs_calc_growrtalloc_reservation(
1018 	struct xfs_mount	*mp)
1019 {
1020 	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1021 		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
1022 				 XFS_FSB_TO_B(mp, 1)) +
1023 		xfs_calc_inode_res(mp, 1) +
1024 		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1025 				 XFS_FSB_TO_B(mp, 1));
1026 }
1027 
1028 /*
1029  * Growing the rt section of the filesystem.
1030  * In the second set of transactions (ZERO) we zero the new metadata blocks.
1031  *	one bitmap/summary block: blocksize
1032  */
1033 STATIC uint
1034 xfs_calc_growrtzero_reservation(
1035 	struct xfs_mount	*mp)
1036 {
1037 	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
1038 }
1039 
1040 /*
1041  * Growing the rt section of the filesystem.
1042  * In the third set of transactions (FREE) we update metadata without
1043  * allocating any new blocks.
1044  *	superblock: sector size
1045  *	bitmap inode: inode size
1046  *	summary inode: inode size
1047  *	one bitmap block: blocksize
1048  *	summary blocks: new summary size
1049  */
1050 STATIC uint
1051 xfs_calc_growrtfree_reservation(
1052 	struct xfs_mount	*mp)
1053 {
1054 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1055 		xfs_calc_inode_res(mp, 2) +
1056 		xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
1057 		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
1058 }
1059 
1060 /*
1061  * Logging the inode modification timestamp on a synchronous write.
1062  *	inode
1063  */
1064 STATIC uint
1065 xfs_calc_swrite_reservation(
1066 	struct xfs_mount	*mp)
1067 {
1068 	return xfs_calc_inode_res(mp, 1);
1069 }
1070 
1071 /*
1072  * Logging the inode mode bits when writing a setuid/setgid file
1073  *	inode
1074  */
1075 STATIC uint
1076 xfs_calc_writeid_reservation(
1077 	struct xfs_mount	*mp)
1078 {
1079 	return xfs_calc_inode_res(mp, 1);
1080 }
1081 
1082 /*
1083  * Converting the inode from non-attributed to attributed.
1084  *	the inode being converted: inode size
1085  *	agf block and superblock (for block allocation)
1086  *	the new block (directory sized)
1087  *	bmap blocks for the new directory block
1088  *	allocation btrees
1089  */
1090 STATIC uint
1091 xfs_calc_addafork_reservation(
1092 	struct xfs_mount	*mp)
1093 {
1094 	return XFS_DQUOT_LOGRES +
1095 		xfs_calc_inode_res(mp, 1) +
1096 		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1097 		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
1098 		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
1099 				 XFS_FSB_TO_B(mp, 1)) +
1100 		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1101 				 XFS_FSB_TO_B(mp, 1));
1102 }
1103 
1104 /*
1105  * Removing the attribute fork of a file
1106  *    the inode being truncated: inode size
1107  *    the inode's bmap btree: max depth * block size
1108  * And the bmap_finish transaction can free the blocks and bmap blocks:
1109  *    the agf for each of the ags: 4 * sector size
1110  *    the agfl for each of the ags: 4 * sector size
1111  *    the super block to reflect the freed blocks: sector size
1112  *    worst case split in allocation btrees per extent assuming 4 extents:
1113  *		4 exts * 2 trees * (2 * max depth - 1) * block size
1114  */
1115 STATIC uint
1116 xfs_calc_attrinval_reservation(
1117 	struct xfs_mount	*mp)
1118 {
1119 	return max((xfs_calc_inode_res(mp, 1) +
1120 		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
1121 				     XFS_FSB_TO_B(mp, 1))),
1122 		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
1123 		    xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
1124 				     XFS_FSB_TO_B(mp, 1))));
1125 }
1126 
1127 /*
1128  * Setting an attribute at mount time.
1129  *	the inode getting the attribute
1130  *	the superblock for allocations
1131  *	the agfs extents are allocated from
1132  *	the attribute btree * max depth
1133  *	the inode allocation btree
1134  * Since attribute transaction space is dependent on the size of the attribute,
1135  * the calculation is done partially at mount time and partially at runtime(see
1136  * below).
1137  */
1138 STATIC uint
1139 xfs_calc_attrsetm_reservation(
1140 	struct xfs_mount	*mp)
1141 {
1142 	return XFS_DQUOT_LOGRES +
1143 		xfs_calc_inode_res(mp, 1) +
1144 		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1145 		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
1146 }
1147 
1148 /*
1149  * Setting an attribute at runtime, transaction space unit per block.
1150  * 	the superblock for allocations: sector size
1151  *	the inode bmap btree could join or split: max depth * block size
1152  * Since the runtime attribute transaction space is dependent on the total
1153  * blocks needed for the 1st bmap, here we calculate out the space unit for
1154  * one block so that the caller could figure out the total space according
1155  * to the attibute extent length in blocks by:
1156  *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
1157  */
1158 STATIC uint
1159 xfs_calc_attrsetrt_reservation(
1160 	struct xfs_mount	*mp)
1161 {
1162 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1163 		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
1164 				 XFS_FSB_TO_B(mp, 1));
1165 }
1166 
1167 /*
1168  * Removing an attribute.
1169  *    the inode: inode size
1170  *    the attribute btree could join: max depth * block size
1171  *    the inode bmap btree could join or split: max depth * block size
1172  * And the bmap_finish transaction can free the attr blocks freed giving:
1173  *    the agf for the ag in which the blocks live: 2 * sector size
1174  *    the agfl for the ag in which the blocks live: 2 * sector size
1175  *    the superblock for the free block count: sector size
1176  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
1177  */
1178 STATIC uint
1179 xfs_calc_attrrm_reservation(
1180 	struct xfs_mount	*mp)
1181 {
1182 	return XFS_DQUOT_LOGRES +
1183 		max((xfs_calc_inode_res(mp, 1) +
1184 		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
1185 				      XFS_FSB_TO_B(mp, 1)) +
1186 		     (uint)XFS_FSB_TO_B(mp,
1187 					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
1188 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
1189 		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
1190 		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
1191 				      XFS_FSB_TO_B(mp, 1))));
1192 }
1193 
1194 /*
1195  * Clearing a bad agino number in an agi hash bucket.
1196  */
1197 STATIC uint
1198 xfs_calc_clear_agi_bucket_reservation(
1199 	struct xfs_mount	*mp)
1200 {
1201 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
1202 }
1203 
1204 /*
1205  * Adjusting quota limits.
1206  *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
1207  */
1208 STATIC uint
1209 xfs_calc_qm_setqlim_reservation(void)
1210 {
1211 	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
1212 }
1213 
1214 /*
1215  * Allocating quota on disk if needed.
1216  *	the write transaction log space for quota file extent allocation
1217  *	the unit of quota allocation: one system block size
1218  */
1219 STATIC uint
1220 xfs_calc_qm_dqalloc_reservation(
1221 	struct xfs_mount	*mp,
1222 	bool			for_minlogsize)
1223 {
1224 	return xfs_calc_write_reservation(mp, for_minlogsize) +
1225 		xfs_calc_buf_res(1,
1226 			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
1227 }
1228 
/* Minimum-log-size variant of the dquot allocation reservation. */
unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_qm_dqalloc_reservation(mp, true);
}
1235 
1236 /*
1237  * Syncing the incore super block changes to disk.
1238  *     the super block to reflect the changes: sector size
1239  */
1240 STATIC uint
1241 xfs_calc_sb_reservation(
1242 	struct xfs_mount	*mp)
1243 {
1244 	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
1245 }
1246 
1247 /*
1248  * Namespace reservations.
1249  *
1250  * These get tricky when parent pointers are enabled as we have attribute
1251  * modifications occurring from within these transactions. Rather than confuse
1252  * each of these reservation calculations with the conditional attribute
1253  * reservations, add them here in a clear and concise manner. This requires that
1254  * the attribute reservations have already been calculated.
1255  *
1256  * Note that we only include the static attribute reservation here; the runtime
1257  * reservation will have to be modified by the size of the attributes being
1258  * added/removed/modified. See the comments on the attribute reservation
1259  * calculations for more details.
1260  */
1261 STATIC void
1262 xfs_calc_namespace_reservations(
1263 	struct xfs_mount	*mp,
1264 	struct xfs_trans_resv	*resp)
1265 {
1266 	ASSERT(resp->tr_attrsetm.tr_logres > 0);
1267 
1268 	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
1269 	resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
1270 	resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1271 
1272 	resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
1273 	resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
1274 	resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1275 
1276 	resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
1277 	resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
1278 	resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1279 
1280 	resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
1281 	resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
1282 	resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1283 
1284 	resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
1285 	resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
1286 	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1287 
1288 	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
1289 	resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
1290 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1291 }
1292 
/*
 * Set the default reservation for completing an atomic write ioend.
 * NOTE(review): tr_itruncate appears to be chosen because it is a large
 * permanent reservation already computed for this log size — confirm.
 */
STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Pick a default that will scale reasonably for the log size. */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}
1301 
/*
 * Compute the full transaction reservation table for this mount.  Every
 * entry gets a log space reservation (tr_logres); permanent transactions
 * additionally get a log count and the XFS_TRANS_PERM_LOG_RES flag.
 * Must run after the superblock geometry in @mp is final, since all the
 * helpers below read mp->m_sb.
 */
void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	int			logcount_adj = 0;

	/*
	 * The following transactions are logged in physical format and
	 * require a permanent reservation on space.
	 */
	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create_tmpfile.tr_logres =
			xfs_calc_create_tmpfile_reservation(mp);
	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* tr_attrsetm must be set before xfs_calc_namespace_reservations(). */
	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
			false);
	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	xfs_calc_namespace_reservations(mp, resp);

	/*
	 * The following transactions are logged in logical format with
	 * a default log count.
	 */
	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	/* growdata requires permanent res; it can free space to the last AG */
	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* The following transaction are logged in logical format */
	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);

	/*
	 * Add one logcount for BUI items that appear with rmap or reflink,
	 * one logcount for refcount intent items, and one logcount for rmap
	 * intent items.
	 */
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
		logcount_adj++;
	if (xfs_has_reflink(mp))
		logcount_adj++;
	if (xfs_has_rmapbt(mp))
		logcount_adj++;

	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
1403 
1404 /*
1405  * Return the per-extent and fixed transaction reservation sizes needed to
1406  * complete an atomic write.
1407  */
1408 STATIC unsigned int
1409 xfs_calc_atomic_write_ioend_geometry(
1410 	struct xfs_mount	*mp,
1411 	unsigned int		*step_size)
1412 {
1413 	const unsigned int	efi = xfs_efi_log_space(1);
1414 	const unsigned int	efd = xfs_efd_log_space(1);
1415 	const unsigned int	rui = xfs_rui_log_space(1);
1416 	const unsigned int	rud = xfs_rud_log_space();
1417 	const unsigned int	cui = xfs_cui_log_space(1);
1418 	const unsigned int	cud = xfs_cud_log_space();
1419 	const unsigned int	bui = xfs_bui_log_space(1);
1420 	const unsigned int	bud = xfs_bud_log_space();
1421 
1422 	/*
1423 	 * Maximum overhead to complete an atomic write ioend in software:
1424 	 * remove data fork extent + remove cow fork extent + map extent into
1425 	 * data fork.
1426 	 *
1427 	 * tx0: Creates a BUI and a CUI and that's all it needs.
1428 	 *
1429 	 * tx1: Roll to finish the BUI.  Need space for the BUD, an RUI, and
1430 	 * enough space to relog the CUI (== CUI + CUD).
1431 	 *
1432 	 * tx2: Roll again to finish the RUI.  Need space for the RUD and space
1433 	 * to relog the CUI.
1434 	 *
1435 	 * tx3: Roll again, need space for the CUD and possibly a new EFI.
1436 	 *
1437 	 * tx4: Roll again, need space for an EFD.
1438 	 *
1439 	 * If the extent referenced by the pair of BUI/CUI items is not the one
1440 	 * being currently processed, then we need to reserve space to relog
1441 	 * both items.
1442 	 */
1443 	const unsigned int	tx0 = bui + cui;
1444 	const unsigned int	tx1 = bud + rui + cui + cud;
1445 	const unsigned int	tx2 = rud + cui + cud;
1446 	const unsigned int	tx3 = cud + efi;
1447 	const unsigned int	tx4 = efd;
1448 	const unsigned int	relog = bui + bud + cui + cud;
1449 
1450 	const unsigned int	per_intent = max(max3(tx0, tx1, tx2),
1451 						 max3(tx3, tx4, relog));
1452 
1453 	/* Overhead to finish one step of each intent item type */
1454 	const unsigned int	f1 = xfs_calc_finish_efi_reservation(mp, 1);
1455 	const unsigned int	f2 = xfs_calc_finish_rui_reservation(mp, 1);
1456 	const unsigned int	f3 = xfs_calc_finish_cui_reservation(mp, 1);
1457 	const unsigned int	f4 = xfs_calc_finish_bui_reservation(mp, 1);
1458 
1459 	/* We only finish one item per transaction in a chain */
1460 	*step_size = max(f4, max3(f1, f2, f3));
1461 
1462 	return per_intent;
1463 }
1464 
1465 /*
1466  * Compute the maximum size (in fsblocks) of atomic writes that we can complete
1467  * given the existing log reservations.
1468  */
1469 xfs_extlen_t
1470 xfs_calc_max_atomic_write_fsblocks(
1471 	struct xfs_mount		*mp)
1472 {
1473 	const struct xfs_trans_res	*resv = &M_RES(mp)->tr_atomic_ioend;
1474 	unsigned int			per_intent = 0;
1475 	unsigned int			step_size = 0;
1476 	unsigned int			ret = 0;
1477 
1478 	if (resv->tr_logres > 0) {
1479 		per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
1480 				&step_size);
1481 
1482 		if (resv->tr_logres >= step_size)
1483 			ret = (resv->tr_logres - step_size) / per_intent;
1484 	}
1485 
1486 	trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
1487 			resv->tr_logres, ret);
1488 
1489 	return ret;
1490 }
1491 
1492 /*
1493  * Compute the log blocks and transaction reservation needed to complete an
1494  * atomic write of a given number of blocks.  Worst case, each block requires
1495  * separate handling.  A return value of 0 means something went wrong.
1496  */
1497 xfs_extlen_t
1498 xfs_calc_atomic_write_log_geometry(
1499 	struct xfs_mount	*mp,
1500 	xfs_extlen_t		blockcount,
1501 	unsigned int		*new_logres)
1502 {
1503 	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
1504 	uint			old_logres = curr_res->tr_logres;
1505 	unsigned int		per_intent, step_size;
1506 	unsigned int		logres;
1507 	xfs_extlen_t		min_logblocks;
1508 
1509 	ASSERT(blockcount > 0);
1510 
1511 	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
1512 
1513 	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
1514 
1515 	/* Check for overflows */
1516 	if (check_mul_overflow(blockcount, per_intent, &logres) ||
1517 	    check_add_overflow(logres, step_size, &logres))
1518 		return 0;
1519 
1520 	curr_res->tr_logres = logres;
1521 	min_logblocks = xfs_log_calc_minimum_size(mp);
1522 	curr_res->tr_logres = old_logres;
1523 
1524 	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
1525 			blockcount, min_logblocks, logres);
1526 
1527 	*new_logres = logres;
1528 	return min_logblocks;
1529 }
1530 
1531 /*
1532  * Compute the transaction reservation needed to complete an out of place
1533  * atomic write of a given number of blocks.
1534  */
1535 int
1536 xfs_calc_atomic_write_reservation(
1537 	struct xfs_mount	*mp,
1538 	xfs_extlen_t		blockcount)
1539 {
1540 	unsigned int		new_logres;
1541 	xfs_extlen_t		min_logblocks;
1542 
1543 	/*
1544 	 * If the caller doesn't ask for a specific atomic write size, then
1545 	 * use the defaults.
1546 	 */
1547 	if (blockcount == 0) {
1548 		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
1549 		return 0;
1550 	}
1551 
1552 	min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
1553 			&new_logres);
1554 	if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
1555 		return -EINVAL;
1556 
1557 	M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
1558 	return 0;
1559 }
1560