// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_metafile.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_rtgroup.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"

static struct kmem_cache	*xfs_rtrmapbt_cur_cache;

/*
 * Realtime Reverse Map btree.
 *
 * This is a btree used to track the owner(s) of a given extent in the realtime
 * device. See the comments in xfs_rmap_btree.c for more information.
 *
 * This tree is basically the same as the regular rmap btree except that it
 * is rooted in an inode and does not live in free space.
 */

static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
}

STATIC int
xfs_rtrmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0) / 2;
	}

	return cur->bc_mp->m_rtrmap_mnr[level != 0];
}

STATIC int
xfs_rtrmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0);
	}

	return cur->bc_mp->m_rtrmap_mxr[level != 0];
}

/* Calculate number of records in the ondisk realtime rmap btree inode root. */
unsigned int
xfs_rtrmapbt_droot_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= sizeof(struct xfs_rtrmap_root);

	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
			sizeof(xfs_rtrmap_ptr_t));
}
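
/*
 * Rough worked example of the math above (illustrative only, not part of the
 * original source): assuming the sizes implied by the rmap on-disk format --
 * a 24-byte struct xfs_rmap_rec, a packed 20-byte struct xfs_rmap_key, and an
 * 8-byte xfs_rtrmap_ptr_t -- a hypothetical 500-byte inode root area leaves
 * 500 - 4 = 496 bytes after the 4-byte xfs_rtrmap_root header. That holds
 * 496 / 24 = 20 leaf records, or 496 / (2 * 20 + 8) = 10 key/pointer pairs
 * in a node.
 */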

/*
 * Get the maximum records we could store in the on-disk format.
 *
 * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
 * for the root node this checks the available space in the dinode fork
 * so that we can resize the in-memory buffer to match it. After a
 * resize to the maximum size this function returns the same value
 * as xfs_rtrmapbt_get_maxrecs for the root node, too.
 */
STATIC int
xfs_rtrmapbt_get_dmaxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level != cur->bc_nlevels - 1)
		return cur->bc_mp->m_rtrmap_mxr[level != 0];
	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
}

/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}

STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}

STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
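
/*
 * Illustrative example (not from the original source): for a record mapping
 * rtblock 16 for 8 blocks at file offset 0 of an ordinary inode, the high key
 * built above is { rm_startblock = 23, rm_offset = 7 }, i.e. the last block
 * of the extent. For special (non-inode) owners and for bmbt blocks,
 * rm_offset does not encode a file offset, so only rm_startblock is adjusted.
 */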

STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}

/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}

STATIC int
xfs_rtrmapbt_cmp_key_with_cur(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;

	return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?:
	       cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?:
	       cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)),
		       offset_keymask(xfs_rmap_irec_offset_pack(rec)));
}

STATIC int
xfs_rtrmapbt_cmp_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int				d;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = cmp_int(be32_to_cpu(kp1->rm_startblock),
		    be32_to_cpu(kp2->rm_startblock));
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		d = cmp_int(be64_to_cpu(kp1->rm_owner),
			    be64_to_cpu(kp2->rm_owner));
		if (d)
			return d;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)),
			    offset_keymask(be64_to_cpu(kp2->rm_offset)));
		if (d)
			return d;
	}

	return 0;
}
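
/*
 * Usage sketch (hypothetical, not from the original source): a caller that
 * only cares about physical space can pass a mask whose rm_startblock is
 * nonzero and whose rm_owner/rm_offset are zero; the comparison above then
 * stops after the startblock check, so two keys for the same rtblock compare
 * equal even if their owners or offsets differ.
 */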

static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}

static void
xfs_rtrmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_fsblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rtrmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_rtrmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rtrmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_fsblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};

STATIC int
xfs_rtrmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC int
xfs_rtrmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}

static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}
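
/*
 * Layout sketch (inferred from the broot accessors used above, not verbatim
 * from the original source): a node-format incore root looks like
 *
 *	[block header][keys 1..maxrecs][pointers 1..maxrecs]
 *
 * where the pointer array begins at an offset computed from the root buffer
 * size, not immediately after the last in-use key. Resizing the root
 * therefore shifts the pointer array to a new offset, and memmove() is
 * required because the old and new arrays may overlap.
 */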

static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs. If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header. Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}

const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_rtrmapbt_cmp_key_with_cur,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.cmp_two_keys		= xfs_rtrmapbt_cmp_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};

/* Allocate a new rt rmap btree cursor. */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}

#ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Validate an in-memory realtime rmap btree block. Callers are allowed to
 * generate an in-memory btree even if the ondisk feature is not enabled.
 */
static xfs_failaddr_t
xfs_rtrmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rtrmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rtrmapbt_maxlevels_ondisk())
			return __this_address;
	}

	maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}

static void
xfs_rtrmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa = xfs_rtrmapbt_mem_verify(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}

/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
	.name			= "xfs_rtrmapbt_mem",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_mem_rw_verify,
	.verify_write		= xfs_rtrmapbt_mem_rw_verify,
	.verify_struct		= xfs_rtrmapbt_mem_verify,
};

const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),

	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_rtrmapbt_cmp_key_with_cur,
	.buf_ops		= &xfs_rtrmapbt_mem_buf_ops,
	.cmp_two_keys		= xfs_rtrmapbt_cmp_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
};

/* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
xfs_rtrmapbt_mem_cursor(
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
	cur->bc_mem.xfbtree = xfbt;
	cur->bc_nlevels = xfbt->nlevels;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	return cur;
}

/* Create an in-memory realtime rmap btree. */
int
xfs_rtrmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_rgnumber_t		rgno)
{
	xfbt->owner = rgno;
	return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
}
#endif /* CONFIG_XFS_BTREE_IN_MEM */

/*
 * Install a new rt reverse mapping btree root. Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}

/* Calculate number of records in a rt reverse mapping btree block. */
static inline unsigned int
xfs_rtrmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Calculate number of records in an rt reverse mapping btree block.
 */
unsigned int
xfs_rtrmapbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_RTRMAP_BLOCK_LEN;
	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
}
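
/*
 * Rough worked example (illustrative only, not from the original source):
 * assuming a 4096-byte filesystem block and a 72-byte long-format CRC btree
 * block header for XFS_RTRMAP_BLOCK_LEN, that leaves 4096 - 72 = 4024 usable
 * bytes, i.e. 4024 / 24 = 167 records per leaf block or
 * 4024 / (2 * 20 + 8) = 83 key/pointer pairs per node block.
 */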

/* Compute the max possible height for realtime reverse mapping btrees. */
unsigned int
xfs_rtrmapbt_maxlevels_ondisk(void)
{
	unsigned long long	max_dblocks;
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;

	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rtrmapbt on any rtreflink
	 * fs.
	 *
	 * On a reflink filesystem, each block in an rtgroup can have up to
	 * 2^32 (per the refcount record format) owners, which means that
	 * theoretically we could face up to 2^64 rmap records. However, we're
	 * likely to run out of blocks in the data device long before that
	 * happens, which means that we must compute the max height based on
	 * what the btree will look like if it consumes almost all the blocks
	 * in the data device due to maximal sharing factor.
	 */
	max_dblocks = -1U;	/* max ag count */
	max_dblocks *= XFS_MAX_CRC_AG_BLOCKS;
	return xfs_btree_space_to_height(minrecs, max_dblocks);
}
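
/*
 * Height intuition (general btree math, not lifted from
 * xfs_btree_space_to_height): if every block holds at least "fanout"
 * entries, a tree occupying B blocks has on the order of 1 + log_fanout(B)
 * levels, so the worst-case height computed above grows only
 * logarithmically with the size of the data device.
 */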

int __init
xfs_rtrmapbt_init_cur_cache(void)
{
	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rtrmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}

/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 *
	 * On a reflink filesystem, each rt block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records. This makes the computation of
	 * maxlevels based on record count meaningless, so we only consider the
	 * size of the data device.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	if (xfs_has_rtreflink(mp)) {
		mp->m_rtrmap_maxlevels = d_maxlevels + 1;
		return;
	}

	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}

/* Calculate the rtrmap btree size for some records. */
unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
static unsigned long long
xfs_rtrmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rtrmap_mxr[0] == 0)
		return 0;

	return xfs_rtrmapbt_calc_size(mp, rtblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}
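
/*
 * Worked example with made-up numbers (not from the original source): for an
 * rtgroup of 1,000,000 blocks, the 1% floor above is 10,000 blocks; if
 * xfs_rtrmapbt_max_size() were to report that a worst-case tree holding one
 * record per block needs 12,500 blocks, max_t() picks the larger figure and
 * we reserve 12,500 blocks.
 */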

/* Convert on-disk form of btree root to in-memory form. */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Load a realtime reverse mapping btree root in from disk. */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}

/* Convert in-memory form of btree root to on-disk form. */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		numrecs;
	unsigned int		maxrecs;

	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}

/*
 * Create a realtime rmap btree inode.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}

/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}

/*
 * Return the highest rgbno currently tracked by the rmap for this rtg.
 */
xfs_rgblock_t
xfs_rtrmap_highest_rgbno(
	struct xfs_rtgroup	*rtg)
{
	struct xfs_btree_block	*block = rtg_rmap(rtg)->i_df.if_broot;
	union xfs_btree_key	key = {};
	struct xfs_btree_cur	*cur;

	if (block->bb_numrecs == 0)
		return NULLRGBLOCK;
	cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	xfs_btree_get_keys(cur, block, &key);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return be32_to_cpu(key.__rmap_bigkey[1].rm_startblock);
}