1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Maple Tree implementation
4 * Copyright (c) 2018-2022 Oracle Corporation
5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6 * Matthew Wilcox <willy@infradead.org>
7 * Copyright (c) 2023 ByteDance
8 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
9 */
10
11 /*
12 * DOC: Interesting implementation details of the Maple Tree
13 *
14 * Each node type has a number of slots for entries and a number of slots for
15 * pivots. In the case of dense nodes, the pivots are implied by the position
16 * and are simply the slot index + the minimum of the node.
17 *
18 * In regular B-Tree terms, pivots are called keys. The term pivot is used to
19 * indicate that the tree is specifying ranges. Pivots may appear in the
20 * subtree with an entry attached to the value whereas keys are unique to a
21 * specific position of a B-tree. Pivot values are inclusive of the slot with
22 * the same index.
23 *
24 *
25 * The following illustrates the layout of a range64 nodes slots and pivots.
26 *
27 *
28 * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
29 * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬
30 * │ │ │ │ │ │ │ │ └─ Implied maximum
31 * │ │ │ │ │ │ │ └─ Pivot 14
32 * │ │ │ │ │ │ └─ Pivot 13
33 * │ │ │ │ │ └─ Pivot 12
34 * │ │ │ │ └─ Pivot 11
35 * │ │ │ └─ Pivot 2
36 * │ │ └─ Pivot 1
37 * │ └─ Pivot 0
38 * └─ Implied minimum
39 *
40 * Slot contents:
41 * Internal (non-leaf) nodes contain pointers to other nodes.
42 * Leaf nodes contain entries.
43 *
44 * The location of interest is often referred to as an offset. All offsets have
45 * a slot, but the last offset has an implied pivot from the node above (or
46 * UINT_MAX for the root node.
47 *
48 * Ranges complicate certain write activities. When modifying any of
49 * the B-tree variants, it is known that one entry will either be added or
50 * deleted. When modifying the Maple Tree, one store operation may overwrite
51 * the entire data set, or one half of the tree, or the middle half of the tree.
52 *
53 */
54
55
56 #include <linux/maple_tree.h>
57 #include <linux/xarray.h>
58 #include <linux/types.h>
59 #include <linux/export.h>
60 #include <linux/slab.h>
61 #include <linux/limits.h>
62 #include <asm/barrier.h>
63
64 #define CREATE_TRACE_POINTS
65 #include <trace/events/maple_tree.h>
66
67 #define TP_FCT tracepoint_string(__func__)
68
69 /*
70 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
71 * pointers get hashed to arbitrary values.
72 *
73 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
74 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
75 *
76 * Userland doesn't know about %px so also use %p there.
77 */
78 #if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
79 #define PTR_FMT "%px"
80 #else
81 #define PTR_FMT "%p"
82 #endif
83
84 #define MA_ROOT_PARENT 1
85
86 /*
87 * Maple state flags
88 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
89 */
90 #define MA_STATE_PREALLOC 1
91
92 #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
93 #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
94 #define ma_mnode_ptr(x) ((struct maple_node *)(x))
95 #define ma_enode_ptr(x) ((struct maple_enode *)(x))
96 static struct kmem_cache *maple_node_cache;
97
98 #ifdef CONFIG_DEBUG_MAPLE_TREE
99 static const unsigned long mt_max[] = {
100 [maple_dense] = MAPLE_NODE_SLOTS,
101 [maple_leaf_64] = ULONG_MAX,
102 [maple_range_64] = ULONG_MAX,
103 [maple_arange_64] = ULONG_MAX,
104 [maple_copy] = ULONG_MAX,
105 };
106 #define mt_node_max(x) mt_max[mte_node_type(x)]
107 #endif
108
109 static const unsigned char mt_slots[] = {
110 [maple_dense] = MAPLE_NODE_SLOTS,
111 [maple_leaf_64] = MAPLE_RANGE64_SLOTS,
112 [maple_range_64] = MAPLE_RANGE64_SLOTS,
113 [maple_arange_64] = MAPLE_ARANGE64_SLOTS,
114 [maple_copy] = 3,
115 };
116 #define mt_slot_count(x) mt_slots[mte_node_type(x)]
117
118 static const unsigned char mt_pivots[] = {
119 [maple_dense] = 0,
120 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
121 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
122 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
123 [maple_copy] = 3,
124 };
125 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
126
127 static const unsigned char mt_min_slots[] = {
128 [maple_dense] = MAPLE_NODE_SLOTS / 2,
129 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
130 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
131 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
132 [maple_copy] = 1, /* Should never be used */
133 };
134 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
135
136 /* Functions */
mt_alloc_one(gfp_t gfp)137 static inline struct maple_node *mt_alloc_one(gfp_t gfp)
138 {
139 return kmem_cache_alloc(maple_node_cache, gfp);
140 }
141
mt_free_bulk(size_t size,void __rcu ** nodes)142 static inline void mt_free_bulk(size_t size, void __rcu **nodes)
143 {
144 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
145 }
146
mt_return_sheaf(struct slab_sheaf * sheaf)147 static void mt_return_sheaf(struct slab_sheaf *sheaf)
148 {
149 kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
150 }
151
mt_get_sheaf(gfp_t gfp,int count)152 static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
153 {
154 return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
155 }
156
mt_refill_sheaf(gfp_t gfp,struct slab_sheaf ** sheaf,unsigned int size)157 static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
158 unsigned int size)
159 {
160 return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
161 }
162
163 /*
164 * ma_free_rcu() - Use rcu callback to free a maple node
165 * @node: The node to free
166 *
167 * The maple tree uses the parent pointer to indicate this node is no longer in
168 * use and will be freed.
169 */
ma_free_rcu(struct maple_node * node)170 static void ma_free_rcu(struct maple_node *node)
171 {
172 WARN_ON(node->parent != ma_parent_ptr(node));
173 kfree_rcu(node, rcu);
174 }
175
mt_set_height(struct maple_tree * mt,unsigned char height)176 static void mt_set_height(struct maple_tree *mt, unsigned char height)
177 {
178 unsigned int new_flags = mt->ma_flags;
179
180 new_flags &= ~MT_FLAGS_HEIGHT_MASK;
181 MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX);
182 new_flags |= height << MT_FLAGS_HEIGHT_OFFSET;
183 mt->ma_flags = new_flags;
184 }
185
mas_mt_height(struct ma_state * mas)186 static unsigned int mas_mt_height(struct ma_state *mas)
187 {
188 return mt_height(mas->tree);
189 }
190
mt_attr(struct maple_tree * mt)191 static inline unsigned int mt_attr(struct maple_tree *mt)
192 {
193 return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
194 }
195
mte_node_type(const struct maple_enode * entry)196 static __always_inline enum maple_type mte_node_type(
197 const struct maple_enode *entry)
198 {
199 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
200 MAPLE_NODE_TYPE_MASK;
201 }
202
ma_is_dense(const enum maple_type type)203 static __always_inline bool ma_is_dense(const enum maple_type type)
204 {
205 return type < maple_leaf_64;
206 }
207
ma_is_leaf(const enum maple_type type)208 static __always_inline bool ma_is_leaf(const enum maple_type type)
209 {
210 return type < maple_range_64;
211 }
212
mte_is_leaf(const struct maple_enode * entry)213 static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
214 {
215 return ma_is_leaf(mte_node_type(entry));
216 }
217
218 /*
219 * We also reserve values with the bottom two bits set to '10' which are
220 * below 4096
221 */
mt_is_reserved(const void * entry)222 static __always_inline bool mt_is_reserved(const void *entry)
223 {
224 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
225 xa_is_internal(entry);
226 }
227
mas_set_err(struct ma_state * mas,long err)228 static __always_inline void mas_set_err(struct ma_state *mas, long err)
229 {
230 mas->node = MA_ERROR(err);
231 mas->status = ma_error;
232 }
233
mas_is_ptr(const struct ma_state * mas)234 static __always_inline bool mas_is_ptr(const struct ma_state *mas)
235 {
236 return mas->status == ma_root;
237 }
238
mas_is_start(const struct ma_state * mas)239 static __always_inline bool mas_is_start(const struct ma_state *mas)
240 {
241 return mas->status == ma_start;
242 }
243
mas_is_none(const struct ma_state * mas)244 static __always_inline bool mas_is_none(const struct ma_state *mas)
245 {
246 return mas->status == ma_none;
247 }
248
mas_is_paused(const struct ma_state * mas)249 static __always_inline bool mas_is_paused(const struct ma_state *mas)
250 {
251 return mas->status == ma_pause;
252 }
253
mas_is_overflow(struct ma_state * mas)254 static __always_inline bool mas_is_overflow(struct ma_state *mas)
255 {
256 return mas->status == ma_overflow;
257 }
258
mas_is_underflow(struct ma_state * mas)259 static inline bool mas_is_underflow(struct ma_state *mas)
260 {
261 return mas->status == ma_underflow;
262 }
263
mte_to_node(const struct maple_enode * entry)264 static __always_inline struct maple_node *mte_to_node(
265 const struct maple_enode *entry)
266 {
267 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
268 }
269
270 /*
271 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
272 * @entry: The maple encoded node
273 *
274 * Return: a maple topiary pointer
275 */
mte_to_mat(const struct maple_enode * entry)276 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
277 {
278 return (struct maple_topiary *)
279 ((unsigned long)entry & ~MAPLE_NODE_MASK);
280 }
281
282 /*
283 * mas_mn() - Get the maple state node.
284 * @mas: The maple state
285 *
286 * Return: the maple node (not encoded - bare pointer).
287 */
mas_mn(const struct ma_state * mas)288 static inline struct maple_node *mas_mn(const struct ma_state *mas)
289 {
290 return mte_to_node(mas->node);
291 }
292
293 /*
294 * mte_set_node_dead() - Set a maple encoded node as dead.
295 * @mn: The maple encoded node.
296 */
mte_set_node_dead(struct maple_enode * mn)297 static inline void mte_set_node_dead(struct maple_enode *mn)
298 {
299 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
300 smp_wmb(); /* Needed for RCU */
301 }
302
303 /* Bit 1 indicates the root is a node */
304 #define MAPLE_ROOT_NODE 0x02
305 /* maple_type stored bit 3-6 */
306 #define MAPLE_ENODE_TYPE_SHIFT 0x03
307 /* Bit 2 means a NULL somewhere below */
308 #define MAPLE_ENODE_NULL 0x04
309
mt_mk_node(const struct maple_node * node,enum maple_type type)310 static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
311 enum maple_type type)
312 {
313 return (void *)((unsigned long)node |
314 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
315 }
316
ma_init_slot(void __rcu ** slot,const struct maple_node * mn,const enum maple_type mt)317 static inline void ma_init_slot(void __rcu **slot, const struct maple_node *mn,
318 const enum maple_type mt)
319 {
320 /* WARNING: this is unsafe if the slot is exposed to readers. */
321 RCU_INIT_POINTER(*slot, (void *)mt_mk_node(mn, mt));
322 }
323
mte_mk_root(const struct maple_enode * node)324 static inline void *mte_mk_root(const struct maple_enode *node)
325 {
326 return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
327 }
328
mte_safe_root(const struct maple_enode * node)329 static inline void *mte_safe_root(const struct maple_enode *node)
330 {
331 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
332 }
333
mte_set_full(const struct maple_enode * node)334 static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
335 {
336 return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
337 }
338
mte_clear_full(const struct maple_enode * node)339 static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
340 {
341 return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
342 }
343
mte_has_null(const struct maple_enode * node)344 static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
345 {
346 return (unsigned long)node & MAPLE_ENODE_NULL;
347 }
348
ma_is_root(struct maple_node * node)349 static __always_inline bool ma_is_root(struct maple_node *node)
350 {
351 return ((unsigned long)node->parent & MA_ROOT_PARENT);
352 }
353
mte_is_root(const struct maple_enode * node)354 static __always_inline bool mte_is_root(const struct maple_enode *node)
355 {
356 return ma_is_root(mte_to_node(node));
357 }
358
mas_is_root_limits(const struct ma_state * mas)359 static inline bool mas_is_root_limits(const struct ma_state *mas)
360 {
361 return !mas->min && mas->max == ULONG_MAX;
362 }
363
mt_is_alloc(struct maple_tree * mt)364 static __always_inline bool mt_is_alloc(struct maple_tree *mt)
365 {
366 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
367 }
368
369 /*
370 * The Parent Pointer
371 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
372 * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16
373 * bit values need an extra bit to store the offset. This extra bit comes from
374 * a reuse of the last bit in the node type. This is possible by using bit 1 to
375 * indicate if bit 2 is part of the type or the slot.
376 *
377 * Node types:
378 * 0b??1 = Root
379 * 0b?00 = 16 bit nodes
380 * 0b010 = 32 bit nodes
381 * 0b110 = 64 bit nodes
382 *
383 * Slot size and alignment
384 * 0b??1 : Root
385 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7
386 * 0b010 : 32 bit values, type in 0-2, slot in 3-7
387 * 0b110 : 64 bit values, type in 0-2, slot in 3-7
388 */
389
390 #define MAPLE_PARENT_ROOT 0x01
391
392 #define MAPLE_PARENT_SLOT_SHIFT 0x03
393 #define MAPLE_PARENT_SLOT_MASK 0xF8
394
395 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
396 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC
397
398 #define MAPLE_PARENT_RANGE64 0x06
399 #define MAPLE_PARENT_RANGE32 0x02
400 #define MAPLE_PARENT_NOT_RANGE16 0x02
401
402 /*
403 * mte_parent_shift() - Get the parent shift for the slot storage.
404 * @parent: The parent pointer cast as an unsigned long
405 * Return: The shift into that pointer to the star to of the slot
406 */
mte_parent_shift(unsigned long parent)407 static inline unsigned long mte_parent_shift(unsigned long parent)
408 {
409 /* Note bit 1 == 0 means 16B */
410 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
411 return MAPLE_PARENT_SLOT_SHIFT;
412
413 return MAPLE_PARENT_16B_SLOT_SHIFT;
414 }
415
416 /*
417 * mte_parent_slot_mask() - Get the slot mask for the parent.
418 * @parent: The parent pointer cast as an unsigned long.
419 * Return: The slot mask for that parent.
420 */
mte_parent_slot_mask(unsigned long parent)421 static inline unsigned long mte_parent_slot_mask(unsigned long parent)
422 {
423 /* Note bit 1 == 0 means 16B */
424 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
425 return MAPLE_PARENT_SLOT_MASK;
426
427 return MAPLE_PARENT_16B_SLOT_MASK;
428 }
429
430 /*
431 * mas_parent_type() - Return the maple_type of the parent from the stored
432 * parent type.
433 * @mas: The maple state
434 * @enode: The maple_enode to extract the parent's enum
435 * Return: The node->parent maple_type
436 */
437 static inline
mas_parent_type(struct ma_state * mas,struct maple_enode * enode)438 enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
439 {
440 unsigned long p_type;
441
442 p_type = (unsigned long)mte_to_node(enode)->parent;
443 if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
444 return 0;
445
446 p_type &= MAPLE_NODE_MASK;
447 p_type &= ~mte_parent_slot_mask(p_type);
448 switch (p_type) {
449 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
450 if (mt_is_alloc(mas->tree))
451 return maple_arange_64;
452 return maple_range_64;
453 }
454
455 return 0;
456 }
457
458 /*
459 * mas_set_parent() - Set the parent node and encode the slot
460 * @mas: The maple state
461 * @enode: The encoded maple node.
462 * @parent: The encoded maple node that is the parent of @enode.
463 * @slot: The slot that @enode resides in @parent.
464 *
465 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
466 * parent type.
467 */
468 static inline
mas_set_parent(struct ma_state * mas,struct maple_enode * enode,const struct maple_enode * parent,unsigned char slot)469 void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
470 const struct maple_enode *parent, unsigned char slot)
471 {
472 unsigned long val = (unsigned long)parent;
473 unsigned long shift;
474 unsigned long type;
475 enum maple_type p_type = mte_node_type(parent);
476
477 MAS_BUG_ON(mas, p_type == maple_dense);
478 MAS_BUG_ON(mas, p_type == maple_leaf_64);
479
480 switch (p_type) {
481 case maple_range_64:
482 case maple_arange_64:
483 shift = MAPLE_PARENT_SLOT_SHIFT;
484 type = MAPLE_PARENT_RANGE64;
485 break;
486 default:
487 case maple_dense:
488 case maple_leaf_64:
489 shift = type = 0;
490 break;
491 }
492
493 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
494 val |= (slot << shift) | type;
495 mte_to_node(enode)->parent = ma_parent_ptr(val);
496 }
497
498 /*
499 * mte_parent_slot() - get the parent slot of @enode.
500 * @enode: The encoded maple node.
501 *
502 * Return: The slot in the parent node where @enode resides.
503 */
504 static __always_inline
mte_parent_slot(const struct maple_enode * enode)505 unsigned int mte_parent_slot(const struct maple_enode *enode)
506 {
507 unsigned long val = (unsigned long)mte_to_node(enode)->parent;
508
509 if (unlikely(val & MA_ROOT_PARENT))
510 return 0;
511
512 /*
513 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
514 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
515 */
516 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
517 }
518
519 /*
520 * mte_parent() - Get the parent of @node.
521 * @enode: The encoded maple node.
522 *
523 * Return: The parent maple node.
524 */
525 static __always_inline
mte_parent(const struct maple_enode * enode)526 struct maple_node *mte_parent(const struct maple_enode *enode)
527 {
528 return (void *)((unsigned long)
529 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
530 }
531
532 /*
533 * ma_dead_node() - check if the @enode is dead.
534 * @enode: The encoded maple node
535 *
536 * Return: true if dead, false otherwise.
537 */
ma_dead_node(const struct maple_node * node)538 static __always_inline bool ma_dead_node(const struct maple_node *node)
539 {
540 struct maple_node *parent;
541
542 /* Do not reorder reads from the node prior to the parent check */
543 smp_rmb();
544 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
545 return (parent == node);
546 }
547
548 /*
549 * mte_dead_node() - check if the @enode is dead.
550 * @enode: The encoded maple node
551 *
552 * Return: true if dead, false otherwise.
553 */
mte_dead_node(const struct maple_enode * enode)554 static __always_inline bool mte_dead_node(const struct maple_enode *enode)
555 {
556 struct maple_node *node;
557
558 node = mte_to_node(enode);
559 return ma_dead_node(node);
560 }
561
562 /*
563 * ma_pivots() - Get a pointer to the maple node pivots.
564 * @node: the maple node
565 * @type: the node type
566 *
567 * In the event of a dead node, this array may be %NULL
568 *
569 * Return: A pointer to the maple node pivots
570 */
ma_pivots(struct maple_node * node,enum maple_type type)571 static inline unsigned long *ma_pivots(struct maple_node *node,
572 enum maple_type type)
573 {
574 switch (type) {
575 case maple_arange_64:
576 return node->ma64.pivot;
577 case maple_range_64:
578 case maple_leaf_64:
579 return node->mr64.pivot;
580 case maple_copy:
581 return node->cp.pivot;
582 case maple_dense:
583 return NULL;
584 }
585 return NULL;
586 }
587
588 /*
589 * ma_gaps() - Get a pointer to the maple node gaps.
590 * @node: the maple node
591 * @type: the node type
592 *
593 * Return: A pointer to the maple node gaps
594 */
ma_gaps(struct maple_node * node,enum maple_type type)595 static inline unsigned long *ma_gaps(struct maple_node *node,
596 enum maple_type type)
597 {
598 switch (type) {
599 case maple_arange_64:
600 return node->ma64.gap;
601 case maple_copy:
602 return node->cp.gap;
603 case maple_range_64:
604 case maple_leaf_64:
605 case maple_dense:
606 return NULL;
607 }
608 return NULL;
609 }
610
611 /*
612 * mas_safe_pivot() - get the pivot at @piv or mas->max.
613 * @mas: The maple state
614 * @pivots: The pointer to the maple node pivots
615 * @piv: The pivot to fetch
616 * @type: The maple node type
617 *
618 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
619 * otherwise.
620 */
621 static __always_inline unsigned long
mas_safe_pivot(const struct ma_state * mas,unsigned long * pivots,unsigned char piv,enum maple_type type)622 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
623 unsigned char piv, enum maple_type type)
624 {
625 if (piv >= mt_pivots[type])
626 return mas->max;
627
628 return pivots[piv];
629 }
630
631 /*
632 * mas_safe_min() - Return the minimum for a given offset.
633 * @mas: The maple state
634 * @pivots: The pointer to the maple node pivots
635 * @offset: The offset into the pivot array
636 *
637 * Return: The minimum range value that is contained in @offset.
638 */
639 static inline unsigned long
mas_safe_min(struct ma_state * mas,unsigned long * pivots,unsigned char offset)640 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
641 {
642 if (likely(offset))
643 return pivots[offset - 1] + 1;
644
645 return mas->min;
646 }
647
648 /*
649 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
650 * @mn: The encoded maple node
651 * @piv: The pivot offset
652 * @val: The value of the pivot
653 */
mte_set_pivot(struct maple_enode * mn,unsigned char piv,unsigned long val)654 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
655 unsigned long val)
656 {
657 struct maple_node *node = mte_to_node(mn);
658 enum maple_type type = mte_node_type(mn);
659
660 BUG_ON(piv >= mt_pivots[type]);
661 switch (type) {
662 case maple_range_64:
663 case maple_leaf_64:
664 node->mr64.pivot[piv] = val;
665 break;
666 case maple_arange_64:
667 node->ma64.pivot[piv] = val;
668 break;
669 case maple_copy:
670 case maple_dense:
671 break;
672 }
673
674 }
675
676 /*
677 * ma_slots() - Get a pointer to the maple node slots.
678 * @mn: The maple node
679 * @mt: The maple node type
680 *
681 * Return: A pointer to the maple node slots
682 */
ma_slots(struct maple_node * mn,enum maple_type mt)683 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
684 {
685 switch (mt) {
686 case maple_arange_64:
687 return mn->ma64.slot;
688 case maple_range_64:
689 case maple_leaf_64:
690 return mn->mr64.slot;
691 case maple_copy:
692 return mn->cp.slot;
693 case maple_dense:
694 return mn->slot;
695 }
696
697 return NULL;
698 }
699
mt_write_locked(const struct maple_tree * mt)700 static inline bool mt_write_locked(const struct maple_tree *mt)
701 {
702 return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
703 lockdep_is_held(&mt->ma_lock);
704 }
705
mt_locked(const struct maple_tree * mt)706 static __always_inline bool mt_locked(const struct maple_tree *mt)
707 {
708 return mt_external_lock(mt) ? mt_lock_is_held(mt) :
709 lockdep_is_held(&mt->ma_lock);
710 }
711
mt_slot(const struct maple_tree * mt,void __rcu ** slots,unsigned char offset)712 static __always_inline void *mt_slot(const struct maple_tree *mt,
713 void __rcu **slots, unsigned char offset)
714 {
715 return rcu_dereference_check(slots[offset], mt_locked(mt));
716 }
717
mt_slot_locked(struct maple_tree * mt,void __rcu ** slots,unsigned char offset)718 static __always_inline void *mt_slot_locked(struct maple_tree *mt,
719 void __rcu **slots, unsigned char offset)
720 {
721 return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
722 }
723 /*
724 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
725 * @mas: The maple state
726 * @slots: The pointer to the slots
727 * @offset: The offset into the slots array to fetch
728 *
729 * Return: The entry stored in @slots at the @offset.
730 */
mas_slot_locked(struct ma_state * mas,void __rcu ** slots,unsigned char offset)731 static __always_inline void *mas_slot_locked(struct ma_state *mas,
732 void __rcu **slots, unsigned char offset)
733 {
734 return mt_slot_locked(mas->tree, slots, offset);
735 }
736
737 /*
738 * mas_slot() - Get the slot value when not holding the maple tree lock.
739 * @mas: The maple state
740 * @slots: The pointer to the slots
741 * @offset: The offset into the slots array to fetch
742 *
743 * Return: The entry stored in @slots at the @offset
744 */
mas_slot(struct ma_state * mas,void __rcu ** slots,unsigned char offset)745 static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
746 unsigned char offset)
747 {
748 return mt_slot(mas->tree, slots, offset);
749 }
750
751 /*
752 * mas_root() - Get the maple tree root.
753 * @mas: The maple state.
754 *
755 * Return: The pointer to the root of the tree
756 */
mas_root(struct ma_state * mas)757 static __always_inline void *mas_root(struct ma_state *mas)
758 {
759 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
760 }
761
mt_root_locked(struct maple_tree * mt)762 static inline void *mt_root_locked(struct maple_tree *mt)
763 {
764 return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
765 }
766
767 /*
768 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
769 * @mas: The maple state.
770 *
771 * Return: The pointer to the root of the tree
772 */
mas_root_locked(struct ma_state * mas)773 static inline void *mas_root_locked(struct ma_state *mas)
774 {
775 return mt_root_locked(mas->tree);
776 }
777
ma_meta(struct maple_node * mn,enum maple_type mt)778 static inline struct maple_metadata *ma_meta(struct maple_node *mn,
779 enum maple_type mt)
780 {
781 switch (mt) {
782 case maple_arange_64:
783 return &mn->ma64.meta;
784 default:
785 return &mn->mr64.meta;
786 }
787 }
788
789 /*
790 * ma_set_meta() - Set the metadata information of a node.
791 * @mn: The maple node
792 * @mt: The maple node type
793 * @offset: The offset of the highest sub-gap in this node.
794 * @end: The end of the data in this node.
795 */
ma_set_meta(struct maple_node * mn,enum maple_type mt,unsigned char offset,unsigned char end)796 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
797 unsigned char offset, unsigned char end)
798 {
799 struct maple_metadata *meta = ma_meta(mn, mt);
800
801 meta->gap = offset;
802 meta->end = end;
803 }
804
805 /*
806 * mt_clear_meta() - clear the metadata information of a node, if it exists
807 * @mt: The maple tree
808 * @mn: The maple node
809 * @type: The maple node type
810 */
mt_clear_meta(struct maple_tree * mt,struct maple_node * mn,enum maple_type type)811 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
812 enum maple_type type)
813 {
814 struct maple_metadata *meta;
815 unsigned long *pivots;
816 void __rcu **slots;
817 void *next;
818
819 switch (type) {
820 case maple_range_64:
821 pivots = mn->mr64.pivot;
822 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
823 slots = mn->mr64.slot;
824 next = mt_slot_locked(mt, slots,
825 MAPLE_RANGE64_SLOTS - 1);
826 if (unlikely((mte_to_node(next) &&
827 mte_node_type(next))))
828 return; /* no metadata, could be node */
829 }
830 fallthrough;
831 case maple_arange_64:
832 meta = ma_meta(mn, type);
833 break;
834 default:
835 return;
836 }
837
838 meta->gap = 0;
839 meta->end = 0;
840 }
841
842 /*
843 * ma_meta_end() - Get the data end of a node from the metadata
844 * @mn: The maple node
845 * @mt: The maple node type
846 */
ma_meta_end(struct maple_node * mn,enum maple_type mt)847 static inline unsigned char ma_meta_end(struct maple_node *mn,
848 enum maple_type mt)
849 {
850 struct maple_metadata *meta = ma_meta(mn, mt);
851
852 return meta->end;
853 }
854
855 /*
856 * ma_meta_gap() - Get the largest gap location of a node from the metadata
857 * @mn: The maple node
858 */
ma_meta_gap(struct maple_node * mn)859 static inline unsigned char ma_meta_gap(struct maple_node *mn)
860 {
861 return mn->ma64.meta.gap;
862 }
863
864 /*
865 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata
866 * @mn: The maple node
867 * @mt: The maple node type
868 * @offset: The location of the largest gap.
869 */
ma_set_meta_gap(struct maple_node * mn,enum maple_type mt,unsigned char offset)870 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
871 unsigned char offset)
872 {
873
874 struct maple_metadata *meta = ma_meta(mn, mt);
875
876 meta->gap = offset;
877 }
878
879 /*
880 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
881 * @mat: the ma_topiary, a linked list of dead nodes.
882 * @dead_enode: the node to be marked as dead and added to the tail of the list
883 *
884 * Add the @dead_enode to the linked list in @mat.
885 */
mat_add(struct ma_topiary * mat,struct maple_enode * dead_enode)886 static inline void mat_add(struct ma_topiary *mat,
887 struct maple_enode *dead_enode)
888 {
889 mte_set_node_dead(dead_enode);
890 mte_to_mat(dead_enode)->next = NULL;
891 if (!mat->tail) {
892 mat->tail = mat->head = dead_enode;
893 return;
894 }
895
896 mte_to_mat(mat->tail)->next = dead_enode;
897 mat->tail = dead_enode;
898 }
899
900 static void mt_free_walk(struct rcu_head *head);
901 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
902 bool free);
903 /*
904 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
905 * @mas: the maple state
906 * @mat: the ma_topiary linked list of dead nodes to free.
907 *
908 * Destroy walk a dead list.
909 */
mas_mat_destroy(struct ma_state * mas,struct ma_topiary * mat)910 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
911 {
912 struct maple_enode *next;
913 struct maple_node *node;
914 bool in_rcu = mt_in_rcu(mas->tree);
915
916 while (mat->head) {
917 next = mte_to_mat(mat->head)->next;
918 node = mte_to_node(mat->head);
919 mt_destroy_walk(mat->head, mas->tree, !in_rcu);
920 if (in_rcu)
921 call_rcu(&node->rcu, mt_free_walk);
922 mat->head = next;
923 }
924 }
925 /*
926 * mas_descend() - Descend into the slot stored in the ma_state.
927 * @mas: the maple state.
928 *
929 * Note: Not RCU safe, only use in write side or debug code.
930 */
mas_descend(struct ma_state * mas)931 static inline void mas_descend(struct ma_state *mas)
932 {
933 enum maple_type type;
934 unsigned long *pivots;
935 struct maple_node *node;
936 void __rcu **slots;
937
938 node = mas_mn(mas);
939 type = mte_node_type(mas->node);
940 pivots = ma_pivots(node, type);
941 slots = ma_slots(node, type);
942
943 if (mas->offset)
944 mas->min = pivots[mas->offset - 1] + 1;
945 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
946 mas->node = mas_slot(mas, slots, mas->offset);
947 }
948
949 /*
950 * mas_ascend() - Walk up a level of the tree.
951 * @mas: The maple state
952 *
953 * Sets the @mas->max and @mas->min for the parent node of mas->node. This
954 * may cause several levels of walking up to find the correct min and max.
955 * May find a dead node which will cause a premature return.
956 * Return: 1 on dead node, 0 otherwise
957 */
mas_ascend(struct ma_state * mas)958 static int mas_ascend(struct ma_state *mas)
959 {
960 struct maple_enode *p_enode; /* parent enode. */
961 struct maple_enode *a_enode; /* ancestor enode. */
962 struct maple_node *a_node; /* ancestor node. */
963 struct maple_node *p_node; /* parent node. */
964 unsigned char a_slot;
965 enum maple_type a_type;
966 unsigned long min, max;
967 unsigned long *pivots;
968 bool set_max = false, set_min = false;
969
970 a_node = mas_mn(mas);
971 if (ma_is_root(a_node)) {
972 mas->offset = 0;
973 return 0;
974 }
975
976 p_node = mte_parent(mas->node);
977 if (unlikely(a_node == p_node))
978 return 1;
979
980 a_type = mas_parent_type(mas, mas->node);
981 mas->offset = mte_parent_slot(mas->node);
982 a_enode = mt_mk_node(p_node, a_type);
983
984 /* Check to make sure all parent information is still accurate */
985 if (p_node != mte_parent(mas->node))
986 return 1;
987
988 mas->node = a_enode;
989
990 if (mte_is_root(a_enode)) {
991 mas->max = ULONG_MAX;
992 mas->min = 0;
993 return 0;
994 }
995
996 min = 0;
997 max = ULONG_MAX;
998
999 /*
1000 * !mas->offset implies that parent node min == mas->min.
1001 * mas->offset > 0 implies that we need to walk up to find the
1002 * implied pivot min.
1003 */
1004 if (!mas->offset) {
1005 min = mas->min;
1006 set_min = true;
1007 }
1008
1009 if (mas->max == ULONG_MAX)
1010 set_max = true;
1011
1012 do {
1013 p_enode = a_enode;
1014 a_type = mas_parent_type(mas, p_enode);
1015 a_node = mte_parent(p_enode);
1016 a_slot = mte_parent_slot(p_enode);
1017 a_enode = mt_mk_node(a_node, a_type);
1018 pivots = ma_pivots(a_node, a_type);
1019
1020 if (unlikely(ma_dead_node(a_node)))
1021 return 1;
1022
1023 if (!set_min && a_slot) {
1024 set_min = true;
1025 min = pivots[a_slot - 1] + 1;
1026 }
1027
1028 if (!set_max && a_slot < mt_pivots[a_type]) {
1029 set_max = true;
1030 max = pivots[a_slot];
1031 }
1032
1033 if (unlikely(ma_dead_node(a_node)))
1034 return 1;
1035
1036 if (unlikely(ma_is_root(a_node)))
1037 break;
1038
1039 } while (!set_min || !set_max);
1040
1041 mas->max = max;
1042 mas->min = min;
1043 return 0;
1044 }
1045
1046 /*
1047 * mas_pop_node() - Get a previously allocated maple node from the maple state.
1048 * @mas: The maple state
1049 *
1050 * Return: A pointer to a maple node.
1051 */
mas_pop_node(struct ma_state * mas)1052 static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
1053 {
1054 struct maple_node *ret;
1055
1056 if (mas->alloc) {
1057 ret = mas->alloc;
1058 mas->alloc = NULL;
1059 goto out;
1060 }
1061
1062 if (WARN_ON_ONCE(!mas->sheaf))
1063 return NULL;
1064
1065 ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
1066
1067 out:
1068 memset(ret, 0, sizeof(*ret));
1069 return ret;
1070 }
1071
1072 /*
1073 * mas_alloc_nodes() - Allocate nodes into a maple state
1074 * @mas: The maple state
1075 * @gfp: The GFP Flags
1076 */
mas_alloc_nodes(struct ma_state * mas,gfp_t gfp)1077 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1078 {
1079 if (!mas->node_request)
1080 return;
1081
1082 if (mas->node_request == 1) {
1083 if (mas->sheaf)
1084 goto use_sheaf;
1085
1086 if (mas->alloc)
1087 return;
1088
1089 mas->alloc = mt_alloc_one(gfp);
1090 if (!mas->alloc)
1091 goto error;
1092
1093 mas->node_request = 0;
1094 return;
1095 }
1096
1097 use_sheaf:
1098 if (unlikely(mas->alloc)) {
1099 kfree(mas->alloc);
1100 mas->alloc = NULL;
1101 }
1102
1103 if (mas->sheaf) {
1104 unsigned long refill;
1105
1106 refill = mas->node_request;
1107 if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
1108 mas->node_request = 0;
1109 return;
1110 }
1111
1112 if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
1113 goto error;
1114
1115 mas->node_request = 0;
1116 return;
1117 }
1118
1119 mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
1120 if (likely(mas->sheaf)) {
1121 mas->node_request = 0;
1122 return;
1123 }
1124
1125 error:
1126 mas_set_err(mas, -ENOMEM);
1127 }
1128
mas_empty_nodes(struct ma_state * mas)1129 static inline void mas_empty_nodes(struct ma_state *mas)
1130 {
1131 mas->node_request = 0;
1132 if (mas->sheaf) {
1133 mt_return_sheaf(mas->sheaf);
1134 mas->sheaf = NULL;
1135 }
1136
1137 if (mas->alloc) {
1138 kfree(mas->alloc);
1139 mas->alloc = NULL;
1140 }
1141 }
1142
1143 /*
1144 * mas_free() - Free an encoded maple node
1145 * @mas: The maple state
1146 * @used: The encoded maple node to free.
1147 *
1148 * Uses rcu free if necessary, pushes @used back on the maple state allocations
1149 * otherwise.
1150 */
mas_free(struct ma_state * mas,struct maple_enode * used)1151 static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1152 {
1153 ma_free_rcu(mte_to_node(used));
1154 }
1155
1156 /*
1157 * mas_start() - Sets up maple state for operations.
1158 * @mas: The maple state.
1159 *
1160 * If mas->status == ma_start, then set the min, max and depth to
1161 * defaults.
1162 *
1163 * Return:
1164 * - If mas->node is an error or not mas_start, return NULL.
1165 * - If it's an empty tree: NULL & mas->status == ma_none
1166 * - If it's a single entry: The entry & mas->status == ma_root
1167 * - If it's a tree: NULL & mas->status == ma_active
1168 */
mas_start(struct ma_state * mas)1169 static inline struct maple_enode *mas_start(struct ma_state *mas)
1170 {
1171 if (likely(mas_is_start(mas))) {
1172 struct maple_enode *root;
1173
1174 mas->min = 0;
1175 mas->max = ULONG_MAX;
1176
1177 retry:
1178 mas->depth = 0;
1179 root = mas_root(mas);
1180 /* Tree with nodes */
1181 if (likely(xa_is_node(root))) {
1182 mas->depth = 0;
1183 mas->status = ma_active;
1184 mas->node = mte_safe_root(root);
1185 mas->offset = 0;
1186 if (mte_dead_node(mas->node))
1187 goto retry;
1188
1189 return NULL;
1190 }
1191
1192 mas->node = NULL;
1193 /* empty tree */
1194 if (unlikely(!root)) {
1195 mas->status = ma_none;
1196 mas->offset = MAPLE_NODE_SLOTS;
1197 return NULL;
1198 }
1199
1200 /* Single entry tree */
1201 mas->status = ma_root;
1202 mas->offset = MAPLE_NODE_SLOTS;
1203
1204 /* Single entry tree. */
1205 if (mas->index > 0)
1206 return NULL;
1207
1208 return root;
1209 }
1210
1211 return NULL;
1212 }
1213
1214 /*
1215 * ma_data_end() - Find the end of the data in a node.
1216 * @node: The maple node
1217 * @type: The maple node type
1218 * @pivots: The array of pivots in the node
1219 * @max: The maximum value in the node
1220 *
1221 * Uses metadata to find the end of the data when possible.
1222 * Return: The zero indexed last slot with data (may be null).
1223 */
ma_data_end(struct maple_node * node,enum maple_type type,unsigned long * pivots,unsigned long max)1224 static __always_inline unsigned char ma_data_end(struct maple_node *node,
1225 enum maple_type type, unsigned long *pivots, unsigned long max)
1226 {
1227 unsigned char offset;
1228
1229 if (!pivots)
1230 return 0;
1231
1232 if (type == maple_arange_64)
1233 return ma_meta_end(node, type);
1234
1235 offset = mt_pivots[type] - 1;
1236 if (likely(!pivots[offset]))
1237 return ma_meta_end(node, type);
1238
1239 if (likely(pivots[offset] == max))
1240 return offset;
1241
1242 return mt_pivots[type];
1243 }
1244
1245 /*
1246 * mas_data_end() - Find the end of the data (slot).
1247 * @mas: the maple state
1248 *
1249 * This method is optimized to check the metadata of a node if the node type
1250 * supports data end metadata.
1251 *
1252 * Return: The zero indexed last slot with data (may be null).
1253 */
mas_data_end(struct ma_state * mas)1254 static inline unsigned char mas_data_end(struct ma_state *mas)
1255 {
1256 enum maple_type type;
1257 struct maple_node *node;
1258 unsigned char offset;
1259 unsigned long *pivots;
1260
1261 type = mte_node_type(mas->node);
1262 node = mas_mn(mas);
1263 if (type == maple_arange_64)
1264 return ma_meta_end(node, type);
1265
1266 pivots = ma_pivots(node, type);
1267 if (unlikely(ma_dead_node(node)))
1268 return 0;
1269
1270 offset = mt_pivots[type] - 1;
1271 if (likely(!pivots[offset]))
1272 return ma_meta_end(node, type);
1273
1274 if (likely(pivots[offset] == mas->max))
1275 return offset;
1276
1277 return mt_pivots[type];
1278 }
1279
1280 static inline
wr_mas_setup(struct ma_wr_state * wr_mas,struct ma_state * mas)1281 void wr_mas_setup(struct ma_wr_state *wr_mas, struct ma_state *mas)
1282 {
1283 wr_mas->node = mas_mn(mas);
1284 wr_mas->type = mte_node_type(mas->node);
1285 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
1286 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
1287 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, mas->offset);
1288 wr_mas->r_max = mas_safe_pivot(mas, wr_mas->pivots, mas->offset,
1289 wr_mas->type);
1290 }
1291
1292 static inline
wr_mas_ascend(struct ma_wr_state * wr_mas)1293 void wr_mas_ascend(struct ma_wr_state *wr_mas)
1294 {
1295 struct ma_state *mas = wr_mas->mas;
1296
1297 mas_ascend(mas);
1298 wr_mas_setup(wr_mas, mas);
1299 mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots,
1300 mas->max);
1301 /* Careful, this may be wrong.. */
1302 wr_mas->end_piv = wr_mas->r_max;
1303 wr_mas->offset_end = mas->offset;
1304 }
1305
ma_leaf_max_gap(struct maple_node * mn,enum maple_type mt,unsigned long min,unsigned long max,unsigned long * pivots,void __rcu ** slots)1306 static inline unsigned long ma_leaf_max_gap(struct maple_node *mn,
1307 enum maple_type mt, unsigned long min, unsigned long max,
1308 unsigned long *pivots, void __rcu **slots)
1309 {
1310 unsigned long pstart, gap, max_gap;
1311 unsigned char i;
1312 unsigned char max_piv;
1313
1314 max_gap = 0;
1315 if (unlikely(ma_is_dense(mt))) {
1316 gap = 0;
1317 for (i = 0; i < mt_slots[mt]; i++) {
1318 if (slots[i]) {
1319 if (gap > max_gap)
1320 max_gap = gap;
1321 gap = 0;
1322 } else {
1323 gap++;
1324 }
1325 }
1326 if (gap > max_gap)
1327 max_gap = gap;
1328 return max_gap;
1329 }
1330
1331 /*
1332 * Check the first implied pivot optimizes the loop below and slot 1 may
1333 * be skipped if there is a gap in slot 0.
1334 */
1335 if (likely(!slots[0])) {
1336 max_gap = pivots[0] - min + 1;
1337 i = 2;
1338 } else {
1339 i = 1;
1340 }
1341
1342 /* reduce max_piv as the special case is checked before the loop */
1343 max_piv = ma_data_end(mn, mt, pivots, max) - 1;
1344 /*
1345 * Check end implied pivot which can only be a gap on the right most
1346 * node.
1347 */
1348 if (unlikely(max == ULONG_MAX) && !slots[max_piv + 1]) {
1349 gap = ULONG_MAX - pivots[max_piv];
1350 if (gap > max_gap)
1351 max_gap = gap;
1352
1353 if (max_gap > pivots[max_piv] - min)
1354 return max_gap;
1355 }
1356
1357 for (; i <= max_piv; i++) {
1358 /* data == no gap. */
1359 if (likely(slots[i]))
1360 continue;
1361
1362 pstart = pivots[i - 1];
1363 gap = pivots[i] - pstart;
1364 if (gap > max_gap)
1365 max_gap = gap;
1366
1367 /* There cannot be two gaps in a row. */
1368 i++;
1369 }
1370 return max_gap;
1371 }
1372
1373 /*
1374 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1375 * @mas: the maple state
1376 *
1377 * Return: The maximum gap in the leaf.
1378 */
mas_leaf_max_gap(struct ma_state * mas)1379 static inline unsigned long mas_leaf_max_gap(struct ma_state *mas)
1380 {
1381 enum maple_type mt;
1382 struct maple_node *mn;
1383 unsigned long *pivots;
1384 void __rcu **slots;
1385
1386 mn = mas_mn(mas);
1387 mt = mte_node_type(mas->node);
1388 slots = ma_slots(mn, mt);
1389 pivots = ma_pivots(mn, mt);
1390
1391 return ma_leaf_max_gap(mn, mt, mas->min, mas->max, pivots, slots);
1392 }
1393
1394 /*
1395 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1396 * @node: The maple node
1397 * @gaps: The pointer to the gaps
1398 * @mt: The maple node type
1399 * @off: Pointer to store the offset location of the gap.
1400 *
1401 * Uses the metadata data end to scan backwards across set gaps.
1402 *
1403 * Return: The maximum gap value
1404 */
1405 static inline unsigned long
ma_max_gap(struct maple_node * node,unsigned long * gaps,enum maple_type mt,unsigned char * off)1406 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1407 unsigned char *off)
1408 {
1409 unsigned char offset, i;
1410 unsigned long max_gap = 0;
1411
1412 i = offset = ma_meta_end(node, mt);
1413 do {
1414 if (gaps[i] > max_gap) {
1415 max_gap = gaps[i];
1416 offset = i;
1417 }
1418 } while (i--);
1419
1420 *off = offset;
1421 return max_gap;
1422 }
1423
1424 /*
1425 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1426 * @mas: The maple state.
1427 *
1428 * Return: The gap value.
1429 */
mas_max_gap(struct ma_state * mas)1430 static inline unsigned long mas_max_gap(struct ma_state *mas)
1431 {
1432 unsigned long *gaps;
1433 unsigned char offset;
1434 enum maple_type mt;
1435 struct maple_node *node;
1436
1437 mt = mte_node_type(mas->node);
1438 if (ma_is_leaf(mt))
1439 return mas_leaf_max_gap(mas);
1440
1441 node = mas_mn(mas);
1442 MAS_BUG_ON(mas, mt != maple_arange_64);
1443 offset = ma_meta_gap(node);
1444 gaps = ma_gaps(node, mt);
1445 return gaps[offset];
1446 }
1447
1448 /*
1449 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1450 * @mas: The maple state
1451 * @offset: The gap offset in the parent to set
1452 * @new: The new gap value.
1453 *
1454 * Set the parent gap then continue to set the gap upwards, using the metadata
1455 * of the parent to see if it is necessary to check the node above.
1456 */
mas_parent_gap(struct ma_state * mas,unsigned char offset,unsigned long new)1457 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1458 unsigned long new)
1459 {
1460 unsigned long meta_gap = 0;
1461 struct maple_node *pnode;
1462 struct maple_enode *penode;
1463 unsigned long *pgaps;
1464 unsigned char meta_offset;
1465 enum maple_type pmt;
1466
1467 pnode = mte_parent(mas->node);
1468 pmt = mas_parent_type(mas, mas->node);
1469 penode = mt_mk_node(pnode, pmt);
1470 pgaps = ma_gaps(pnode, pmt);
1471
1472 ascend:
1473 MAS_BUG_ON(mas, pmt != maple_arange_64);
1474 meta_offset = ma_meta_gap(pnode);
1475 meta_gap = pgaps[meta_offset];
1476
1477 pgaps[offset] = new;
1478
1479 if (meta_gap == new)
1480 return;
1481
1482 if (offset != meta_offset) {
1483 if (meta_gap > new)
1484 return;
1485
1486 ma_set_meta_gap(pnode, pmt, offset);
1487 } else if (new < meta_gap) {
1488 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1489 ma_set_meta_gap(pnode, pmt, meta_offset);
1490 }
1491
1492 if (ma_is_root(pnode))
1493 return;
1494
1495 /* Go to the parent node. */
1496 pnode = mte_parent(penode);
1497 pmt = mas_parent_type(mas, penode);
1498 pgaps = ma_gaps(pnode, pmt);
1499 offset = mte_parent_slot(penode);
1500 penode = mt_mk_node(pnode, pmt);
1501 goto ascend;
1502 }
1503
1504 /*
1505 * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
1506 * @mas: the maple state.
1507 */
mas_update_gap(struct ma_state * mas)1508 static inline void mas_update_gap(struct ma_state *mas)
1509 {
1510 unsigned char pslot;
1511 unsigned long p_gap;
1512 unsigned long max_gap;
1513
1514 if (!mt_is_alloc(mas->tree))
1515 return;
1516
1517 if (mte_is_root(mas->node))
1518 return;
1519
1520 max_gap = mas_max_gap(mas);
1521
1522 pslot = mte_parent_slot(mas->node);
1523 p_gap = ma_gaps(mte_parent(mas->node),
1524 mas_parent_type(mas, mas->node))[pslot];
1525
1526 if (p_gap != max_gap)
1527 mas_parent_gap(mas, pslot, max_gap);
1528 }
1529
1530 /*
1531 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1532 * @parent with the slot encoded.
1533 * @mas: the maple state (for the tree)
1534 * @parent: the maple encoded node containing the children.
1535 */
mas_adopt_children(struct ma_state * mas,struct maple_enode * parent)1536 static inline void mas_adopt_children(struct ma_state *mas,
1537 struct maple_enode *parent)
1538 {
1539 enum maple_type type = mte_node_type(parent);
1540 struct maple_node *node = mte_to_node(parent);
1541 void __rcu **slots = ma_slots(node, type);
1542 unsigned long *pivots = ma_pivots(node, type);
1543 struct maple_enode *child;
1544 unsigned char offset;
1545
1546 offset = ma_data_end(node, type, pivots, mas->max);
1547 do {
1548 child = mas_slot_locked(mas, slots, offset);
1549 mas_set_parent(mas, child, parent, offset);
1550 } while (offset--);
1551 }
1552
1553 /*
1554 * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
1555 * node as dead.
1556 * @mas: the maple state with the new node
1557 * @old_enode: The old maple encoded node to replace.
1558 * @new_height: if we are inserting a root node, update the height of the tree
1559 */
mas_put_in_tree(struct ma_state * mas,struct maple_enode * old_enode,char new_height)1560 static inline void mas_put_in_tree(struct ma_state *mas,
1561 struct maple_enode *old_enode, char new_height)
1562 __must_hold(mas->tree->ma_lock)
1563 {
1564 unsigned char offset;
1565 void __rcu **slots;
1566
1567 if (mte_is_root(mas->node)) {
1568 mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
1569 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1570 mt_set_height(mas->tree, new_height);
1571 } else {
1572
1573 offset = mte_parent_slot(mas->node);
1574 slots = ma_slots(mte_parent(mas->node),
1575 mas_parent_type(mas, mas->node));
1576 rcu_assign_pointer(slots[offset], mas->node);
1577 }
1578
1579 mte_set_node_dead(old_enode);
1580 }
1581
1582 /*
1583 * mas_replace_node() - Replace a node by putting it in the tree, marking it
1584 * dead, and freeing it.
1585 * the parent encoding to locate the maple node in the tree.
1586 * @mas: the ma_state with @mas->node pointing to the new node.
1587 * @old_enode: The old maple encoded node.
1588 * @new_height: The new height of the tree as a result of the operation
1589 */
mas_replace_node(struct ma_state * mas,struct maple_enode * old_enode,unsigned char new_height)1590 static inline void mas_replace_node(struct ma_state *mas,
1591 struct maple_enode *old_enode, unsigned char new_height)
1592 __must_hold(mas->tree->ma_lock)
1593 {
1594 mas_put_in_tree(mas, old_enode, new_height);
1595 mas_free(mas, old_enode);
1596 }
1597
1598 /*
1599 * mas_find_child() - Find a child who has the parent @mas->node.
1600 * @mas: the maple state with the parent.
1601 * @child: the maple state to store the child.
1602 */
mas_find_child(struct ma_state * mas,struct ma_state * child)1603 static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
1604 __must_hold(mas->tree->ma_lock)
1605 {
1606 enum maple_type mt;
1607 unsigned char offset;
1608 unsigned char end;
1609 unsigned long *pivots;
1610 struct maple_enode *entry;
1611 struct maple_node *node;
1612 void __rcu **slots;
1613
1614 mt = mte_node_type(mas->node);
1615 node = mas_mn(mas);
1616 slots = ma_slots(node, mt);
1617 pivots = ma_pivots(node, mt);
1618 end = ma_data_end(node, mt, pivots, mas->max);
1619 for (offset = mas->offset; offset <= end; offset++) {
1620 entry = mas_slot_locked(mas, slots, offset);
1621 if (mte_parent(entry) == node) {
1622 *child = *mas;
1623 mas->offset = offset + 1;
1624 child->offset = offset;
1625 mas_descend(child);
1626 child->offset = 0;
1627 return true;
1628 }
1629 }
1630 return false;
1631 }
1632
1633 /*
1634 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
1635 * @node: The maple node
1636 * @mt: The maple type
1637 * @end: The node end
1638 */
mas_leaf_set_meta(struct maple_node * node,enum maple_type mt,unsigned char end)1639 static inline void mas_leaf_set_meta(struct maple_node *node,
1640 enum maple_type mt, unsigned char end)
1641 {
1642 if (end < mt_slots[mt] - 1)
1643 ma_set_meta(node, mt, 0, end);
1644 }
1645
1646 /*
1647 * mas_prev_sibling() - Find the previous node with the same parent.
1648 * @mas: the maple state
1649 *
1650 * Return: True if there is a previous sibling, false otherwise.
1651 */
mas_prev_sibling(struct ma_state * mas)1652 static inline bool mas_prev_sibling(struct ma_state *mas)
1653 {
1654 unsigned int p_slot = mte_parent_slot(mas->node);
1655
1656 /* For root node, p_slot is set to 0 by mte_parent_slot(). */
1657 if (!p_slot)
1658 return false;
1659
1660 mas_ascend(mas);
1661 mas->offset = p_slot - 1;
1662 mas_descend(mas);
1663 return true;
1664 }
1665
1666 /*
1667 * mas_next_sibling() - Find the next node with the same parent.
1668 * @mas: the maple state
1669 *
1670 * Return: true if there is a next sibling, false otherwise.
1671 */
mas_next_sibling(struct ma_state * mas)1672 static inline bool mas_next_sibling(struct ma_state *mas)
1673 {
1674 MA_STATE(parent, mas->tree, mas->index, mas->last);
1675
1676 if (mte_is_root(mas->node))
1677 return false;
1678
1679 parent = *mas;
1680 mas_ascend(&parent);
1681 parent.offset = mte_parent_slot(mas->node) + 1;
1682 if (parent.offset > mas_data_end(&parent))
1683 return false;
1684
1685 *mas = parent;
1686 mas_descend(mas);
1687 return true;
1688 }
1689
1690 /*
1691 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
1692 * If @mas->index cannot be found within the containing
1693 * node, we traverse to the last entry in the node.
1694 * @wr_mas: The maple write state
1695 *
1696 * Uses mas_slot_locked() and does not need to worry about dead nodes.
1697 */
mas_wr_node_walk(struct ma_wr_state * wr_mas)1698 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
1699 {
1700 struct ma_state *mas = wr_mas->mas;
1701 unsigned char count, offset;
1702
1703 if (unlikely(ma_is_dense(wr_mas->type))) {
1704 wr_mas->r_max = wr_mas->r_min = mas->index;
1705 mas->offset = mas->index = mas->min;
1706 return;
1707 }
1708
1709 wr_mas->node = mas_mn(wr_mas->mas);
1710 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
1711 count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
1712 wr_mas->pivots, mas->max);
1713 offset = mas->offset;
1714
1715 while (offset < count && mas->index > wr_mas->pivots[offset])
1716 offset++;
1717
1718 wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
1719 wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
1720 wr_mas->offset_end = mas->offset = offset;
1721 }
1722
rebalance_sib(struct ma_state * parent,struct ma_state * sib)1723 static inline void rebalance_sib(struct ma_state *parent, struct ma_state *sib)
1724 {
1725 *sib = *parent;
1726 /* Prioritize move right to pull data left */
1727 if (sib->offset < sib->end)
1728 sib->offset++;
1729 else
1730 sib->offset--;
1731
1732 mas_descend(sib);
1733 sib->end = mas_data_end(sib);
1734 }
1735
1736 static inline
spanning_sib(struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas,struct ma_state * nneighbour)1737 void spanning_sib(struct ma_wr_state *l_wr_mas,
1738 struct ma_wr_state *r_wr_mas, struct ma_state *nneighbour)
1739 {
1740 struct ma_state l_tmp = *l_wr_mas->mas;
1741 struct ma_state r_tmp = *r_wr_mas->mas;
1742 unsigned char depth = 0;
1743
1744 do {
1745 mas_ascend(&r_tmp);
1746 mas_ascend(&l_tmp);
1747 depth++;
1748 if (r_tmp.offset < mas_data_end(&r_tmp)) {
1749 r_tmp.offset++;
1750 mas_descend(&r_tmp);
1751 r_tmp.offset = 0;
1752 while (--depth)
1753 mas_descend(&r_tmp);
1754
1755 r_tmp.end = mas_data_end(&r_tmp);
1756 *nneighbour = r_tmp;
1757 return;
1758 } else if (l_tmp.offset) {
1759 l_tmp.offset--;
1760 do {
1761 mas_descend(&l_tmp);
1762 l_tmp.offset = mas_data_end(&l_tmp);
1763 } while (--depth);
1764
1765 l_tmp.end = l_tmp.offset;
1766 *nneighbour = l_tmp;
1767 return;
1768 }
1769 } while (!mte_is_root(r_tmp.node));
1770
1771 WARN_ON_ONCE(1);
1772 }
1773
1774 /*
1775 * mas_topiary_node() - Dispose of a single node
1776 * @mas: The maple state for pushing nodes
1777 * @in_rcu: If the tree is in rcu mode
1778 *
1779 * The node will either be RCU freed or pushed back on the maple state.
1780 */
mas_topiary_node(struct ma_state * mas,struct ma_state * tmp_mas,bool in_rcu)1781 static inline void mas_topiary_node(struct ma_state *mas,
1782 struct ma_state *tmp_mas, bool in_rcu)
1783 {
1784 struct maple_node *tmp;
1785 struct maple_enode *enode;
1786
1787 if (mas_is_none(tmp_mas))
1788 return;
1789
1790 enode = tmp_mas->node;
1791 tmp = mte_to_node(enode);
1792 mte_set_node_dead(enode);
1793 ma_free_rcu(tmp);
1794 }
1795
1796 /*
1797 * mas_topiary_replace() - Replace the data with new data, then repair the
1798 * parent links within the new tree. Iterate over the dead sub-tree and collect
1799 * the dead subtrees and topiary the nodes that are no longer of use.
1800 *
1801 * The new tree will have up to three children with the correct parent. Keep
1802 * track of the new entries as they need to be followed to find the next level
1803 * of new entries.
1804 *
1805 * The old tree will have up to three children with the old parent. Keep track
1806 * of the old entries as they may have more nodes below replaced. Nodes within
1807 * [index, last] are dead subtrees, others need to be freed and followed.
1808 *
1809 * @mas: The maple state pointing at the new data
1810 * @old_enode: The maple encoded node being replaced
1811 * @new_height: The new height of the tree as a result of the operation
1812 *
1813 */
mas_topiary_replace(struct ma_state * mas,struct maple_enode * old_enode,unsigned char new_height)1814 static inline void mas_topiary_replace(struct ma_state *mas,
1815 struct maple_enode *old_enode, unsigned char new_height)
1816 {
1817 struct ma_state tmp[3], tmp_next[3];
1818 MA_TOPIARY(subtrees, mas->tree);
1819 bool in_rcu;
1820 int i, n;
1821
1822 /* Place data in tree & then mark node as old */
1823 mas_put_in_tree(mas, old_enode, new_height);
1824
1825 /* Update the parent pointers in the tree */
1826 tmp[0] = *mas;
1827 tmp[0].offset = 0;
1828 tmp[1].status = ma_none;
1829 tmp[2].status = ma_none;
1830 while (!mte_is_leaf(tmp[0].node)) {
1831 n = 0;
1832 for (i = 0; i < 3; i++) {
1833 if (mas_is_none(&tmp[i]))
1834 continue;
1835
1836 while (n < 3) {
1837 if (!mas_find_child(&tmp[i], &tmp_next[n]))
1838 break;
1839 n++;
1840 }
1841
1842 mas_adopt_children(&tmp[i], tmp[i].node);
1843 }
1844
1845 if (MAS_WARN_ON(mas, n == 0))
1846 break;
1847
1848 while (n < 3)
1849 tmp_next[n++].status = ma_none;
1850
1851 for (i = 0; i < 3; i++)
1852 tmp[i] = tmp_next[i];
1853 }
1854
1855 /* Collect the old nodes that need to be discarded */
1856 if (mte_is_leaf(old_enode))
1857 return mas_free(mas, old_enode);
1858
1859 tmp[0] = *mas;
1860 tmp[0].offset = 0;
1861 tmp[0].node = old_enode;
1862 tmp[1].status = ma_none;
1863 tmp[2].status = ma_none;
1864 in_rcu = mt_in_rcu(mas->tree);
1865 do {
1866 n = 0;
1867 for (i = 0; i < 3; i++) {
1868 if (mas_is_none(&tmp[i]))
1869 continue;
1870
1871 while (n < 3) {
1872 if (!mas_find_child(&tmp[i], &tmp_next[n]))
1873 break;
1874
1875 if ((tmp_next[n].min >= tmp_next->index) &&
1876 (tmp_next[n].max <= tmp_next->last)) {
1877 mat_add(&subtrees, tmp_next[n].node);
1878 tmp_next[n].status = ma_none;
1879 } else {
1880 n++;
1881 }
1882 }
1883 }
1884
1885 if (MAS_WARN_ON(mas, n == 0))
1886 break;
1887
1888 while (n < 3)
1889 tmp_next[n++].status = ma_none;
1890
1891 for (i = 0; i < 3; i++) {
1892 mas_topiary_node(mas, &tmp[i], in_rcu);
1893 tmp[i] = tmp_next[i];
1894 }
1895 } while (!mte_is_leaf(tmp[0].node));
1896
1897 for (i = 0; i < 3; i++)
1898 mas_topiary_node(mas, &tmp[i], in_rcu);
1899
1900 mas_mat_destroy(mas, &subtrees);
1901 }
1902
1903 /*
1904 * node_copy() - Copy from one node to another.
1905 *
1906 * @mas: The maple state
1907 * @src: The source node
1908 * @start: The offset into the src to start copying
1909 * @size: The size to copy (non-zero)
1910 * @s_max: The source node max
1911 * @s_mt: The source maple node type
1912 * @dst: The destination
1913 * @d_start: The start location in the destination node
1914 * @d_mt: The destination maple node type
1915 */
1916 static inline
node_copy(struct ma_state * mas,struct maple_node * src,unsigned char start,unsigned char size,unsigned long s_max,enum maple_type s_mt,struct maple_node * dst,unsigned char d_start,enum maple_type d_mt)1917 unsigned long node_copy(struct ma_state *mas, struct maple_node *src,
1918 unsigned char start, unsigned char size, unsigned long s_max,
1919 enum maple_type s_mt, struct maple_node *dst, unsigned char d_start,
1920 enum maple_type d_mt)
1921 {
1922 unsigned long *s_pivots, *d_pivots;
1923 void __rcu **s_slots, **d_slots;
1924 unsigned long *s_gaps, *d_gaps;
1925 unsigned long d_max;
1926
1927 d_slots = ma_slots(dst, d_mt) + d_start;
1928 d_pivots = ma_pivots(dst, d_mt) + d_start;
1929 s_slots = ma_slots(src, s_mt) + start;
1930 s_pivots = ma_pivots(src, s_mt) + start;
1931 memcpy(d_slots, s_slots, size * sizeof(void __rcu *));
1932 if (!ma_is_leaf(d_mt) && s_mt == maple_copy) {
1933 struct maple_enode *edst = mt_mk_node(dst, d_mt);
1934
1935
1936 for (int i = 0; i < size; i++)
1937 mas_set_parent(mas,
1938 mt_slot_locked(mas->tree, d_slots, i),
1939 edst, d_start + i);
1940 }
1941
1942 d_gaps = ma_gaps(dst, d_mt);
1943 if (d_gaps) {
1944 s_gaps = ma_gaps(src, s_mt) + start;
1945 d_gaps += d_start;
1946 memcpy(d_gaps, s_gaps, size * sizeof(unsigned long));
1947 }
1948
1949 if (start + size - 1 < mt_pivots[s_mt])
1950 d_max = s_pivots[size - 1];
1951 else
1952 d_max = s_max;
1953
1954 if (d_start + size <= mt_pivots[d_mt])
1955 d_pivots[size - 1] = d_max;
1956
1957 size--;
1958 if (size)
1959 memcpy(d_pivots, s_pivots, size * sizeof(unsigned long));
1960
1961 return d_max;
1962 }
1963
1964 /*
1965 * node_finalise() - Zero out unused area and populate metadata
1966 * @node: The maple node
1967 * @mt: The maple node type
1968 * @end: The end of the used area
1969 */
1970 static inline
node_finalise(struct maple_node * node,enum maple_type mt,unsigned char end)1971 void node_finalise(struct maple_node *node, enum maple_type mt,
1972 unsigned char end)
1973 {
1974 unsigned char max_end = mt_slots[mt];
1975 unsigned char size;
1976 unsigned long *gaps;
1977 unsigned char gap_slot;
1978
1979 gaps = ma_gaps(node, mt);
1980 if (end < max_end - 1) {
1981 size = max_end - end;
1982 memset(ma_slots(node, mt) + end, 0, size * sizeof(void *));
1983
1984 if (gaps)
1985 memset(gaps + end, 0, size * sizeof(unsigned long));
1986
1987 if (--size)
1988 memset(ma_pivots(node, mt) + end, 0, size * sizeof(unsigned long));
1989 }
1990
1991 gap_slot = 0;
1992 if (gaps && !ma_is_leaf(mt)) {
1993 unsigned long max_gap;
1994
1995 max_gap = 0;
1996 for (int i = 0; i <= end; i++)
1997 if (gaps[i] > max_gap) {
1998 gap_slot = i;
1999 max_gap = gaps[i];
2000 }
2001 }
2002
2003 if (mt == maple_arange_64)
2004 ma_set_meta(node, mt, gap_slot, end - 1);
2005 else if (end <= max_end - 1)
2006 ma_set_meta(node, mt, gap_slot, end - 1);
2007 }
2008
mtree_range_walk(struct ma_state * mas)2009 static inline void *mtree_range_walk(struct ma_state *mas)
2010 {
2011 unsigned long *pivots;
2012 unsigned char offset;
2013 struct maple_node *node;
2014 struct maple_enode *next, *last;
2015 enum maple_type type;
2016 void __rcu **slots;
2017 unsigned char end;
2018 unsigned long max, min;
2019 unsigned long prev_max, prev_min;
2020
2021 next = mas->node;
2022 min = mas->min;
2023 max = mas->max;
2024 do {
2025 last = next;
2026 node = mte_to_node(next);
2027 type = mte_node_type(next);
2028 pivots = ma_pivots(node, type);
2029 end = ma_data_end(node, type, pivots, max);
2030 prev_min = min;
2031 prev_max = max;
2032 if (pivots[0] >= mas->index) {
2033 offset = 0;
2034 max = pivots[0];
2035 goto next;
2036 }
2037
2038 offset = 1;
2039 while (offset < end) {
2040 if (pivots[offset] >= mas->index) {
2041 max = pivots[offset];
2042 break;
2043 }
2044 offset++;
2045 }
2046
2047 min = pivots[offset - 1] + 1;
2048 next:
2049 slots = ma_slots(node, type);
2050 next = mt_slot(mas->tree, slots, offset);
2051 if (unlikely(ma_dead_node(node)))
2052 goto dead_node;
2053 } while (!ma_is_leaf(type));
2054
2055 mas->end = end;
2056 mas->offset = offset;
2057 mas->index = min;
2058 mas->last = max;
2059 mas->min = prev_min;
2060 mas->max = prev_max;
2061 mas->node = last;
2062 return (void *)next;
2063
2064 dead_node:
2065 mas_reset(mas);
2066 return NULL;
2067 }
2068
2069 /*
2070 * mas_wmb_replace() - Write memory barrier and replace
2071 * @mas: The maple state
2072 * @cp: The maple copy node
2073 *
2074 * Updates gap as necessary.
2075 */
mas_wmb_replace(struct ma_state * mas,struct maple_copy * cp)2076 static inline void mas_wmb_replace(struct ma_state *mas, struct maple_copy *cp)
2077 {
2078 struct maple_enode *old_enode;
2079
2080 old_enode = mas->node;
2081 mas->node = mt_slot_locked(mas->tree, cp->slot, 0);
2082 /* Insert the new data in the tree */
2083 mas_topiary_replace(mas, old_enode, cp->height);
2084 if (!mte_is_leaf(mas->node))
2085 mas_update_gap(mas);
2086
2087 mtree_range_walk(mas);
2088 }
2089
2090
2091 /*
2092 * cp_leaf_init() - Initialize a maple_copy node for the leaf level of a
2093 * spanning store
2094 * @cp: The maple copy node
2095 * @mas: The maple state
2096 * @l_wr_mas: The left write state of the spanning store
2097 * @r_wr_mas: The right write state of the spanning store
2098 */
cp_leaf_init(struct maple_copy * cp,struct ma_state * mas,struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas)2099 static inline void cp_leaf_init(struct maple_copy *cp,
2100 struct ma_state *mas, struct ma_wr_state *l_wr_mas,
2101 struct ma_wr_state *r_wr_mas)
2102 {
2103 unsigned char end = 0;
2104
2105 /*
2106 * WARNING: The use of RCU_INIT_POINTER() makes it extremely important
2107 * to not expose the maple_copy node to any readers. Exposure may
2108 * result in buggy code when a compiler reorders the instructions.
2109 */
2110
2111 cp->height = 1;
2112 /* Create entries to insert including split entries to left and right */
2113 if (l_wr_mas->r_min < mas->index) {
2114 end++;
2115 RCU_INIT_POINTER(cp->slot[0], l_wr_mas->content);
2116 cp->pivot[0] = mas->index - 1;
2117 }
2118 RCU_INIT_POINTER(cp->slot[end], l_wr_mas->entry);
2119 cp->pivot[end] = mas->last;
2120
2121 if (r_wr_mas->end_piv > mas->last) {
2122 end++;
2123 RCU_INIT_POINTER(cp->slot[end],
2124 r_wr_mas->slots[r_wr_mas->offset_end]);
2125 cp->pivot[end] = r_wr_mas->end_piv;
2126 }
2127
2128 cp->min = l_wr_mas->r_min;
2129 cp->max = cp->pivot[end];
2130 cp->end = end;
2131 }
2132
2133 /*
2134 * cp_data_calc() - Calculate the size of the data (1 indexed).
2135 * @cp: The maple copy struct with the new data populated.
2136 * @l_wr_mas: The maple write state containing the data to the left of the write
2137 * @r_wr_mas: The maple write state containing the data to the right of the
2138 * write
2139 *
2140 * cp->data is a size (not indexed by 0).
2141 */
cp_data_calc(struct maple_copy * cp,struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas)2142 static inline void cp_data_calc(struct maple_copy *cp,
2143 struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas)
2144 {
2145
2146 /* Add 1 every time for the 0th element */
2147 cp->data = l_wr_mas->mas->offset;
2148 /* Add the new data and any partial overwrites */
2149 cp->data += cp->end + 1;
2150 /* Data from right (offset + 1 to end), +1 for zero */
2151 cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
2152 }
2153
data_fits(struct ma_state * sib,struct ma_state * mas,struct maple_copy * cp)2154 static bool data_fits(struct ma_state *sib, struct ma_state *mas,
2155 struct maple_copy *cp)
2156 {
2157 unsigned char new_data;
2158 enum maple_type type;
2159 unsigned char space;
2160 unsigned char end;
2161
2162 type = mte_node_type(mas->node);
2163 space = 2 * mt_slots[type];
2164 end = sib->end;
2165
2166 new_data = end + 1 + cp->data;
2167 if (new_data > space)
2168 return false;
2169
2170 /*
2171 * This is off by one by design. The extra space is left to reduce
2172 * jitter in operations that add then remove two entries.
2173 *
2174 * end is an index while new space and data are both sizes. Adding one
2175 * to end to convert the index to a size means that the below
2176 * calculation should be <=, but we want to keep an extra space in nodes
2177 * to reduce jitter.
2178 *
2179 * Note that it is still possible to get a full node on the left by the
2180 * NULL landing exactly on the split. The NULL ending of a node happens
2181 * in the dst_setup() function, where we will either increase the split
2182 * by one or decrease it by one, if possible. In the case of split
2183 * (this case), it is always possible to shift the spilt by one - again
2184 * because there is at least one slot free by the below checking.
2185 */
2186 if (new_data < space)
2187 return true;
2188
2189 return false;
2190 }
2191
push_data_sib(struct maple_copy * cp,struct ma_state * mas,struct ma_state * sib,struct ma_state * parent)2192 static inline void push_data_sib(struct maple_copy *cp, struct ma_state *mas,
2193 struct ma_state *sib, struct ma_state *parent)
2194 {
2195
2196 if (mte_is_root(mas->node))
2197 goto no_push;
2198
2199
2200 *sib = *parent;
2201 if (sib->offset) {
2202 sib->offset--;
2203 mas_descend(sib);
2204 sib->end = mas_data_end(sib);
2205 if (data_fits(sib, mas, cp)) /* Push left */
2206 return;
2207
2208 *sib = *parent;
2209 }
2210
2211 if (sib->offset >= sib->end)
2212 goto no_push;
2213
2214 sib->offset++;
2215 mas_descend(sib);
2216 sib->end = mas_data_end(sib);
2217 if (data_fits(sib, mas, cp)) /* Push right*/
2218 return;
2219
2220 no_push:
2221 sib->end = 0;
2222 }
2223
2224 /*
2225 * rebalance_data() - Calculate the @cp data, populate @sib if insufficient or
2226 * if the data can be pushed into a sibling.
2227 * @cp: The maple copy node
2228 * @wr_mas: The left write maple state
2229 * @sib: The maple state of the sibling.
2230 *
2231 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
2232 * indicate it will not be used.
2233 *
2234 */
rebalance_data(struct maple_copy * cp,struct ma_wr_state * wr_mas,struct ma_state * sib,struct ma_state * parent)2235 static inline void rebalance_data(struct maple_copy *cp,
2236 struct ma_wr_state *wr_mas, struct ma_state *sib,
2237 struct ma_state *parent)
2238 {
2239 cp_data_calc(cp, wr_mas, wr_mas);
2240 sib->end = 0;
2241 if (cp->data > mt_slots[wr_mas->type]) {
2242 push_data_sib(cp, wr_mas->mas, sib, parent);
2243 if (sib->end)
2244 goto use_sib;
2245 } else if (cp->data <= mt_min_slots[wr_mas->type]) {
2246 if ((wr_mas->mas->min != 0) ||
2247 (wr_mas->mas->max != ULONG_MAX)) {
2248 rebalance_sib(parent, sib);
2249 goto use_sib;
2250 }
2251 }
2252
2253 return;
2254
2255 use_sib:
2256
2257 cp->data += sib->end + 1;
2258 }
2259
2260 /*
2261 * spanning_data() - Calculate the @cp data and populate @sib if insufficient
2262 * @cp: The maple copy node
2263 * @l_wr_mas: The left write maple state
2264 * @r_wr_mas: The right write maple state
2265 * @sib: The maple state of the sibling.
2266 *
2267 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
2268 * indicate it will not be used.
2269 */
spanning_data(struct maple_copy * cp,struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas,struct ma_state * sib)2270 static inline void spanning_data(struct maple_copy *cp,
2271 struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
2272 struct ma_state *sib)
2273 {
2274 cp_data_calc(cp, l_wr_mas, r_wr_mas);
2275 if (((l_wr_mas->mas->min != 0) || (r_wr_mas->mas->max != ULONG_MAX)) &&
2276 (cp->data <= mt_min_slots[l_wr_mas->type])) {
2277 spanning_sib(l_wr_mas, r_wr_mas, sib);
2278 cp->data += sib->end + 1;
2279 } else {
2280 sib->end = 0;
2281 }
2282 }
2283
2284 /*
2285 * dst_setup() - Set up one or more destinations for the new data.
2286 * @cp: The maple copy node
2287 * @mas: The maple state
2288 * @mt: The source node type
2289 */
2290 static inline
dst_setup(struct maple_copy * cp,struct ma_state * mas,enum maple_type mt)2291 void dst_setup(struct maple_copy *cp, struct ma_state *mas, enum maple_type mt)
2292 {
2293 /* Data is 1 indexed, every src has +1 added. */
2294
2295 if (cp->data <= mt_slots[mt]) {
2296 cp->split = cp->data - 1;
2297 cp->d_count = 1;
2298 goto node_setup;
2299 }
2300
2301 cp->split = (cp->data - 1) / 2;
2302 cp->d_count = 2;
2303 if (cp->data < mt_slots[mt] * 2)
2304 goto node_setup;
2305
2306 if (cp->data == mt_slots[mt] * 2) {
2307 unsigned char off;
2308 unsigned char s;
2309
2310 if (!ma_is_leaf(mt))
2311 goto node_setup;
2312
2313 /*
2314 * Leaf nodes are a bit tricky because we cannot assume the data
2315 * can fit due to the NULL limitation on node ends.
2316 */
2317 off = cp->split;
2318 for (s = 0; s < cp->s_count; s++) {
2319 unsigned char s_off;
2320
2321 s_off = cp->src[s].end - cp->src[s].start;
2322 if (s_off >= off)
2323 break;
2324
2325 s_off++;
2326 off -= s_off;
2327 }
2328
2329 off += cp->src[s].start;
2330 if (ma_slots(cp->src[s].node, cp->src[s].mt)[off])
2331 goto node_setup;
2332
2333 cp->split++;
2334 if (cp->split < mt_slots[mt])
2335 goto node_setup;
2336
2337 cp->split -= 2;
2338 if (cp->data - 2 - cp->split < mt_slots[mt])
2339 goto node_setup;
2340
2341 }
2342
2343 /* No other choice but to 3-way split the data */
2344 cp->split = (cp->data + 2) / 3;
2345 cp->d_count = 3;
2346
2347 node_setup:
2348 for (int i = 0; i < cp->d_count; i++) {
2349 cp->dst[i].mt = mt;
2350 cp->dst[i].node = ma_mnode_ptr(mas_pop_node(mas));
2351 }
2352 }
2353
append_mas_cp(struct maple_copy * cp,struct ma_state * mas,unsigned char start,unsigned char end)2354 static inline void append_mas_cp(struct maple_copy *cp,
2355 struct ma_state *mas, unsigned char start, unsigned char end)
2356 {
2357 struct maple_node *node;
2358 enum maple_type mt;
2359 unsigned char count;
2360
2361 count = cp->s_count;
2362 node = mas_mn(mas);
2363 mt = mte_node_type(mas->node);
2364 cp->src[count].node = node;
2365 cp->src[count].mt = mt;
2366 if (mas->end <= end)
2367 cp->src[count].max = mas->max;
2368 else
2369 cp->src[count].max = ma_pivots(node, mt)[end];
2370
2371 cp->src[count].start = start;
2372 cp->src[count].end = end;
2373 cp->s_count++;
2374 }
2375
append_wr_mas_cp(struct maple_copy * cp,struct ma_wr_state * wr_mas,unsigned char start,unsigned char end)2376 static inline void append_wr_mas_cp(struct maple_copy *cp,
2377 struct ma_wr_state *wr_mas, unsigned char start, unsigned char end)
2378 {
2379 unsigned char count;
2380
2381 count = cp->s_count;
2382 cp->src[count].node = wr_mas->node;
2383 cp->src[count].mt = wr_mas->type;
2384 if (wr_mas->mas->end <= end)
2385 cp->src[count].max = wr_mas->mas->max;
2386 else
2387 cp->src[count].max = wr_mas->pivots[end];
2388
2389 cp->src[count].start = start;
2390 cp->src[count].end = end;
2391 cp->s_count++;
2392 }
2393
init_cp_src(struct maple_copy * cp)2394 static inline void init_cp_src(struct maple_copy *cp)
2395 {
2396 cp->src[cp->s_count].node = ma_mnode_ptr(cp);
2397 cp->src[cp->s_count].mt = maple_copy;
2398 cp->src[cp->s_count].max = cp->max;
2399 cp->src[cp->s_count].start = 0;
2400 cp->src[cp->s_count].end = cp->end;
2401 cp->s_count++;
2402 }
2403
2404 /*
2405 * multi_src_setup() - Set the @cp node up with multiple sources to copy from.
2406 * @cp: The maple copy node
2407 * @l_wr_mas: The left write maple state
2408 * @r_wr_mas: The right write maple state
2409 * @sib: The sibling maple state
2410 *
2411 * Note: @sib->end == 0 indicates no sibling will be used.
2412 */
2413 static inline
multi_src_setup(struct maple_copy * cp,struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas,struct ma_state * sib)2414 void multi_src_setup(struct maple_copy *cp, struct ma_wr_state *l_wr_mas,
2415 struct ma_wr_state *r_wr_mas, struct ma_state *sib)
2416 {
2417 cp->s_count = 0;
2418 if (sib->end && sib->max < l_wr_mas->mas->min)
2419 append_mas_cp(cp, sib, 0, sib->end);
2420
2421 /* Copy left 0 - offset */
2422 if (l_wr_mas->mas->offset) {
2423 unsigned char off = l_wr_mas->mas->offset - 1;
2424
2425 append_wr_mas_cp(cp, l_wr_mas, 0, off);
2426 cp->src[cp->s_count - 1].max = cp->min - 1;
2427 }
2428
2429 init_cp_src(cp);
2430
2431 /* Copy right either from offset or offset + 1 pending on r_max */
2432 if (r_wr_mas->mas->end != r_wr_mas->offset_end)
2433 append_wr_mas_cp(cp, r_wr_mas, r_wr_mas->offset_end + 1,
2434 r_wr_mas->mas->end);
2435
2436 if (sib->end && sib->min > r_wr_mas->mas->max)
2437 append_mas_cp(cp, sib, 0, sib->end);
2438 }
2439
2440 static inline
cp_data_write(struct maple_copy * cp,struct ma_state * mas)2441 void cp_data_write(struct maple_copy *cp, struct ma_state *mas)
2442 {
2443 struct maple_node *dst, *src;
2444 unsigned char s, d;
2445 unsigned char dst_offset;
2446 unsigned char data_offset;
2447 unsigned char src_end, s_offset;
2448 unsigned char split;
2449 unsigned long s_max, d_max;
2450 unsigned char dst_size;
2451 enum maple_type s_mt, d_mt;
2452
2453 data_offset = 0;
2454 s = d = 0;
2455 /* Readability help */
2456 src = cp->src[s].node;
2457 dst = cp->dst[d].node;
2458 s_offset = cp->src[s].start;
2459 src_end = cp->src[s].end;
2460 split = cp->split;
2461 s_max = cp->src[s].max;
2462 s_mt = cp->src[s].mt;
2463 d_mt = cp->dst[d].mt;
2464 do {
2465 dst_offset = 0;
2466 d_max = 0;
2467 dst = cp->dst[d].node;
2468 d_mt = cp->dst[d].mt;
2469 dst_size = split + 1;
2470
2471 while (dst_size) {
2472 unsigned char size;
2473
2474 if (src_end - s_offset + 1 < dst_size)
2475 size = src_end - s_offset + 1;
2476 else
2477 size = dst_size;
2478
2479 d_max = node_copy(mas, src, s_offset, size, s_max, s_mt,
2480 dst, dst_offset, d_mt);
2481
2482 dst_offset += size;
2483 s_offset += size;
2484 if (s_offset > src_end) {
2485 /* This source is exhausted */
2486 s++;
2487 if (s >= cp->s_count) {
2488 cp->dst[d].max = d_max;
2489 node_finalise(dst, d_mt, dst_offset);
2490 return;
2491 }
2492 /* Reset local src */
2493 src = cp->src[s].node;
2494 s_offset = cp->src[s].start;
2495 src_end = cp->src[s].end;
2496 s_max = cp->src[s].max;
2497 s_mt = cp->src[s].mt;
2498 }
2499
2500 dst_size -= size;
2501 data_offset += size;
2502 }
2503
2504 split = cp->split;
2505 cp->dst[d].max = d_max;
2506 /* Handle null entries */
2507 if (cp->dst[d].max != ULONG_MAX &&
2508 !ma_slots(dst, d_mt)[dst_offset - 1]) {
2509 if (s_offset == cp->src[s].start) {
2510 s--;
2511 src = cp->src[s].node;
2512 src_end = cp->src[s].end;
2513 s_max = cp->src[s].max;
2514 s_mt = cp->src[s].mt;
2515 s_offset = src_end;
2516 } else {
2517 s_offset--;
2518 }
2519 /* Set dst max and clear pivot */
2520 split++;
2521 data_offset--;
2522 dst_offset--;
2523 cp->dst[d].max = ma_pivots(dst, d_mt)[dst_offset - 1];
2524 }
2525
2526 node_finalise(dst, d_mt, dst_offset);
2527 ++d; /* Next destination */
2528 if (d == cp->d_count - 1)
2529 split = cp->data - data_offset;
2530
2531 if (d >= cp->d_count) {
2532 WARN_ON(data_offset < cp->data);
2533 return;
2534 }
2535
2536 } while (data_offset <= cp->data);
2537 }
2538
2539 /*
2540 * cp_dst_to_slots() - Migrate the maple copy destination to the maple copy
2541 * slots
2542 * @cp: The maple copy node
2543 * @min: The minimal value represented
2544 * @max: The maximum value represented
2545 * @mas: The maple state
2546 */
cp_dst_to_slots(struct maple_copy * cp,unsigned long min,unsigned long max,struct ma_state * mas)2547 static inline void cp_dst_to_slots(struct maple_copy *cp, unsigned long min,
2548 unsigned long max, struct ma_state *mas)
2549 {
2550 unsigned char d;
2551 unsigned long slot_min = min;
2552
2553 for (d = 0; d < cp->d_count; d++) {
2554 struct maple_node *mn = cp->dst[d].node;
2555 enum maple_type mt = cp->dst[d].mt;
2556 unsigned long slot_max = cp->dst[d].max;
2557
2558 /*
2559 * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
2560 * documentation. Since these are new nodes, there are no
2561 * read-side operations that can view them until they are
2562 * inserted into the tree after an rcu_assign_pointer() call.
2563 */
2564 ma_init_slot(&cp->slot[d], mn, mt);
2565 cp->pivot[d] = slot_max;
2566 if (mt_is_alloc(mas->tree)) {
2567 if (ma_is_leaf(mt)) {
2568 cp->gap[d] = ma_leaf_max_gap(mn, mt, slot_min,
2569 slot_max, ma_pivots(mn, mt),
2570 ma_slots(mn, mt));
2571 } else {
2572 unsigned long *gaps = ma_gaps(mn, mt);
2573
2574 if (gaps) {
2575 unsigned char gap_slot;
2576
2577 gap_slot = ma_meta_gap(mn);
2578 cp->gap[d] = gaps[gap_slot];
2579 }
2580 }
2581 }
2582 slot_min = slot_max + 1;
2583 }
2584
2585 cp->end = cp->d_count - 1;
2586 cp->min = min;
2587 cp->max = max;
2588 }
2589
cp_is_new_root(struct maple_copy * cp,struct ma_state * mas)2590 static inline bool cp_is_new_root(struct maple_copy *cp, struct ma_state *mas)
2591 {
2592 if (cp->min || cp->max != ULONG_MAX)
2593 return false;
2594
2595 if (cp->d_count != 1) {
2596 enum maple_type mt = maple_arange_64;
2597
2598 if (!mt_is_alloc(mas->tree))
2599 mt = maple_range_64;
2600
2601 cp->data = cp->d_count;
2602 cp->s_count = 0;
2603 dst_setup(cp, mas, mt);
2604 init_cp_src(cp);
2605 node_copy(mas, cp->src[0].node, 0, cp->data, cp->max, maple_copy,
2606 cp->dst[0].node, 0, mt);
2607 node_finalise(cp->dst[0].node, mt, cp->end + 1);
2608 /*
2609 * Warning, see cp_leaf_init() comment and rcu_assign_pointer()
2610 * documentation. Since this is a new root, there are no
2611 * read-side operations that can view it until it is insert into
2612 * the tree after an rcu_assign_pointer() call.
2613 */
2614 ma_init_slot(&cp->slot[0], cp->dst[0].node, mt);
2615 cp->height++;
2616 }
2617 WARN_ON_ONCE(cp->dst[0].node != mte_to_node(
2618 mt_slot_locked(mas->tree, cp->slot, 0)));
2619 cp->dst[0].node->parent = ma_parent_ptr(mas_tree_parent(mas));
2620 mas->min = 0;
2621 mas->max = ULONG_MAX;
2622 mas->depth = 0;
2623 mas->node = mas_root_locked(mas);
2624 return true;
2625 }
2626
cp_converged(struct maple_copy * cp,struct ma_state * mas,struct ma_state * sib)2627 static inline bool cp_converged(struct maple_copy *cp, struct ma_state *mas,
2628 struct ma_state *sib)
2629 {
2630 if (cp->d_count != 1 || sib->end)
2631 return false;
2632
2633 cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
2634 return true;
2635 }
2636
2637 /*
2638 * spanning_ascend() - See if a spanning store operation has to keep walking up
2639 * the tree
2640 * @cp: The maple_copy node
2641 * @l_wr_mas: The left maple write state
2642 * @r_wr_mas: The right maple write state
2643 * @sib: the maple state of the sibling
2644 *
2645 * Returns: True if another iteration is necessary.
2646 */
spanning_ascend(struct maple_copy * cp,struct ma_state * mas,struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas,struct ma_state * sib)2647 static bool spanning_ascend(struct maple_copy *cp, struct ma_state *mas,
2648 struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas,
2649 struct ma_state *sib)
2650 {
2651 if (sib->end) {
2652 if (sib->max < l_wr_mas->mas->min)
2653 *l_wr_mas->mas = *sib;
2654 else
2655 *r_wr_mas->mas = *sib;
2656 }
2657
2658 cp_dst_to_slots(cp, l_wr_mas->mas->min, r_wr_mas->mas->max, mas);
2659 if (cp_is_new_root(cp, mas))
2660 return false;
2661
2662 /* Converged and has a single destination */
2663 if ((cp->d_count == 1) &&
2664 (l_wr_mas->mas->node == r_wr_mas->mas->node)) {
2665 cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
2666 return false;
2667 }
2668
2669 cp->height++;
2670 wr_mas_ascend(l_wr_mas);
2671 wr_mas_ascend(r_wr_mas);
2672 return true;
2673 }
2674
2675 static inline
copy_tree_location(const struct ma_state * src,struct ma_state * dst)2676 void copy_tree_location(const struct ma_state *src, struct ma_state *dst)
2677 {
2678 dst->node = src->node;
2679 dst->offset = src->offset;
2680 dst->min = src->min;
2681 dst->max = src->max;
2682 dst->end = src->end;
2683 dst->depth = src->depth;
2684 }
2685
2686 /*
2687 * rebalance_ascend() - Ascend the tree and set up for the next loop - if
2688 * necessary
2689 *
2690 * Return: True if there another rebalancing operation on the next level is
2691 * needed, false otherwise.
2692 */
rebalance_ascend(struct maple_copy * cp,struct ma_wr_state * wr_mas,struct ma_state * sib,struct ma_state * parent)2693 static inline bool rebalance_ascend(struct maple_copy *cp,
2694 struct ma_wr_state *wr_mas, struct ma_state *sib,
2695 struct ma_state *parent)
2696 {
2697 struct ma_state *mas;
2698 unsigned long min, max;
2699
2700 mas = wr_mas->mas;
2701 if (!sib->end) {
2702 min = mas->min;
2703 max = mas->max;
2704 } else if (sib->min > mas->max) { /* Move right succeeded */
2705 min = mas->min;
2706 max = sib->max;
2707 wr_mas->offset_end = parent->offset + 1;
2708 } else {
2709 min = sib->min;
2710 max = mas->max;
2711 wr_mas->offset_end = parent->offset;
2712 parent->offset--;
2713 }
2714
2715 cp_dst_to_slots(cp, min, max, mas);
2716 if (cp_is_new_root(cp, mas))
2717 return false;
2718
2719 if (cp_converged(cp, mas, sib))
2720 return false;
2721
2722 cp->height++;
2723 copy_tree_location(parent, mas);
2724 wr_mas_setup(wr_mas, mas);
2725 return true;
2726 }
2727
2728 /*
2729 * mas_root_expand() - Expand a root to a node
2730 * @mas: The maple state
2731 * @entry: The entry to store into the tree
2732 */
mas_root_expand(struct ma_state * mas,void * entry)2733 static inline void mas_root_expand(struct ma_state *mas, void *entry)
2734 {
2735 void *contents = mas_root_locked(mas);
2736 enum maple_type type = maple_leaf_64;
2737 struct maple_node *node;
2738 void __rcu **slots;
2739 unsigned long *pivots;
2740 int slot = 0;
2741
2742 node = mas_pop_node(mas);
2743 pivots = ma_pivots(node, type);
2744 slots = ma_slots(node, type);
2745 node->parent = ma_parent_ptr(mas_tree_parent(mas));
2746 mas->node = mt_mk_node(node, type);
2747 mas->status = ma_active;
2748
2749 if (mas->index) {
2750 if (contents) {
2751 rcu_assign_pointer(slots[slot], contents);
2752 if (likely(mas->index > 1))
2753 slot++;
2754 }
2755 pivots[slot++] = mas->index - 1;
2756 }
2757
2758 rcu_assign_pointer(slots[slot], entry);
2759 mas->offset = slot;
2760 pivots[slot] = mas->last;
2761 if (mas->last != ULONG_MAX)
2762 pivots[++slot] = ULONG_MAX;
2763
2764 mt_set_height(mas->tree, 1);
2765 ma_set_meta(node, maple_leaf_64, 0, slot);
2766 /* swap the new root into the tree */
2767 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
2768 }
2769
2770 /*
2771 * mas_store_root() - Storing value into root.
2772 * @mas: The maple state
2773 * @entry: The entry to store.
2774 *
2775 * There is no root node now and we are storing a value into the root - this
2776 * function either assigns the pointer or expands into a node.
2777 */
mas_store_root(struct ma_state * mas,void * entry)2778 static inline void mas_store_root(struct ma_state *mas, void *entry)
2779 {
2780 if (!entry) {
2781 if (!mas->index)
2782 rcu_assign_pointer(mas->tree->ma_root, NULL);
2783 } else if (likely((mas->last != 0) || (mas->index != 0)))
2784 mas_root_expand(mas, entry);
2785 else if (((unsigned long) (entry) & 3) == 2)
2786 mas_root_expand(mas, entry);
2787 else {
2788 rcu_assign_pointer(mas->tree->ma_root, entry);
2789 mas->status = ma_start;
2790 }
2791 }
2792
2793 /*
2794 * mas_is_span_wr() - Check if the write needs to be treated as a write that
2795 * spans the node.
2796 * @wr_mas: The maple write state
2797 *
2798 * Spanning writes are writes that start in one node and end in another OR if
2799 * the write of a %NULL will cause the node to end with a %NULL.
2800 *
2801 * Return: True if this is a spanning write, false otherwise.
2802 */
mas_is_span_wr(struct ma_wr_state * wr_mas)2803 static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
2804 {
2805 unsigned long max = wr_mas->r_max;
2806 unsigned long last = wr_mas->mas->last;
2807 enum maple_type type = wr_mas->type;
2808 void *entry = wr_mas->entry;
2809
2810 /* Contained in this pivot, fast path */
2811 if (last < max)
2812 return false;
2813
2814 if (ma_is_leaf(type)) {
2815 max = wr_mas->mas->max;
2816 if (last < max)
2817 return false;
2818 }
2819
2820 if (last == max) {
2821 /*
2822 * The last entry of leaf node cannot be NULL unless it is the
2823 * rightmost node (writing ULONG_MAX), otherwise it spans slots.
2824 */
2825 if (entry || last == ULONG_MAX)
2826 return false;
2827 }
2828
2829 trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
2830 return true;
2831 }
2832
mas_wr_walk_descend(struct ma_wr_state * wr_mas)2833 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
2834 {
2835 wr_mas->type = mte_node_type(wr_mas->mas->node);
2836 mas_wr_node_walk(wr_mas);
2837 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
2838 }
2839
mas_wr_walk_traverse(struct ma_wr_state * wr_mas)2840 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
2841 {
2842 wr_mas->mas->max = wr_mas->r_max;
2843 wr_mas->mas->min = wr_mas->r_min;
2844 wr_mas->mas->node = wr_mas->content;
2845 wr_mas->mas->offset = 0;
2846 wr_mas->mas->depth++;
2847 }
2848 /*
2849 * mas_wr_walk() - Walk the tree for a write.
2850 * @wr_mas: The maple write state
2851 *
2852 * Uses mas_slot_locked() and does not need to worry about dead nodes.
2853 *
2854 * Return: True if it's contained in a node, false on spanning write.
2855 */
mas_wr_walk(struct ma_wr_state * wr_mas)2856 static bool mas_wr_walk(struct ma_wr_state *wr_mas)
2857 {
2858 struct ma_state *mas = wr_mas->mas;
2859
2860 while (true) {
2861 mas_wr_walk_descend(wr_mas);
2862 if (unlikely(mas_is_span_wr(wr_mas)))
2863 return false;
2864
2865 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2866 mas->offset);
2867 if (ma_is_leaf(wr_mas->type))
2868 return true;
2869
2870 if (mas->end < mt_slots[wr_mas->type] - 1)
2871 wr_mas->vacant_height = mas->depth + 1;
2872
2873 if (ma_is_root(mas_mn(mas))) {
2874 /* root needs more than 2 entries to be sufficient + 1 */
2875 if (mas->end > 2)
2876 wr_mas->sufficient_height = 1;
2877 } else if (mas->end > mt_min_slots[wr_mas->type] + 1)
2878 wr_mas->sufficient_height = mas->depth + 1;
2879
2880 mas_wr_walk_traverse(wr_mas);
2881 }
2882
2883 return true;
2884 }
2885
mas_wr_walk_index(struct ma_wr_state * wr_mas)2886 static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
2887 {
2888 struct ma_state *mas = wr_mas->mas;
2889
2890 while (true) {
2891 mas_wr_walk_descend(wr_mas);
2892 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2893 mas->offset);
2894 if (ma_is_leaf(wr_mas->type))
2895 return;
2896 mas_wr_walk_traverse(wr_mas);
2897 }
2898 }
2899 /*
2900 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
2901 * @l_wr_mas: The left maple write state
2902 * @r_wr_mas: The right maple write state
2903 */
mas_extend_spanning_null(struct ma_wr_state * l_wr_mas,struct ma_wr_state * r_wr_mas)2904 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
2905 struct ma_wr_state *r_wr_mas)
2906 {
2907 struct ma_state *r_mas = r_wr_mas->mas;
2908 struct ma_state *l_mas = l_wr_mas->mas;
2909 unsigned char l_slot;
2910
2911 l_slot = l_mas->offset;
2912 if (!l_wr_mas->content)
2913 l_mas->index = l_wr_mas->r_min;
2914
2915 if ((l_mas->index == l_wr_mas->r_min) &&
2916 (l_slot &&
2917 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
2918 if (l_slot > 1)
2919 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
2920 else
2921 l_mas->index = l_mas->min;
2922
2923 l_mas->offset = l_slot - 1;
2924 l_wr_mas->r_min = l_mas->index;
2925 }
2926
2927 if (!r_wr_mas->content) {
2928 if (r_mas->last < r_wr_mas->r_max)
2929 r_mas->last = r_wr_mas->r_max;
2930 r_mas->offset++;
2931 } else if ((r_mas->last == r_wr_mas->r_max) &&
2932 (r_mas->last < r_mas->max) &&
2933 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
2934 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
2935 r_wr_mas->type, r_mas->offset + 1);
2936 r_mas->offset++;
2937 r_wr_mas->r_max = r_mas->last;
2938 }
2939 }
2940
mas_state_walk(struct ma_state * mas)2941 static inline void *mas_state_walk(struct ma_state *mas)
2942 {
2943 void *entry;
2944
2945 entry = mas_start(mas);
2946 if (mas_is_none(mas))
2947 return NULL;
2948
2949 if (mas_is_ptr(mas))
2950 return entry;
2951
2952 return mtree_range_walk(mas);
2953 }
2954
2955 /*
2956 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
2957 * to date.
2958 *
2959 * @mas: The maple state.
2960 *
2961 * Note: Leaves mas in undesirable state.
2962 * Return: The entry for @mas->index or %NULL on dead node.
2963 */
mtree_lookup_walk(struct ma_state * mas)2964 static inline void *mtree_lookup_walk(struct ma_state *mas)
2965 {
2966 unsigned long *pivots;
2967 unsigned char offset;
2968 struct maple_node *node;
2969 struct maple_enode *next;
2970 enum maple_type type;
2971 void __rcu **slots;
2972 unsigned char end;
2973
2974 next = mas->node;
2975 do {
2976 node = mte_to_node(next);
2977 type = mte_node_type(next);
2978 pivots = ma_pivots(node, type);
2979 end = mt_pivots[type];
2980 offset = 0;
2981 do {
2982 if (pivots[offset] >= mas->index)
2983 break;
2984 } while (++offset < end);
2985
2986 slots = ma_slots(node, type);
2987 next = mt_slot(mas->tree, slots, offset);
2988 if (unlikely(ma_dead_node(node)))
2989 goto dead_node;
2990 } while (!ma_is_leaf(type));
2991
2992 return (void *)next;
2993
2994 dead_node:
2995 mas_reset(mas);
2996 return NULL;
2997 }
2998
2999 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
3000 /*
3001 * mas_new_root() - Create a new root node that only contains the entry passed
3002 * in.
3003 * @mas: The maple state
3004 * @entry: The entry to store.
3005 *
3006 * Only valid when the index == 0 and the last == ULONG_MAX
3007 */
mas_new_root(struct ma_state * mas,void * entry)3008 static inline void mas_new_root(struct ma_state *mas, void *entry)
3009 {
3010 struct maple_enode *root = mas_root_locked(mas);
3011 enum maple_type type = maple_leaf_64;
3012 struct maple_node *node;
3013 void __rcu **slots;
3014 unsigned long *pivots;
3015
3016 WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);
3017
3018 if (!entry) {
3019 mt_set_height(mas->tree, 0);
3020 rcu_assign_pointer(mas->tree->ma_root, entry);
3021 mas->status = ma_start;
3022 goto done;
3023 }
3024
3025 node = mas_pop_node(mas);
3026 pivots = ma_pivots(node, type);
3027 slots = ma_slots(node, type);
3028 node->parent = ma_parent_ptr(mas_tree_parent(mas));
3029 mas->node = mt_mk_node(node, type);
3030 mas->status = ma_active;
3031 rcu_assign_pointer(slots[0], entry);
3032 pivots[0] = mas->last;
3033 mt_set_height(mas->tree, 1);
3034 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3035
3036 done:
3037 if (xa_is_node(root))
3038 mte_destroy_walk(root, mas->tree);
3039 }
3040 /*
3041 * mas_wr_spanning_store() - Create a subtree with the store operation completed
3042 * and new nodes where necessary, then place the sub-tree in the actual tree.
3043 * Note that mas is expected to point to the node which caused the store to
3044 * span.
3045 * @wr_mas: The maple write state
3046 */
mas_wr_spanning_store(struct ma_wr_state * wr_mas)3047 static void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
3048 {
3049 struct maple_copy cp;
3050 struct ma_state *mas;
3051 struct ma_state sib;
3052
3053 /* Left and Right side of spanning store */
3054 MA_STATE(r_mas, NULL, 0, 0);
3055 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
3056
3057 /*
3058 * A store operation that spans multiple nodes is called a spanning
3059 * store and is handled early in the store call stack by the function
3060 * mas_is_span_wr(). When a spanning store is identified, the maple
3061 * state is duplicated. The first maple state walks the left tree path
3062 * to ``index``, the duplicate walks the right tree path to ``last``.
3063 * The data in the two nodes are combined into a single node, two nodes,
3064 * or possibly three nodes (see the 3-way split above). A ``NULL``
3065 * written to the last entry of a node is considered a spanning store as
3066 * a rebalance is required for the operation to complete and an overflow
3067 * of data may happen.
3068 */
3069 mas = wr_mas->mas;
3070 trace_ma_op(TP_FCT, mas);
3071
3072 if (unlikely(!mas->index && mas->last == ULONG_MAX))
3073 return mas_new_root(mas, wr_mas->entry);
3074 /*
3075 * Node rebalancing may occur due to this store, so there may be three new
3076 * entries per level plus a new root.
3077 */
3078
3079 /*
3080 * Set up right side. Need to get to the next offset after the spanning
3081 * store to ensure it's not NULL and to combine both the next node and
3082 * the node with the start together.
3083 */
3084 r_mas = *mas;
3085 /* Avoid overflow, walk to next slot in the tree. */
3086 if (r_mas.last + 1)
3087 r_mas.last++;
3088
3089 r_mas.index = r_mas.last;
3090 mas_wr_walk_index(&r_wr_mas);
3091 r_mas.last = r_mas.index = mas->last;
3092 r_wr_mas.end_piv = r_wr_mas.r_max;
3093
3094 /* Set up left side. */
3095 mas_wr_walk_index(wr_mas);
3096
3097 if (!wr_mas->entry) {
3098 mas_extend_spanning_null(wr_mas, &r_wr_mas);
3099 mas->last = r_mas.last;
3100 }
3101
3102 /* expanding NULLs may make this cover the entire range */
3103 if (!mas->index && r_mas.last == ULONG_MAX) {
3104 mas_set_range(mas, 0, ULONG_MAX);
3105 return mas_new_root(mas, wr_mas->entry);
3106 }
3107
3108 cp_leaf_init(&cp, mas, wr_mas, &r_wr_mas);
3109 do {
3110 spanning_data(&cp, wr_mas, &r_wr_mas, &sib);
3111 multi_src_setup(&cp, wr_mas, &r_wr_mas, &sib);
3112 dst_setup(&cp, mas, wr_mas->type);
3113 cp_data_write(&cp, mas);
3114 } while (spanning_ascend(&cp, mas, wr_mas, &r_wr_mas, &sib));
3115
3116 mas_wmb_replace(mas, &cp);
3117 }
3118
3119 /*
3120 * mas_wr_node_store() - Attempt to store the value in a node
3121 * @wr_mas: The maple write state
3122 *
3123 * Attempts to reuse the node, but may allocate.
3124 */
mas_wr_node_store(struct ma_wr_state * wr_mas)3125 static inline void mas_wr_node_store(struct ma_wr_state *wr_mas)
3126 {
3127 unsigned char dst_offset, offset_end;
3128 unsigned char copy_size, node_pivots;
3129 struct maple_node reuse, *newnode;
3130 unsigned long *dst_pivots;
3131 void __rcu **dst_slots;
3132 unsigned char new_end;
3133 struct ma_state *mas;
3134 bool in_rcu;
3135
3136 mas = wr_mas->mas;
3137 trace_ma_op(TP_FCT, mas);
3138 in_rcu = mt_in_rcu(mas->tree);
3139 offset_end = wr_mas->offset_end;
3140 node_pivots = mt_pivots[wr_mas->type];
3141 /* Assume last adds an entry */
3142 new_end = mas->end + 1 - offset_end + mas->offset;
3143 if (mas->last == wr_mas->end_piv) {
3144 offset_end++; /* don't copy this offset */
3145 new_end--;
3146 }
3147
3148 /* set up node. */
3149 if (in_rcu) {
3150 newnode = mas_pop_node(mas);
3151 } else {
3152 memset(&reuse, 0, sizeof(struct maple_node));
3153 newnode = &reuse;
3154 }
3155
3156 newnode->parent = mas_mn(mas)->parent;
3157 dst_pivots = ma_pivots(newnode, wr_mas->type);
3158 dst_slots = ma_slots(newnode, wr_mas->type);
3159 /* Copy from start to insert point */
3160 if (mas->offset) {
3161 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
3162 memcpy(dst_slots, wr_mas->slots, sizeof(void __rcu *) * mas->offset);
3163 }
3164
3165 /* Handle insert of new range starting after old range */
3166 if (wr_mas->r_min < mas->index) {
3167 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
3168 dst_pivots[mas->offset++] = mas->index - 1;
3169 new_end++;
3170 }
3171
3172 /* Store the new entry and range end. */
3173 if (mas->offset < node_pivots)
3174 dst_pivots[mas->offset] = mas->last;
3175 rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);
3176
3177 /*
3178 * this range wrote to the end of the node or it overwrote the rest of
3179 * the data
3180 */
3181 if (offset_end > mas->end)
3182 goto done;
3183
3184 dst_offset = mas->offset + 1;
3185 /* Copy to the end of node if necessary. */
3186 copy_size = mas->end - offset_end + 1;
3187 memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
3188 sizeof(void __rcu *) * copy_size);
3189 memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
3190 sizeof(unsigned long) * (copy_size - 1));
3191
3192 if (new_end < node_pivots)
3193 dst_pivots[new_end] = mas->max;
3194
3195 done:
3196 mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
3197 if (in_rcu) {
3198 struct maple_enode *old_enode = mas->node;
3199
3200 mas->node = mt_mk_node(newnode, wr_mas->type);
3201 mas_replace_node(mas, old_enode, mas_mt_height(mas));
3202 } else {
3203 memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
3204 }
3205 trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
3206 mas_update_gap(mas);
3207 mas->end = new_end;
3208 }
3209
3210 /*
3211 * mas_wr_slot_store: Attempt to store a value in a slot.
3212 * @wr_mas: the maple write state
3213 */
mas_wr_slot_store(struct ma_wr_state * wr_mas)3214 static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
3215 {
3216 struct ma_state *mas = wr_mas->mas;
3217 unsigned char offset = mas->offset;
3218 void __rcu **slots = wr_mas->slots;
3219 bool gap = false;
3220
3221 gap |= !mt_slot_locked(mas->tree, slots, offset);
3222 gap |= !mt_slot_locked(mas->tree, slots, offset + 1);
3223
3224 if (wr_mas->offset_end - offset == 1) {
3225 if (mas->index == wr_mas->r_min) {
3226 /* Overwriting the range and a part of the next one */
3227 rcu_assign_pointer(slots[offset], wr_mas->entry);
3228 wr_mas->pivots[offset] = mas->last;
3229 } else {
3230 /* Overwriting a part of the range and the next one */
3231 rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3232 wr_mas->pivots[offset] = mas->index - 1;
3233 mas->offset++; /* Keep mas accurate. */
3234 }
3235 } else {
3236 WARN_ON_ONCE(mt_in_rcu(mas->tree));
3237 /*
3238 * Expand the range, only partially overwriting the previous and
3239 * next ranges
3240 */
3241 gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
3242 rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3243 wr_mas->pivots[offset] = mas->index - 1;
3244 wr_mas->pivots[offset + 1] = mas->last;
3245 mas->offset++; /* Keep mas accurate. */
3246 }
3247
3248 trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
3249 /*
3250 * Only update gap when the new entry is empty or there is an empty
3251 * entry in the original two ranges.
3252 */
3253 if (!wr_mas->entry || gap)
3254 mas_update_gap(mas);
3255 }
3256
mas_wr_extend_null(struct ma_wr_state * wr_mas)3257 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
3258 {
3259 struct ma_state *mas = wr_mas->mas;
3260
3261 if (!wr_mas->slots[wr_mas->offset_end]) {
3262 /* If this one is null, the next and prev are not */
3263 mas->last = wr_mas->end_piv;
3264 } else {
3265 /* Check next slot(s) if we are overwriting the end */
3266 if ((mas->last == wr_mas->end_piv) &&
3267 (mas->end != wr_mas->offset_end) &&
3268 !wr_mas->slots[wr_mas->offset_end + 1]) {
3269 wr_mas->offset_end++;
3270 if (wr_mas->offset_end == mas->end)
3271 mas->last = mas->max;
3272 else
3273 mas->last = wr_mas->pivots[wr_mas->offset_end];
3274 wr_mas->end_piv = mas->last;
3275 }
3276 }
3277
3278 if (!wr_mas->content) {
3279 /* If this one is null, the next and prev are not */
3280 mas->index = wr_mas->r_min;
3281 } else {
3282 /* Check prev slot if we are overwriting the start */
3283 if (mas->index == wr_mas->r_min && mas->offset &&
3284 !wr_mas->slots[mas->offset - 1]) {
3285 mas->offset--;
3286 wr_mas->r_min = mas->index =
3287 mas_safe_min(mas, wr_mas->pivots, mas->offset);
3288 wr_mas->r_max = wr_mas->pivots[mas->offset];
3289 }
3290 }
3291 }
3292
mas_wr_end_piv(struct ma_wr_state * wr_mas)3293 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
3294 {
3295 while ((wr_mas->offset_end < wr_mas->mas->end) &&
3296 (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
3297 wr_mas->offset_end++;
3298
3299 if (wr_mas->offset_end < wr_mas->mas->end)
3300 wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
3301 else
3302 wr_mas->end_piv = wr_mas->mas->max;
3303 }
3304
mas_wr_new_end(struct ma_wr_state * wr_mas)3305 static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
3306 {
3307 struct ma_state *mas = wr_mas->mas;
3308 unsigned char new_end = mas->end + 2;
3309
3310 new_end -= wr_mas->offset_end - mas->offset;
3311 if (wr_mas->r_min == mas->index)
3312 new_end--;
3313
3314 if (wr_mas->end_piv == mas->last)
3315 new_end--;
3316
3317 return new_end;
3318 }
3319
3320 /*
3321 * mas_wr_append: Attempt to append
3322 * @wr_mas: the maple write state
3323 *
3324 * This is currently unsafe in rcu mode since the end of the node may be cached
3325 * by readers while the node contents may be updated which could result in
3326 * inaccurate information.
3327 */
mas_wr_append(struct ma_wr_state * wr_mas)3328 static inline void mas_wr_append(struct ma_wr_state *wr_mas)
3329 {
3330 struct ma_state *mas = wr_mas->mas;
3331 void __rcu **slots;
3332 unsigned char end = mas->end;
3333 unsigned char new_end = mas_wr_new_end(wr_mas);
3334
3335 if (new_end < mt_pivots[wr_mas->type]) {
3336 wr_mas->pivots[new_end] = wr_mas->pivots[end];
3337 ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
3338 }
3339
3340 slots = wr_mas->slots;
3341 if (new_end == end + 1) {
3342 if (mas->last == wr_mas->r_max) {
3343 /* Append to end of range */
3344 rcu_assign_pointer(slots[new_end], wr_mas->entry);
3345 wr_mas->pivots[end] = mas->index - 1;
3346 mas->offset = new_end;
3347 } else {
3348 /* Append to start of range */
3349 rcu_assign_pointer(slots[new_end], wr_mas->content);
3350 wr_mas->pivots[end] = mas->last;
3351 rcu_assign_pointer(slots[end], wr_mas->entry);
3352 }
3353 } else {
3354 /* Append to the range without touching any boundaries. */
3355 rcu_assign_pointer(slots[new_end], wr_mas->content);
3356 wr_mas->pivots[end + 1] = mas->last;
3357 rcu_assign_pointer(slots[end + 1], wr_mas->entry);
3358 wr_mas->pivots[end] = mas->index - 1;
3359 mas->offset = end + 1;
3360 }
3361
3362 if (!wr_mas->content || !wr_mas->entry)
3363 mas_update_gap(mas);
3364
3365 mas->end = new_end;
3366 trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
3367 }
3368
3369 /*
3370 * split_ascend() - See if a split operation has to keep walking up the tree
3371 * @cp: The maple_copy node
3372 * @wr_mas: The maple write state
3373 * @sib: the maple state of the sibling
3374 *
3375 * Return: true if another split operation on the next level is needed, false
3376 * otherwise
3377 */
split_ascend(struct maple_copy * cp,struct ma_wr_state * wr_mas,struct ma_state * sib,struct ma_state * parent)3378 static inline bool split_ascend(struct maple_copy *cp,
3379 struct ma_wr_state *wr_mas, struct ma_state *sib,
3380 struct ma_state *parent)
3381 {
3382 struct ma_state *mas;
3383 unsigned long min, max;
3384
3385 mas = wr_mas->mas;
3386 min = mas->min; /* push right, or normal split */
3387 max = mas->max;
3388 wr_mas->offset_end = parent->offset;
3389 if (sib->end) {
3390 if (sib->max < mas->min) {
3391 min = sib->min; /* push left */
3392 parent->offset--;
3393 } else {
3394 max = sib->max; /* push right */
3395 wr_mas->offset_end++;
3396 }
3397 }
3398
3399 cp_dst_to_slots(cp, min, max, mas);
3400 if (cp_is_new_root(cp, mas))
3401 return false;
3402
3403 if (cp_converged(cp, mas, sib))
3404 return false;
3405
3406 cp->height++;
3407 copy_tree_location(parent, mas);
3408 wr_mas_setup(wr_mas, mas);
3409 return true;
3410 }
3411
3412 /*
3413 * split_data() - Calculate the @cp data, populate @sib if the data can be
3414 * pushed into a sibling.
3415 * @cp: The maple copy node
3416 * @wr_mas: The left write maple state
3417 * @sib: The maple state of the sibling.
3418 *
3419 * Note: @cp->data is a size and not indexed by 0. @sib->end may be set to 0 to
3420 * indicate it will not be used.
3421 *
3422 */
split_data(struct maple_copy * cp,struct ma_wr_state * wr_mas,struct ma_state * sib,struct ma_state * parent)3423 static inline void split_data(struct maple_copy *cp,
3424 struct ma_wr_state *wr_mas, struct ma_state *sib,
3425 struct ma_state *parent)
3426 {
3427 cp_data_calc(cp, wr_mas, wr_mas);
3428 if (cp->data <= mt_slots[wr_mas->type]) {
3429 sib->end = 0;
3430 return;
3431 }
3432
3433 push_data_sib(cp, wr_mas->mas, sib, parent);
3434 if (sib->end)
3435 cp->data += sib->end + 1;
3436 }
3437
3438 /*
3439 * mas_wr_split() - Expand one node into two
3440 * @wr_mas: The write maple state
3441 */
mas_wr_split(struct ma_wr_state * wr_mas)3442 static void mas_wr_split(struct ma_wr_state *wr_mas)
3443 {
3444 struct ma_state parent;
3445 struct ma_state *mas;
3446 struct maple_copy cp;
3447 struct ma_state sib;
3448
3449 mas = wr_mas->mas;
3450 trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
3451 parent = *mas;
3452 cp_leaf_init(&cp, mas, wr_mas, wr_mas);
3453 do {
3454 if (!mte_is_root(parent.node)) {
3455 mas_ascend(&parent);
3456 parent.end = mas_data_end(&parent);
3457 }
3458 split_data(&cp, wr_mas, &sib, &parent);
3459 multi_src_setup(&cp, wr_mas, wr_mas, &sib);
3460 dst_setup(&cp, mas, wr_mas->type);
3461 cp_data_write(&cp, mas);
3462 } while (split_ascend(&cp, wr_mas, &sib, &parent));
3463
3464 mas_wmb_replace(mas, &cp);
3465 }
3466
3467 /*
3468 * mas_wr_rebalance() - Insufficient data in one node needs to either get data
3469 * from a sibling or absorb a sibling all together.
3470 * @wr_mas: The write maple state
3471 *
3472 * Rebalance is different than a spanning store in that the write state is
3473 * already at the leaf node that's being altered.
3474 */
mas_wr_rebalance(struct ma_wr_state * wr_mas)3475 static void mas_wr_rebalance(struct ma_wr_state *wr_mas)
3476 {
3477 struct ma_state parent;
3478 struct ma_state *mas;
3479 struct maple_copy cp;
3480 struct ma_state sib;
3481
3482 /*
3483 * Rebalancing occurs if a node is insufficient. Data is rebalanced
3484 * against the node to the right if it exists, otherwise the node to the
3485 * left of this node is rebalanced against this node. If rebalancing
3486 * causes just one node to be produced instead of two, then the parent
3487 * is also examined and rebalanced if it is insufficient. Every level
3488 * tries to combine the data in the same way. If one node contains the
3489 * entire range of the tree, then that node is used as a new root node.
3490 */
3491
3492 mas = wr_mas->mas;
3493 trace_ma_op(TP_FCT, mas);
3494 parent = *mas;
3495 cp_leaf_init(&cp, mas, wr_mas, wr_mas);
3496 do {
3497 if (!mte_is_root(parent.node)) {
3498 mas_ascend(&parent);
3499 parent.end = mas_data_end(&parent);
3500 }
3501 rebalance_data(&cp, wr_mas, &sib, &parent);
3502 multi_src_setup(&cp, wr_mas, wr_mas, &sib);
3503 dst_setup(&cp, mas, wr_mas->type);
3504 cp_data_write(&cp, mas);
3505 } while (rebalance_ascend(&cp, wr_mas, &sib, &parent));
3506
3507 mas_wmb_replace(mas, &cp);
3508 }
3509
3510 /*
3511 * mas_wr_store_entry() - Internal call to store a value
3512 * @wr_mas: The maple write state
3513 */
mas_wr_store_entry(struct ma_wr_state * wr_mas)3514 static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
3515 {
3516 struct ma_state *mas = wr_mas->mas;
3517
3518 switch (mas->store_type) {
3519 case wr_exact_fit:
3520 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
3521 if (!!wr_mas->entry ^ !!wr_mas->content)
3522 mas_update_gap(mas);
3523 break;
3524 case wr_append:
3525 mas_wr_append(wr_mas);
3526 break;
3527 case wr_slot_store:
3528 mas_wr_slot_store(wr_mas);
3529 break;
3530 case wr_node_store:
3531 mas_wr_node_store(wr_mas);
3532 break;
3533 case wr_spanning_store:
3534 mas_wr_spanning_store(wr_mas);
3535 break;
3536 case wr_split_store:
3537 mas_wr_split(wr_mas);
3538 break;
3539 case wr_rebalance:
3540 mas_wr_rebalance(wr_mas);
3541 break;
3542 case wr_new_root:
3543 mas_new_root(mas, wr_mas->entry);
3544 break;
3545 case wr_store_root:
3546 mas_store_root(mas, wr_mas->entry);
3547 break;
3548 case wr_invalid:
3549 MT_BUG_ON(mas->tree, 1);
3550 }
3551 }
3552
mas_wr_prealloc_setup(struct ma_wr_state * wr_mas)3553 static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
3554 {
3555 struct ma_state *mas = wr_mas->mas;
3556
3557 if (!mas_is_active(mas)) {
3558 if (mas_is_start(mas))
3559 goto set_content;
3560
3561 if (unlikely(mas_is_paused(mas)))
3562 goto reset;
3563
3564 if (unlikely(mas_is_none(mas)))
3565 goto reset;
3566
3567 if (unlikely(mas_is_overflow(mas)))
3568 goto reset;
3569
3570 if (unlikely(mas_is_underflow(mas)))
3571 goto reset;
3572 }
3573
3574 /*
3575 * A less strict version of mas_is_span_wr() where we allow spanning
3576 * writes within this node. This is to stop partial walks in
3577 * mas_prealloc() from being reset.
3578 */
3579 if (mas->last > mas->max)
3580 goto reset;
3581
3582 if (wr_mas->entry)
3583 goto set_content;
3584
3585 if (mte_is_leaf(mas->node) && mas->last == mas->max)
3586 goto reset;
3587
3588 goto set_content;
3589
3590 reset:
3591 mas_reset(mas);
3592 set_content:
3593 wr_mas->content = mas_start(mas);
3594 }
3595
3596 /**
3597 * mas_prealloc_calc() - Calculate number of nodes needed for a
3598 * given store oepration
3599 * @wr_mas: The maple write state
3600 * @entry: The entry to store into the tree
3601 *
3602 * Return: Number of nodes required for preallocation.
3603 */
mas_prealloc_calc(struct ma_wr_state * wr_mas,void * entry)3604 static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
3605 {
3606 struct ma_state *mas = wr_mas->mas;
3607 unsigned char height = mas_mt_height(mas);
3608 int ret = height * 3 + 1;
3609 unsigned char delta = height - wr_mas->vacant_height;
3610
3611 switch (mas->store_type) {
3612 case wr_exact_fit:
3613 case wr_append:
3614 case wr_slot_store:
3615 ret = 0;
3616 break;
3617 case wr_spanning_store:
3618 if (wr_mas->sufficient_height < wr_mas->vacant_height)
3619 ret = (height - wr_mas->sufficient_height) * 3 + 1;
3620 else
3621 ret = delta * 3 + 1;
3622 break;
3623 case wr_split_store:
3624 ret = delta * 2 + 1;
3625 break;
3626 case wr_rebalance:
3627 if (wr_mas->sufficient_height < wr_mas->vacant_height)
3628 ret = (height - wr_mas->sufficient_height) * 2 + 1;
3629 else
3630 ret = delta * 2 + 1;
3631 break;
3632 case wr_node_store:
3633 ret = mt_in_rcu(mas->tree) ? 1 : 0;
3634 break;
3635 case wr_new_root:
3636 ret = 1;
3637 break;
3638 case wr_store_root:
3639 if (likely((mas->last != 0) || (mas->index != 0)))
3640 ret = 1;
3641 else if (((unsigned long) (entry) & 3) == 2)
3642 ret = 1;
3643 else
3644 ret = 0;
3645 break;
3646 case wr_invalid:
3647 WARN_ON_ONCE(1);
3648 }
3649
3650 mas->node_request = ret;
3651 }
3652
3653 /*
3654 * mas_wr_store_type() - Determine the store type for a given
3655 * store operation.
3656 * @wr_mas: The maple write state
3657 *
3658 * Return: the type of store needed for the operation
3659 */
mas_wr_store_type(struct ma_wr_state * wr_mas)3660 static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
3661 {
3662 struct ma_state *mas = wr_mas->mas;
3663 unsigned char new_end;
3664
3665 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
3666 return wr_store_root;
3667
3668 if (unlikely(!mas_wr_walk(wr_mas)))
3669 return wr_spanning_store;
3670
3671 /* At this point, we are at the leaf node that needs to be altered. */
3672 mas_wr_end_piv(wr_mas);
3673 if (!wr_mas->entry)
3674 mas_wr_extend_null(wr_mas);
3675
3676 if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last))
3677 return wr_exact_fit;
3678
3679 if (unlikely(!mas->index && mas->last == ULONG_MAX))
3680 return wr_new_root;
3681
3682 new_end = mas_wr_new_end(wr_mas);
3683 /* Potential spanning rebalance collapsing a node */
3684 if (new_end < mt_min_slots[wr_mas->type]) {
3685 if (!mte_is_root(mas->node))
3686 return wr_rebalance;
3687 return wr_node_store;
3688 }
3689
3690 if (new_end >= mt_slots[wr_mas->type])
3691 return wr_split_store;
3692
3693 if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end))
3694 return wr_append;
3695
3696 if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
3697 (wr_mas->offset_end - mas->offset == 1)))
3698 return wr_slot_store;
3699
3700 return wr_node_store;
3701 }
3702
3703 /**
3704 * mas_wr_preallocate() - Preallocate enough nodes for a store operation
3705 * @wr_mas: The maple write state
3706 * @entry: The entry that will be stored
3707 *
3708 */
mas_wr_preallocate(struct ma_wr_state * wr_mas,void * entry)3709 static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
3710 {
3711 struct ma_state *mas = wr_mas->mas;
3712
3713 mas_wr_prealloc_setup(wr_mas);
3714 mas->store_type = mas_wr_store_type(wr_mas);
3715 mas_prealloc_calc(wr_mas, entry);
3716 if (!mas->node_request)
3717 return;
3718
3719 mas_alloc_nodes(mas, GFP_NOWAIT);
3720 }
3721
3722 /**
3723 * mas_insert() - Internal call to insert a value
3724 * @mas: The maple state
3725 * @entry: The entry to store
3726 *
3727 * Return: %NULL or the contents that already exists at the requested index
3728 * otherwise. The maple state needs to be checked for error conditions.
3729 */
mas_insert(struct ma_state * mas,void * entry)3730 static inline void *mas_insert(struct ma_state *mas, void *entry)
3731 {
3732 MA_WR_STATE(wr_mas, mas, entry);
3733
3734 /*
3735 * Inserting a new range inserts either 0, 1, or 2 pivots within the
3736 * tree. If the insert fits exactly into an existing gap with a value
3737 * of NULL, then the slot only needs to be written with the new value.
3738 * If the range being inserted is adjacent to another range, then only a
3739 * single pivot needs to be inserted (as well as writing the entry). If
3740 * the new range is within a gap but does not touch any other ranges,
3741 * then two pivots need to be inserted: the start - 1, and the end. As
3742 * usual, the entry must be written. Most operations require a new node
3743 * to be allocated and replace an existing node to ensure RCU safety,
3744 * when in RCU mode. The exception to requiring a newly allocated node
3745 * is when inserting at the end of a node (appending). When done
3746 * carefully, appending can reuse the node in place.
3747 */
3748 wr_mas.content = mas_start(mas);
3749 if (wr_mas.content)
3750 goto exists;
3751
3752 mas_wr_preallocate(&wr_mas, entry);
3753 if (mas_is_err(mas))
3754 return NULL;
3755
3756 /* spanning writes always overwrite something */
3757 if (mas->store_type == wr_spanning_store)
3758 goto exists;
3759
3760 /* At this point, we are at the leaf node that needs to be altered. */
3761 if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
3762 wr_mas.offset_end = mas->offset;
3763 wr_mas.end_piv = wr_mas.r_max;
3764
3765 if (wr_mas.content || (mas->last > wr_mas.r_max))
3766 goto exists;
3767 }
3768
3769 mas_wr_store_entry(&wr_mas);
3770 return wr_mas.content;
3771
3772 exists:
3773 mas_set_err(mas, -EEXIST);
3774 return wr_mas.content;
3775
3776 }
3777
3778 /**
3779 * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
3780 * @mas: The maple state.
3781 * @startp: Pointer to ID.
3782 * @range_lo: Lower bound of range to search.
3783 * @range_hi: Upper bound of range to search.
3784 * @entry: The entry to store.
3785 * @next: Pointer to next ID to allocate.
3786 * @gfp: The GFP_FLAGS to use for allocations.
3787 *
3788 * Return: 0 if the allocation succeeded without wrapping, 1 if the
3789 * allocation succeeded after wrapping, or -EBUSY if there are no
3790 * free entries.
3791 */
mas_alloc_cyclic(struct ma_state * mas,unsigned long * startp,void * entry,unsigned long range_lo,unsigned long range_hi,unsigned long * next,gfp_t gfp)3792 int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
3793 void *entry, unsigned long range_lo, unsigned long range_hi,
3794 unsigned long *next, gfp_t gfp)
3795 {
3796 unsigned long min = range_lo;
3797 int ret = 0;
3798
3799 range_lo = max(min, *next);
3800 ret = mas_empty_area(mas, range_lo, range_hi, 1);
3801 if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
3802 mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
3803 ret = 1;
3804 }
3805 if (ret < 0 && range_lo > min) {
3806 mas_reset(mas);
3807 ret = mas_empty_area(mas, min, range_hi, 1);
3808 if (ret == 0)
3809 ret = 1;
3810 }
3811 if (ret < 0)
3812 return ret;
3813
3814 do {
3815 mas_insert(mas, entry);
3816 } while (mas_nomem(mas, gfp));
3817 if (mas_is_err(mas))
3818 return xa_err(mas->node);
3819
3820 *startp = mas->index;
3821 *next = *startp + 1;
3822 if (*next == 0)
3823 mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
3824
3825 mas_destroy(mas);
3826 return ret;
3827 }
3828 EXPORT_SYMBOL(mas_alloc_cyclic);
3829
mas_rewalk(struct ma_state * mas,unsigned long index)3830 static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
3831 {
3832 retry:
3833 mas_set(mas, index);
3834 mas_state_walk(mas);
3835 if (mas_is_start(mas))
3836 goto retry;
3837 }
3838
mas_rewalk_if_dead(struct ma_state * mas,struct maple_node * node,const unsigned long index)3839 static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
3840 struct maple_node *node, const unsigned long index)
3841 {
3842 if (unlikely(ma_dead_node(node))) {
3843 mas_rewalk(mas, index);
3844 return true;
3845 }
3846 return false;
3847 }
3848
3849 /*
3850 * mas_prev_node() - Find the prev non-null entry at the same level in the
3851 * tree. The prev value will be mas->node[mas->offset] or the status will be
3852 * ma_none.
3853 * @mas: The maple state
3854 * @min: The lower limit to search
3855 *
3856 * The prev node value will be mas->node[mas->offset] or the status will be
3857 * ma_none.
3858 * Return: 1 if the node is dead, 0 otherwise.
3859 */
mas_prev_node(struct ma_state * mas,unsigned long min)3860 static int mas_prev_node(struct ma_state *mas, unsigned long min)
3861 {
3862 enum maple_type mt;
3863 int offset, level;
3864 void __rcu **slots;
3865 struct maple_node *node;
3866 unsigned long *pivots;
3867 unsigned long max;
3868
3869 node = mas_mn(mas);
3870 if (!mas->min)
3871 goto no_entry;
3872
3873 max = mas->min - 1;
3874 if (max < min)
3875 goto no_entry;
3876
3877 level = 0;
3878 do {
3879 if (ma_is_root(node))
3880 goto no_entry;
3881
3882 /* Walk up. */
3883 if (unlikely(mas_ascend(mas)))
3884 return 1;
3885 offset = mas->offset;
3886 level++;
3887 node = mas_mn(mas);
3888 } while (!offset);
3889
3890 offset--;
3891 mt = mte_node_type(mas->node);
3892 while (level > 1) {
3893 level--;
3894 slots = ma_slots(node, mt);
3895 mas->node = mas_slot(mas, slots, offset);
3896 if (unlikely(ma_dead_node(node)))
3897 return 1;
3898
3899 mt = mte_node_type(mas->node);
3900 node = mas_mn(mas);
3901 pivots = ma_pivots(node, mt);
3902 offset = ma_data_end(node, mt, pivots, max);
3903 if (unlikely(ma_dead_node(node)))
3904 return 1;
3905 }
3906
3907 slots = ma_slots(node, mt);
3908 mas->node = mas_slot(mas, slots, offset);
3909 pivots = ma_pivots(node, mt);
3910 if (unlikely(ma_dead_node(node)))
3911 return 1;
3912
3913 if (likely(offset))
3914 mas->min = pivots[offset - 1] + 1;
3915 mas->max = max;
3916 mas->offset = mas_data_end(mas);
3917 if (unlikely(mte_dead_node(mas->node)))
3918 return 1;
3919
3920 mas->end = mas->offset;
3921 return 0;
3922
3923 no_entry:
3924 if (unlikely(ma_dead_node(node)))
3925 return 1;
3926
3927 mas->status = ma_underflow;
3928 return 0;
3929 }
3930
3931 /*
3932 * mas_prev_slot() - Get the entry in the previous slot
3933 *
3934 * @mas: The maple state
3935 * @min: The minimum starting range
3936 * @empty: Can be empty
3937 *
3938 * Return: The entry in the previous slot which is possibly NULL
3939 */
mas_prev_slot(struct ma_state * mas,unsigned long min,bool empty)3940 static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
3941 {
3942 void *entry;
3943 void __rcu **slots;
3944 unsigned long pivot;
3945 enum maple_type type;
3946 unsigned long *pivots;
3947 struct maple_node *node;
3948 unsigned long save_point = mas->index;
3949
3950 retry:
3951 node = mas_mn(mas);
3952 type = mte_node_type(mas->node);
3953 pivots = ma_pivots(node, type);
3954 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
3955 goto retry;
3956
3957 if (mas->min <= min) {
3958 pivot = mas_safe_min(mas, pivots, mas->offset);
3959
3960 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
3961 goto retry;
3962
3963 if (pivot <= min)
3964 goto underflow;
3965 }
3966
3967 again:
3968 if (likely(mas->offset)) {
3969 mas->offset--;
3970 mas->last = mas->index - 1;
3971 mas->index = mas_safe_min(mas, pivots, mas->offset);
3972 } else {
3973 if (mas->index <= min)
3974 goto underflow;
3975
3976 if (mas_prev_node(mas, min)) {
3977 mas_rewalk(mas, save_point);
3978 goto retry;
3979 }
3980
3981 if (WARN_ON_ONCE(mas_is_underflow(mas)))
3982 return NULL;
3983
3984 mas->last = mas->max;
3985 node = mas_mn(mas);
3986 type = mte_node_type(mas->node);
3987 pivots = ma_pivots(node, type);
3988 mas->index = pivots[mas->offset - 1] + 1;
3989 }
3990
3991 slots = ma_slots(node, type);
3992 entry = mas_slot(mas, slots, mas->offset);
3993 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
3994 goto retry;
3995
3996 if (likely(entry))
3997 return entry;
3998
3999 if (!empty) {
4000 if (mas->index <= min)
4001 goto underflow;
4002
4003 goto again;
4004 }
4005
4006 return entry;
4007
4008 underflow:
4009 mas->status = ma_underflow;
4010 return NULL;
4011 }
4012
4013 /*
4014 * mas_next_node() - Get the next node at the same level in the tree.
4015 * @mas: The maple state
4016 * @node: The maple node
4017 * @max: The maximum pivot value to check.
4018 *
4019 * The next value will be mas->node[mas->offset] or the status will have
4020 * overflowed.
4021 * Return: 1 on dead node, 0 otherwise.
4022 */
mas_next_node(struct ma_state * mas,struct maple_node * node,unsigned long max)4023 static int mas_next_node(struct ma_state *mas, struct maple_node *node,
4024 unsigned long max)
4025 {
4026 unsigned long min;
4027 unsigned long *pivots;
4028 struct maple_enode *enode;
4029 struct maple_node *tmp;
4030 int level = 0;
4031 unsigned char node_end;
4032 enum maple_type mt;
4033 void __rcu **slots;
4034
4035 if (mas->max >= max)
4036 goto overflow;
4037
4038 min = mas->max + 1;
4039 level = 0;
4040 do {
4041 if (ma_is_root(node))
4042 goto overflow;
4043
4044 /* Walk up. */
4045 if (unlikely(mas_ascend(mas)))
4046 return 1;
4047
4048 level++;
4049 node = mas_mn(mas);
4050 mt = mte_node_type(mas->node);
4051 pivots = ma_pivots(node, mt);
4052 node_end = ma_data_end(node, mt, pivots, mas->max);
4053 if (unlikely(ma_dead_node(node)))
4054 return 1;
4055
4056 } while (unlikely(mas->offset == node_end));
4057
4058 slots = ma_slots(node, mt);
4059 mas->offset++;
4060 enode = mas_slot(mas, slots, mas->offset);
4061 if (unlikely(ma_dead_node(node)))
4062 return 1;
4063
4064 if (level > 1)
4065 mas->offset = 0;
4066
4067 while (unlikely(level > 1)) {
4068 level--;
4069 mas->node = enode;
4070 node = mas_mn(mas);
4071 mt = mte_node_type(mas->node);
4072 slots = ma_slots(node, mt);
4073 enode = mas_slot(mas, slots, 0);
4074 if (unlikely(ma_dead_node(node)))
4075 return 1;
4076 }
4077
4078 if (!mas->offset)
4079 pivots = ma_pivots(node, mt);
4080
4081 mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
4082 tmp = mte_to_node(enode);
4083 mt = mte_node_type(enode);
4084 pivots = ma_pivots(tmp, mt);
4085 mas->end = ma_data_end(tmp, mt, pivots, mas->max);
4086 if (unlikely(ma_dead_node(node)))
4087 return 1;
4088
4089 mas->node = enode;
4090 mas->min = min;
4091 return 0;
4092
4093 overflow:
4094 if (unlikely(ma_dead_node(node)))
4095 return 1;
4096
4097 mas->status = ma_overflow;
4098 return 0;
4099 }
4100
4101 /*
4102 * mas_next_slot() - Get the entry in the next slot
4103 *
4104 * @mas: The maple state
4105 * @max: The maximum starting range
4106 * @empty: Can be empty
4107 *
4108 * Return: The entry in the next slot which is possibly NULL
4109 */
mas_next_slot(struct ma_state * mas,unsigned long max,bool empty)4110 static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
4111 {
4112 void __rcu **slots;
4113 unsigned long *pivots;
4114 unsigned long pivot;
4115 enum maple_type type;
4116 struct maple_node *node;
4117 unsigned long save_point = mas->last;
4118 void *entry;
4119
4120 retry:
4121 node = mas_mn(mas);
4122 type = mte_node_type(mas->node);
4123 pivots = ma_pivots(node, type);
4124 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4125 goto retry;
4126
4127 if (mas->max >= max) {
4128 if (likely(mas->offset < mas->end))
4129 pivot = pivots[mas->offset];
4130 else
4131 pivot = mas->max;
4132
4133 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4134 goto retry;
4135
4136 if (pivot >= max) { /* Was at the limit, next will extend beyond */
4137 mas->status = ma_overflow;
4138 return NULL;
4139 }
4140 }
4141
4142 if (likely(mas->offset < mas->end)) {
4143 mas->index = pivots[mas->offset] + 1;
4144 again:
4145 mas->offset++;
4146 if (likely(mas->offset < mas->end))
4147 mas->last = pivots[mas->offset];
4148 else
4149 mas->last = mas->max;
4150 } else {
4151 if (mas->last >= max) {
4152 mas->status = ma_overflow;
4153 return NULL;
4154 }
4155
4156 if (mas_next_node(mas, node, max)) {
4157 mas_rewalk(mas, save_point);
4158 goto retry;
4159 }
4160
4161 if (WARN_ON_ONCE(mas_is_overflow(mas)))
4162 return NULL;
4163
4164 mas->offset = 0;
4165 mas->index = mas->min;
4166 node = mas_mn(mas);
4167 type = mte_node_type(mas->node);
4168 pivots = ma_pivots(node, type);
4169 mas->last = pivots[0];
4170 }
4171
4172 slots = ma_slots(node, type);
4173 entry = mt_slot(mas->tree, slots, mas->offset);
4174 if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4175 goto retry;
4176
4177 if (entry)
4178 return entry;
4179
4180
4181 if (!empty) {
4182 if (mas->last >= max) {
4183 mas->status = ma_overflow;
4184 return NULL;
4185 }
4186
4187 mas->index = mas->last + 1;
4188 goto again;
4189 }
4190
4191 return entry;
4192 }
4193
4194 /*
4195 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the
4196 * highest gap address of a given size in a given node and descend.
4197 * @mas: The maple state
4198 * @size: The needed size.
4199 *
4200 * Return: True if found in a leaf, false otherwise.
4201 *
4202 */
mas_rev_awalk(struct ma_state * mas,unsigned long size,unsigned long * gap_min,unsigned long * gap_max)4203 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
4204 unsigned long *gap_min, unsigned long *gap_max)
4205 {
4206 enum maple_type type = mte_node_type(mas->node);
4207 struct maple_node *node = mas_mn(mas);
4208 unsigned long *pivots, *gaps;
4209 void __rcu **slots;
4210 unsigned long gap = 0;
4211 unsigned long max, min;
4212 unsigned char offset;
4213
4214 if (unlikely(mas_is_err(mas)))
4215 return true;
4216
4217 if (ma_is_dense(type)) {
4218 /* dense nodes. */
4219 mas->offset = (unsigned char)(mas->index - mas->min);
4220 return true;
4221 }
4222
4223 pivots = ma_pivots(node, type);
4224 slots = ma_slots(node, type);
4225 gaps = ma_gaps(node, type);
4226 offset = mas->offset;
4227 min = mas_safe_min(mas, pivots, offset);
4228 /* Skip out of bounds. */
4229 while (mas->last < min)
4230 min = mas_safe_min(mas, pivots, --offset);
4231
4232 max = mas_safe_pivot(mas, pivots, offset, type);
4233 while (mas->index <= max) {
4234 gap = 0;
4235 if (gaps)
4236 gap = gaps[offset];
4237 else if (!mas_slot(mas, slots, offset))
4238 gap = max - min + 1;
4239
4240 if (gap) {
4241 if ((size <= gap) && (size <= mas->last - min + 1))
4242 break;
4243
4244 if (!gaps) {
4245 /* Skip the next slot, it cannot be a gap. */
4246 if (offset < 2)
4247 goto ascend;
4248
4249 offset -= 2;
4250 max = pivots[offset];
4251 min = mas_safe_min(mas, pivots, offset);
4252 continue;
4253 }
4254 }
4255
4256 if (!offset)
4257 goto ascend;
4258
4259 offset--;
4260 max = min - 1;
4261 min = mas_safe_min(mas, pivots, offset);
4262 }
4263
4264 if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
4265 goto no_space;
4266
4267 if (unlikely(ma_is_leaf(type))) {
4268 mas->offset = offset;
4269 *gap_min = min;
4270 *gap_max = min + gap - 1;
4271 return true;
4272 }
4273
4274 /* descend, only happens under lock. */
4275 mas->node = mas_slot(mas, slots, offset);
4276 mas->min = min;
4277 mas->max = max;
4278 mas->offset = mas_data_end(mas);
4279 return false;
4280
4281 ascend:
4282 if (!mte_is_root(mas->node))
4283 return false;
4284
4285 no_space:
4286 mas_set_err(mas, -EBUSY);
4287 return false;
4288 }
4289
mas_anode_descend(struct ma_state * mas,unsigned long size)4290 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
4291 {
4292 enum maple_type type = mte_node_type(mas->node);
4293 unsigned long pivot, min, gap = 0;
4294 unsigned char offset, data_end;
4295 unsigned long *gaps, *pivots;
4296 void __rcu **slots;
4297 struct maple_node *node;
4298 bool found = false;
4299
4300 if (ma_is_dense(type)) {
4301 mas->offset = (unsigned char)(mas->index - mas->min);
4302 return true;
4303 }
4304
4305 node = mas_mn(mas);
4306 pivots = ma_pivots(node, type);
4307 slots = ma_slots(node, type);
4308 gaps = ma_gaps(node, type);
4309 offset = mas->offset;
4310 min = mas_safe_min(mas, pivots, offset);
4311 data_end = ma_data_end(node, type, pivots, mas->max);
4312 for (; offset <= data_end; offset++) {
4313 pivot = mas_safe_pivot(mas, pivots, offset, type);
4314
4315 /* Not within lower bounds */
4316 if (mas->index > pivot)
4317 goto next_slot;
4318
4319 if (gaps)
4320 gap = gaps[offset];
4321 else if (!mas_slot(mas, slots, offset))
4322 gap = min(pivot, mas->last) - max(mas->index, min) + 1;
4323 else
4324 goto next_slot;
4325
4326 if (gap >= size) {
4327 if (ma_is_leaf(type)) {
4328 found = true;
4329 break;
4330 }
4331
4332 mas->node = mas_slot(mas, slots, offset);
4333 mas->min = min;
4334 mas->max = pivot;
4335 offset = 0;
4336 break;
4337 }
4338 next_slot:
4339 min = pivot + 1;
4340 if (mas->last <= pivot) {
4341 mas_set_err(mas, -EBUSY);
4342 return true;
4343 }
4344 }
4345
4346 mas->offset = offset;
4347 return found;
4348 }
4349
4350 /**
4351 * mas_walk() - Search for @mas->index in the tree.
4352 * @mas: The maple state.
4353 *
4354 * mas->index and mas->last will be set to the range if there is a value. If
4355 * mas->status is ma_none, reset to ma_start
4356 *
4357 * Return: the entry at the location or %NULL.
4358 */
mas_walk(struct ma_state * mas)4359 void *mas_walk(struct ma_state *mas)
4360 {
4361 void *entry;
4362
4363 if (!mas_is_active(mas) && !mas_is_start(mas))
4364 mas->status = ma_start;
4365 retry:
4366 entry = mas_state_walk(mas);
4367 if (mas_is_start(mas)) {
4368 goto retry;
4369 } else if (mas_is_none(mas)) {
4370 mas->index = 0;
4371 mas->last = ULONG_MAX;
4372 } else if (mas_is_ptr(mas)) {
4373 if (!mas->index) {
4374 mas->last = 0;
4375 return entry;
4376 }
4377
4378 mas->index = 1;
4379 mas->last = ULONG_MAX;
4380 mas->status = ma_none;
4381 return NULL;
4382 }
4383
4384 return entry;
4385 }
4386 EXPORT_SYMBOL_GPL(mas_walk);
4387
mas_rewind_node(struct ma_state * mas)4388 static inline bool mas_rewind_node(struct ma_state *mas)
4389 {
4390 unsigned char slot;
4391
4392 do {
4393 if (mte_is_root(mas->node)) {
4394 slot = mas->offset;
4395 if (!slot)
4396 return false;
4397 } else {
4398 mas_ascend(mas);
4399 slot = mas->offset;
4400 }
4401 } while (!slot);
4402
4403 mas->offset = --slot;
4404 return true;
4405 }
4406
4407 /*
4408 * mas_skip_node() - Internal function. Skip over a node.
4409 * @mas: The maple state.
4410 *
4411 * Return: true if there is another node, false otherwise.
4412 */
mas_skip_node(struct ma_state * mas)4413 static inline bool mas_skip_node(struct ma_state *mas)
4414 {
4415 if (mas_is_err(mas))
4416 return false;
4417
4418 do {
4419 if (mte_is_root(mas->node)) {
4420 if (mas->offset >= mas_data_end(mas)) {
4421 mas_set_err(mas, -EBUSY);
4422 return false;
4423 }
4424 } else {
4425 mas_ascend(mas);
4426 }
4427 } while (mas->offset >= mas_data_end(mas));
4428
4429 mas->offset++;
4430 return true;
4431 }
4432
4433 /*
4434 * mas_awalk() - Allocation walk. Search from low address to high, for a gap of
4435 * @size
4436 * @mas: The maple state
4437 * @size: The size of the gap required
4438 *
4439 * Search between @mas->index and @mas->last for a gap of @size.
4440 */
mas_awalk(struct ma_state * mas,unsigned long size)4441 static inline void mas_awalk(struct ma_state *mas, unsigned long size)
4442 {
4443 struct maple_enode *last = NULL;
4444
4445 /*
4446 * There are 4 options:
4447 * go to child (descend)
4448 * go back to parent (ascend)
4449 * no gap found. (return, error == -EBUSY)
4450 * found the gap. (return)
4451 */
4452 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
4453 if (last == mas->node)
4454 mas_skip_node(mas);
4455 else
4456 last = mas->node;
4457 }
4458 }
4459
4460 /*
4461 * mas_sparse_area() - Internal function. Return upper or lower limit when
4462 * searching for a gap in an empty tree.
4463 * @mas: The maple state
4464 * @min: the minimum range
4465 * @max: The maximum range
4466 * @size: The size of the gap
4467 * @fwd: Searching forward or back
4468 */
mas_sparse_area(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size,bool fwd)4469 static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
4470 unsigned long max, unsigned long size, bool fwd)
4471 {
4472 if (!unlikely(mas_is_none(mas)) && min == 0) {
4473 min++;
4474 /*
4475 * At this time, min is increased, we need to recheck whether
4476 * the size is satisfied.
4477 */
4478 if (min > max || max - min + 1 < size)
4479 return -EBUSY;
4480 }
4481 /* mas_is_ptr */
4482
4483 if (fwd) {
4484 mas->index = min;
4485 mas->last = min + size - 1;
4486 } else {
4487 mas->last = max;
4488 mas->index = max - size + 1;
4489 }
4490 return 0;
4491 }
4492
4493 /*
4494 * mas_empty_area() - Get the lowest address within the range that is
4495 * sufficient for the size requested.
4496 * @mas: The maple state
4497 * @min: The lowest value of the range
4498 * @max: The highest value of the range
4499 * @size: The size needed
4500 */
mas_empty_area(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size)4501 int mas_empty_area(struct ma_state *mas, unsigned long min,
4502 unsigned long max, unsigned long size)
4503 {
4504 unsigned char offset;
4505 unsigned long *pivots;
4506 enum maple_type mt;
4507 struct maple_node *node;
4508
4509 if (min > max)
4510 return -EINVAL;
4511
4512 if (size == 0 || max - min < size - 1)
4513 return -EINVAL;
4514
4515 if (mas_is_start(mas))
4516 mas_start(mas);
4517 else if (mas->offset >= 2)
4518 mas->offset -= 2;
4519 else if (!mas_skip_node(mas))
4520 return -EBUSY;
4521
4522 /* Empty set */
4523 if (mas_is_none(mas) || mas_is_ptr(mas))
4524 return mas_sparse_area(mas, min, max, size, true);
4525
4526 /* The start of the window can only be within these values */
4527 mas->index = min;
4528 mas->last = max;
4529 mas_awalk(mas, size);
4530
4531 if (unlikely(mas_is_err(mas)))
4532 return xa_err(mas->node);
4533
4534 offset = mas->offset;
4535 node = mas_mn(mas);
4536 mt = mte_node_type(mas->node);
4537 pivots = ma_pivots(node, mt);
4538 min = mas_safe_min(mas, pivots, offset);
4539 if (mas->index < min)
4540 mas->index = min;
4541 mas->last = mas->index + size - 1;
4542 mas->end = ma_data_end(node, mt, pivots, mas->max);
4543 return 0;
4544 }
4545 EXPORT_SYMBOL_GPL(mas_empty_area);
4546
4547 /*
4548 * mas_empty_area_rev() - Get the highest address within the range that is
4549 * sufficient for the size requested.
4550 * @mas: The maple state
4551 * @min: The lowest value of the range
4552 * @max: The highest value of the range
4553 * @size: The size needed
4554 */
mas_empty_area_rev(struct ma_state * mas,unsigned long min,unsigned long max,unsigned long size)4555 int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
4556 unsigned long max, unsigned long size)
4557 {
4558 struct maple_enode *last = mas->node;
4559
4560 if (min > max)
4561 return -EINVAL;
4562
4563 if (size == 0 || max - min < size - 1)
4564 return -EINVAL;
4565
4566 if (mas_is_start(mas))
4567 mas_start(mas);
4568 else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
4569 return -EBUSY;
4570
4571 if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
4572 return mas_sparse_area(mas, min, max, size, false);
4573 else if (mas->offset >= 2)
4574 mas->offset -= 2;
4575 else
4576 mas->offset = mas_data_end(mas);
4577
4578
4579 /* The start of the window can only be within these values. */
4580 mas->index = min;
4581 mas->last = max;
4582
4583 while (!mas_rev_awalk(mas, size, &min, &max)) {
4584 if (last == mas->node) {
4585 if (!mas_rewind_node(mas))
4586 return -EBUSY;
4587 } else {
4588 last = mas->node;
4589 }
4590 }
4591
4592 if (mas_is_err(mas))
4593 return xa_err(mas->node);
4594
4595 if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
4596 return -EBUSY;
4597
4598 /* Trim the upper limit to the max. */
4599 if (max < mas->last)
4600 mas->last = max;
4601
4602 mas->index = mas->last - size + 1;
4603 mas->end = mas_data_end(mas);
4604 return 0;
4605 }
4606 EXPORT_SYMBOL_GPL(mas_empty_area_rev);
4607
4608 /*
4609 * mte_dead_leaves() - Mark all leaves of a node as dead.
4610 * @enode: the encoded node
4611 * @mt: the maple tree
4612 * @slots: Pointer to the slot array
4613 *
4614 * Must hold the write lock.
4615 *
4616 * Return: The number of leaves marked as dead.
4617 */
4618 static inline
mte_dead_leaves(struct maple_enode * enode,struct maple_tree * mt,void __rcu ** slots)4619 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
4620 void __rcu **slots)
4621 {
4622 struct maple_node *node;
4623 enum maple_type type;
4624 void *entry;
4625 int offset;
4626
4627 for (offset = 0; offset < mt_slot_count(enode); offset++) {
4628 entry = mt_slot(mt, slots, offset);
4629 type = mte_node_type(entry);
4630 node = mte_to_node(entry);
4631 /* Use both node and type to catch LE & BE metadata */
4632 if (!node || !type)
4633 break;
4634
4635 mte_set_node_dead(entry);
4636 node->type = type;
4637 rcu_assign_pointer(slots[offset], node);
4638 }
4639
4640 return offset;
4641 }
4642
4643 /**
4644 * mte_dead_walk() - Walk down a dead tree to just before the leaves
4645 * @enode: The maple encoded node
4646 * @offset: The starting offset
4647 *
4648 * Note: This can only be used from the RCU callback context.
4649 */
mte_dead_walk(struct maple_enode ** enode,unsigned char offset)4650 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
4651 {
4652 struct maple_node *node, *next;
4653 void __rcu **slots = NULL;
4654
4655 next = mte_to_node(*enode);
4656 do {
4657 *enode = ma_enode_ptr(next);
4658 node = mte_to_node(*enode);
4659 slots = ma_slots(node, node->type);
4660 next = rcu_dereference_protected(slots[offset],
4661 lock_is_held(&rcu_callback_map));
4662 offset = 0;
4663 } while (!ma_is_leaf(next->type));
4664
4665 return slots;
4666 }
4667
4668 /**
4669 * mt_free_walk() - Walk & free a tree in the RCU callback context
4670 * @head: The RCU head that's within the node.
4671 *
4672 * Note: This can only be used from the RCU callback context.
4673 */
mt_free_walk(struct rcu_head * head)4674 static void mt_free_walk(struct rcu_head *head)
4675 {
4676 void __rcu **slots;
4677 struct maple_node *node, *start;
4678 struct maple_enode *enode;
4679 unsigned char offset;
4680 enum maple_type type;
4681
4682 node = container_of(head, struct maple_node, rcu);
4683
4684 if (ma_is_leaf(node->type))
4685 goto free_leaf;
4686
4687 start = node;
4688 enode = mt_mk_node(node, node->type);
4689 slots = mte_dead_walk(&enode, 0);
4690 node = mte_to_node(enode);
4691 do {
4692 mt_free_bulk(node->slot_len, slots);
4693 offset = node->parent_slot + 1;
4694 enode = node->piv_parent;
4695 if (mte_to_node(enode) == node)
4696 goto free_leaf;
4697
4698 type = mte_node_type(enode);
4699 slots = ma_slots(mte_to_node(enode), type);
4700 if ((offset < mt_slots[type]) &&
4701 rcu_dereference_protected(slots[offset],
4702 lock_is_held(&rcu_callback_map)))
4703 slots = mte_dead_walk(&enode, offset);
4704 node = mte_to_node(enode);
4705 } while ((node != start) || (node->slot_len < offset));
4706
4707 slots = ma_slots(node, node->type);
4708 mt_free_bulk(node->slot_len, slots);
4709
4710 free_leaf:
4711 kfree(node);
4712 }
4713
mte_destroy_descend(struct maple_enode ** enode,struct maple_tree * mt,struct maple_enode * prev,unsigned char offset)4714 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
4715 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
4716 {
4717 struct maple_node *node;
4718 struct maple_enode *next = *enode;
4719 void __rcu **slots = NULL;
4720 enum maple_type type;
4721 unsigned char next_offset = 0;
4722
4723 do {
4724 *enode = next;
4725 node = mte_to_node(*enode);
4726 type = mte_node_type(*enode);
4727 slots = ma_slots(node, type);
4728 next = mt_slot_locked(mt, slots, next_offset);
4729 if ((mte_dead_node(next)))
4730 next = mt_slot_locked(mt, slots, ++next_offset);
4731
4732 mte_set_node_dead(*enode);
4733 node->type = type;
4734 node->piv_parent = prev;
4735 node->parent_slot = offset;
4736 offset = next_offset;
4737 next_offset = 0;
4738 prev = *enode;
4739 } while (!mte_is_leaf(next));
4740
4741 return slots;
4742 }
4743
mt_destroy_walk(struct maple_enode * enode,struct maple_tree * mt,bool free)4744 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
4745 bool free)
4746 {
4747 void __rcu **slots;
4748 struct maple_node *node = mte_to_node(enode);
4749 struct maple_enode *start;
4750
4751 if (mte_is_leaf(enode)) {
4752 mte_set_node_dead(enode);
4753 node->type = mte_node_type(enode);
4754 goto free_leaf;
4755 }
4756
4757 start = enode;
4758 slots = mte_destroy_descend(&enode, mt, start, 0);
4759 node = mte_to_node(enode); // Updated in the above call.
4760 do {
4761 enum maple_type type;
4762 unsigned char offset;
4763 struct maple_enode *parent, *tmp;
4764
4765 node->slot_len = mte_dead_leaves(enode, mt, slots);
4766 if (free)
4767 mt_free_bulk(node->slot_len, slots);
4768 offset = node->parent_slot + 1;
4769 enode = node->piv_parent;
4770 if (mte_to_node(enode) == node)
4771 goto free_leaf;
4772
4773 type = mte_node_type(enode);
4774 slots = ma_slots(mte_to_node(enode), type);
4775 if (offset >= mt_slots[type])
4776 goto next;
4777
4778 tmp = mt_slot_locked(mt, slots, offset);
4779 if (mte_node_type(tmp) && mte_to_node(tmp)) {
4780 parent = enode;
4781 enode = tmp;
4782 slots = mte_destroy_descend(&enode, mt, parent, offset);
4783 }
4784 next:
4785 node = mte_to_node(enode);
4786 } while (start != enode);
4787
4788 node = mte_to_node(enode);
4789 node->slot_len = mte_dead_leaves(enode, mt, slots);
4790 if (free)
4791 mt_free_bulk(node->slot_len, slots);
4792
4793 free_leaf:
4794 if (free)
4795 kfree(node);
4796 else
4797 mt_clear_meta(mt, node, node->type);
4798 }
4799
4800 /*
4801 * mte_destroy_walk() - Free a tree or sub-tree.
4802 * @enode: the encoded maple node (maple_enode) to start
4803 * @mt: the tree to free - needed for node types.
4804 *
4805 * Must hold the write lock.
4806 */
mte_destroy_walk(struct maple_enode * enode,struct maple_tree * mt)4807 static inline void mte_destroy_walk(struct maple_enode *enode,
4808 struct maple_tree *mt)
4809 {
4810 struct maple_node *node = mte_to_node(enode);
4811
4812 if (mt_in_rcu(mt)) {
4813 mt_destroy_walk(enode, mt, false);
4814 call_rcu(&node->rcu, mt_free_walk);
4815 } else {
4816 mt_destroy_walk(enode, mt, true);
4817 }
4818 }
4819 /* Interface */
4820
4821 /**
4822 * mas_store() - Store an @entry.
4823 * @mas: The maple state.
4824 * @entry: The entry to store.
4825 *
4826 * The @mas->index and @mas->last is used to set the range for the @entry.
4827 *
4828 * Return: the first entry between mas->index and mas->last or %NULL.
4829 */
mas_store(struct ma_state * mas,void * entry)4830 void *mas_store(struct ma_state *mas, void *entry)
4831 {
4832 MA_WR_STATE(wr_mas, mas, entry);
4833
4834 trace_ma_write(TP_FCT, mas, 0, entry);
4835 #ifdef CONFIG_DEBUG_MAPLE_TREE
4836 if (MAS_WARN_ON(mas, mas->index > mas->last))
4837 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
4838 entry);
4839
4840 if (mas->index > mas->last) {
4841 mas_set_err(mas, -EINVAL);
4842 return NULL;
4843 }
4844
4845 #endif
4846
4847 /*
4848 * Storing is the same operation as insert with the added caveat that it
4849 * can overwrite entries. Although this seems simple enough, one may
4850 * want to examine what happens if a single store operation was to
4851 * overwrite multiple entries within a self-balancing B-Tree.
4852 */
4853 mas_wr_prealloc_setup(&wr_mas);
4854 mas->store_type = mas_wr_store_type(&wr_mas);
4855 if (mas->mas_flags & MA_STATE_PREALLOC) {
4856 mas_wr_store_entry(&wr_mas);
4857 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
4858 return wr_mas.content;
4859 }
4860
4861 mas_prealloc_calc(&wr_mas, entry);
4862 if (!mas->node_request)
4863 goto store;
4864
4865 mas_alloc_nodes(mas, GFP_NOWAIT);
4866 if (mas_is_err(mas))
4867 return NULL;
4868
4869 store:
4870 mas_wr_store_entry(&wr_mas);
4871 mas_destroy(mas);
4872 return wr_mas.content;
4873 }
4874 EXPORT_SYMBOL_GPL(mas_store);
4875
4876 /**
4877 * mas_store_gfp() - Store a value into the tree.
4878 * @mas: The maple state
4879 * @entry: The entry to store
4880 * @gfp: The GFP_FLAGS to use for allocations if necessary.
4881 *
4882 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
4883 * be allocated.
4884 */
mas_store_gfp(struct ma_state * mas,void * entry,gfp_t gfp)4885 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
4886 {
4887 unsigned long index = mas->index;
4888 unsigned long last = mas->last;
4889 MA_WR_STATE(wr_mas, mas, entry);
4890 int ret = 0;
4891
4892 retry:
4893 mas_wr_preallocate(&wr_mas, entry);
4894 if (unlikely(mas_nomem(mas, gfp))) {
4895 if (!entry)
4896 __mas_set_range(mas, index, last);
4897 goto retry;
4898 }
4899
4900 if (mas_is_err(mas)) {
4901 ret = xa_err(mas->node);
4902 goto out;
4903 }
4904
4905 mas_wr_store_entry(&wr_mas);
4906 out:
4907 mas_destroy(mas);
4908 return ret;
4909 }
4910 EXPORT_SYMBOL_GPL(mas_store_gfp);
4911
4912 /**
4913 * mas_store_prealloc() - Store a value into the tree using memory
4914 * preallocated in the maple state.
4915 * @mas: The maple state
4916 * @entry: The entry to store.
4917 */
mas_store_prealloc(struct ma_state * mas,void * entry)4918 void mas_store_prealloc(struct ma_state *mas, void *entry)
4919 {
4920 MA_WR_STATE(wr_mas, mas, entry);
4921
4922 if (mas->store_type == wr_store_root) {
4923 mas_wr_prealloc_setup(&wr_mas);
4924 goto store;
4925 }
4926
4927 mas_wr_walk_descend(&wr_mas);
4928 if (mas->store_type != wr_spanning_store) {
4929 /* set wr_mas->content to current slot */
4930 wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
4931 mas_wr_end_piv(&wr_mas);
4932 }
4933
4934 store:
4935 trace_ma_write(TP_FCT, mas, 0, entry);
4936 mas_wr_store_entry(&wr_mas);
4937 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
4938 mas_destroy(mas);
4939 }
4940 EXPORT_SYMBOL_GPL(mas_store_prealloc);
4941
4942 /**
4943 * mas_preallocate() - Preallocate enough nodes for a store operation
4944 * @mas: The maple state
4945 * @entry: The entry that will be stored
4946 * @gfp: The GFP_FLAGS to use for allocations.
4947 *
4948 * Return: 0 on success, -ENOMEM if memory could not be allocated.
4949 */
mas_preallocate(struct ma_state * mas,void * entry,gfp_t gfp)4950 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
4951 {
4952 MA_WR_STATE(wr_mas, mas, entry);
4953
4954 mas_wr_prealloc_setup(&wr_mas);
4955 mas->store_type = mas_wr_store_type(&wr_mas);
4956 mas_prealloc_calc(&wr_mas, entry);
4957 if (!mas->node_request)
4958 goto set_flag;
4959
4960 mas->mas_flags &= ~MA_STATE_PREALLOC;
4961 mas_alloc_nodes(mas, gfp);
4962 if (mas_is_err(mas)) {
4963 int ret = xa_err(mas->node);
4964
4965 mas->node_request = 0;
4966 mas_destroy(mas);
4967 mas_reset(mas);
4968 return ret;
4969 }
4970
4971 set_flag:
4972 mas->mas_flags |= MA_STATE_PREALLOC;
4973 return 0;
4974 }
4975 EXPORT_SYMBOL_GPL(mas_preallocate);
4976
4977 /*
4978 * mas_destroy() - destroy a maple state.
4979 * @mas: The maple state
4980 *
4981 * Upon completion, check the left-most node and rebalance against the node to
4982 * the right if necessary. Frees any allocated nodes associated with this maple
4983 * state.
4984 */
mas_destroy(struct ma_state * mas)4985 void mas_destroy(struct ma_state *mas)
4986 {
4987 mas->mas_flags &= ~MA_STATE_PREALLOC;
4988 mas_empty_nodes(mas);
4989 }
4990 EXPORT_SYMBOL_GPL(mas_destroy);
4991
mas_may_activate(struct ma_state * mas)4992 static void mas_may_activate(struct ma_state *mas)
4993 {
4994 if (!mas->node) {
4995 mas->status = ma_start;
4996 } else if (mas->index > mas->max || mas->index < mas->min) {
4997 mas->status = ma_start;
4998 } else {
4999 mas->status = ma_active;
5000 }
5001 }
5002
mas_next_setup(struct ma_state * mas,unsigned long max,void ** entry)5003 static bool mas_next_setup(struct ma_state *mas, unsigned long max,
5004 void **entry)
5005 {
5006 bool was_none = mas_is_none(mas);
5007
5008 if (unlikely(mas->last >= max)) {
5009 mas->status = ma_overflow;
5010 return true;
5011 }
5012
5013 switch (mas->status) {
5014 case ma_active:
5015 return false;
5016 case ma_none:
5017 fallthrough;
5018 case ma_pause:
5019 mas->status = ma_start;
5020 fallthrough;
5021 case ma_start:
5022 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5023 break;
5024 case ma_overflow:
5025 /* Overflowed before, but the max changed */
5026 mas_may_activate(mas);
5027 break;
5028 case ma_underflow:
5029 /* The user expects the mas to be one before where it is */
5030 mas_may_activate(mas);
5031 *entry = mas_walk(mas);
5032 if (*entry)
5033 return true;
5034 break;
5035 case ma_root:
5036 break;
5037 case ma_error:
5038 return true;
5039 }
5040
5041 if (likely(mas_is_active(mas))) /* Fast path */
5042 return false;
5043
5044 if (mas_is_ptr(mas)) {
5045 *entry = NULL;
5046 if (was_none && mas->index == 0) {
5047 mas->index = mas->last = 0;
5048 return true;
5049 }
5050 mas->index = 1;
5051 mas->last = ULONG_MAX;
5052 mas->status = ma_none;
5053 return true;
5054 }
5055
5056 if (mas_is_none(mas))
5057 return true;
5058
5059 return false;
5060 }
5061
5062 /**
5063 * mas_next() - Get the next entry.
5064 * @mas: The maple state
5065 * @max: The maximum index to check.
5066 *
5067 * Returns the next entry after @mas->index.
5068 * Must hold rcu_read_lock or the write lock.
5069 * Can return the zero entry.
5070 *
5071 * Return: The next entry or %NULL
5072 */
mas_next(struct ma_state * mas,unsigned long max)5073 void *mas_next(struct ma_state *mas, unsigned long max)
5074 {
5075 void *entry = NULL;
5076
5077 if (mas_next_setup(mas, max, &entry))
5078 return entry;
5079
5080 /* Retries on dead nodes handled by mas_next_slot */
5081 return mas_next_slot(mas, max, false);
5082 }
5083 EXPORT_SYMBOL_GPL(mas_next);
5084
5085 /**
5086 * mas_next_range() - Advance the maple state to the next range
5087 * @mas: The maple state
5088 * @max: The maximum index to check.
5089 *
5090 * Sets @mas->index and @mas->last to the range.
5091 * Must hold rcu_read_lock or the write lock.
5092 * Can return the zero entry.
5093 *
5094 * Return: The next entry or %NULL
5095 */
mas_next_range(struct ma_state * mas,unsigned long max)5096 void *mas_next_range(struct ma_state *mas, unsigned long max)
5097 {
5098 void *entry = NULL;
5099
5100 if (mas_next_setup(mas, max, &entry))
5101 return entry;
5102
5103 /* Retries on dead nodes handled by mas_next_slot */
5104 return mas_next_slot(mas, max, true);
5105 }
5106 EXPORT_SYMBOL_GPL(mas_next_range);
5107
5108 /**
5109 * mt_next() - get the next value in the maple tree
5110 * @mt: The maple tree
5111 * @index: The start index
5112 * @max: The maximum index to check
5113 *
5114 * Takes RCU read lock internally to protect the search, which does not
5115 * protect the returned pointer after dropping RCU read lock.
5116 * See also: Documentation/core-api/maple_tree.rst
5117 *
5118 * Return: The entry higher than @index or %NULL if nothing is found.
5119 */
mt_next(struct maple_tree * mt,unsigned long index,unsigned long max)5120 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5121 {
5122 void *entry = NULL;
5123 MA_STATE(mas, mt, index, index);
5124
5125 rcu_read_lock();
5126 entry = mas_next(&mas, max);
5127 rcu_read_unlock();
5128 return entry;
5129 }
5130 EXPORT_SYMBOL_GPL(mt_next);
5131
mas_prev_setup(struct ma_state * mas,unsigned long min,void ** entry)5132 static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
5133 {
5134 if (unlikely(mas->index <= min)) {
5135 mas->status = ma_underflow;
5136 return true;
5137 }
5138
5139 switch (mas->status) {
5140 case ma_active:
5141 return false;
5142 case ma_start:
5143 break;
5144 case ma_none:
5145 fallthrough;
5146 case ma_pause:
5147 mas->status = ma_start;
5148 break;
5149 case ma_underflow:
5150 /* underflowed before but the min changed */
5151 mas_may_activate(mas);
5152 break;
5153 case ma_overflow:
5154 /* User expects mas to be one after where it is */
5155 mas_may_activate(mas);
5156 *entry = mas_walk(mas);
5157 if (*entry)
5158 return true;
5159 break;
5160 case ma_root:
5161 break;
5162 case ma_error:
5163 return true;
5164 }
5165
5166 if (mas_is_start(mas))
5167 mas_walk(mas);
5168
5169 if (unlikely(mas_is_ptr(mas))) {
5170 if (!mas->index) {
5171 mas->status = ma_none;
5172 return true;
5173 }
5174 mas->index = mas->last = 0;
5175 *entry = mas_root(mas);
5176 return true;
5177 }
5178
5179 if (mas_is_none(mas)) {
5180 if (mas->index) {
5181 /* Walked to out-of-range pointer? */
5182 mas->index = mas->last = 0;
5183 mas->status = ma_root;
5184 *entry = mas_root(mas);
5185 return true;
5186 }
5187 return true;
5188 }
5189
5190 return false;
5191 }
5192
5193 /**
5194 * mas_prev() - Get the previous entry
5195 * @mas: The maple state
5196 * @min: The minimum value to check.
5197 *
5198 * Must hold rcu_read_lock or the write lock.
5199 * Will reset mas to ma_start if the status is ma_none. Will stop on not
5200 * searchable nodes.
5201 *
5202 * Return: the previous value or %NULL.
5203 */
mas_prev(struct ma_state * mas,unsigned long min)5204 void *mas_prev(struct ma_state *mas, unsigned long min)
5205 {
5206 void *entry = NULL;
5207
5208 if (mas_prev_setup(mas, min, &entry))
5209 return entry;
5210
5211 return mas_prev_slot(mas, min, false);
5212 }
5213 EXPORT_SYMBOL_GPL(mas_prev);
5214
5215 /**
5216 * mas_prev_range() - Advance to the previous range
5217 * @mas: The maple state
5218 * @min: The minimum value to check.
5219 *
5220 * Sets @mas->index and @mas->last to the range.
5221 * Must hold rcu_read_lock or the write lock.
5222 * Will reset mas to ma_start if the node is ma_none. Will stop on not
5223 * searchable nodes.
5224 *
5225 * Return: the previous value or %NULL.
5226 */
mas_prev_range(struct ma_state * mas,unsigned long min)5227 void *mas_prev_range(struct ma_state *mas, unsigned long min)
5228 {
5229 void *entry = NULL;
5230
5231 if (mas_prev_setup(mas, min, &entry))
5232 return entry;
5233
5234 return mas_prev_slot(mas, min, true);
5235 }
5236 EXPORT_SYMBOL_GPL(mas_prev_range);
5237
5238 /**
5239 * mt_prev() - get the previous value in the maple tree
5240 * @mt: The maple tree
5241 * @index: The start index
5242 * @min: The minimum index to check
5243 *
5244 * Takes RCU read lock internally to protect the search, which does not
5245 * protect the returned pointer after dropping RCU read lock.
5246 * See also: Documentation/core-api/maple_tree.rst
5247 *
5248 * Return: The entry before @index or %NULL if nothing is found.
5249 */
mt_prev(struct maple_tree * mt,unsigned long index,unsigned long min)5250 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
5251 {
5252 void *entry = NULL;
5253 MA_STATE(mas, mt, index, index);
5254
5255 rcu_read_lock();
5256 entry = mas_prev(&mas, min);
5257 rcu_read_unlock();
5258 return entry;
5259 }
5260 EXPORT_SYMBOL_GPL(mt_prev);
5261
5262 /**
5263 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
5264 * @mas: The maple state to pause
5265 *
5266 * Some users need to pause a walk and drop the lock they're holding in
5267 * order to yield to a higher priority thread or carry out an operation
5268 * on an entry. Those users should call this function before they drop
5269 * the lock. It resets the @mas to be suitable for the next iteration
5270 * of the loop after the user has reacquired the lock. If most entries
5271 * found during a walk require you to call mas_pause(), the mt_for_each()
5272 * iterator may be more appropriate.
5273 *
5274 */
mas_pause(struct ma_state * mas)5275 void mas_pause(struct ma_state *mas)
5276 {
5277 mas->status = ma_pause;
5278 mas->node = NULL;
5279 }
5280 EXPORT_SYMBOL_GPL(mas_pause);
5281
5282 /**
5283 * mas_find_setup() - Internal function to set up mas_find*().
5284 * @mas: The maple state
5285 * @max: The maximum index
5286 * @entry: Pointer to the entry
5287 *
5288 * Returns: True if entry is the answer, false otherwise.
5289 */
mas_find_setup(struct ma_state * mas,unsigned long max,void ** entry)5290 static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
5291 {
5292 switch (mas->status) {
5293 case ma_active:
5294 if (mas->last < max)
5295 return false;
5296 return true;
5297 case ma_start:
5298 break;
5299 case ma_pause:
5300 if (unlikely(mas->last >= max))
5301 return true;
5302
5303 mas->index = ++mas->last;
5304 mas->status = ma_start;
5305 break;
5306 case ma_none:
5307 if (unlikely(mas->last >= max))
5308 return true;
5309
5310 mas->index = mas->last;
5311 mas->status = ma_start;
5312 break;
5313 case ma_underflow:
5314 /* mas is pointing at entry before unable to go lower */
5315 if (unlikely(mas->index >= max)) {
5316 mas->status = ma_overflow;
5317 return true;
5318 }
5319
5320 mas_may_activate(mas);
5321 *entry = mas_walk(mas);
5322 if (*entry)
5323 return true;
5324 break;
5325 case ma_overflow:
5326 if (unlikely(mas->last >= max))
5327 return true;
5328
5329 mas_may_activate(mas);
5330 *entry = mas_walk(mas);
5331 if (*entry)
5332 return true;
5333 break;
5334 case ma_root:
5335 break;
5336 case ma_error:
5337 return true;
5338 }
5339
5340 if (mas_is_start(mas)) {
5341 /* First run or continue */
5342 if (mas->index > max)
5343 return true;
5344
5345 *entry = mas_walk(mas);
5346 if (*entry)
5347 return true;
5348
5349 }
5350
5351 if (unlikely(mas_is_ptr(mas)))
5352 goto ptr_out_of_range;
5353
5354 if (unlikely(mas_is_none(mas)))
5355 return true;
5356
5357 if (mas->index == max)
5358 return true;
5359
5360 return false;
5361
5362 ptr_out_of_range:
5363 mas->status = ma_none;
5364 mas->index = 1;
5365 mas->last = ULONG_MAX;
5366 return true;
5367 }
5368
5369 /**
5370 * mas_find() - On the first call, find the entry at or after mas->index up to
5371 * %max. Otherwise, find the entry after mas->index.
5372 * @mas: The maple state
5373 * @max: The maximum value to check.
5374 *
5375 * Must hold rcu_read_lock or the write lock.
5376 * If an entry exists, last and index are updated accordingly.
5377 * May set @mas->status to ma_overflow.
5378 *
5379 * Return: The entry or %NULL.
5380 */
mas_find(struct ma_state * mas,unsigned long max)5381 void *mas_find(struct ma_state *mas, unsigned long max)
5382 {
5383 void *entry = NULL;
5384
5385 if (mas_find_setup(mas, max, &entry))
5386 return entry;
5387
5388 /* Retries on dead nodes handled by mas_next_slot */
5389 entry = mas_next_slot(mas, max, false);
5390 /* Ignore overflow */
5391 mas->status = ma_active;
5392 return entry;
5393 }
5394 EXPORT_SYMBOL_GPL(mas_find);
5395
5396 /**
5397 * mas_find_range() - On the first call, find the entry at or after
5398 * mas->index up to %max. Otherwise, advance to the next slot mas->index.
5399 * @mas: The maple state
5400 * @max: The maximum value to check.
5401 *
5402 * Must hold rcu_read_lock or the write lock.
5403 * If an entry exists, last and index are updated accordingly.
5404 * May set @mas->status to ma_overflow.
5405 *
5406 * Return: The entry or %NULL.
5407 */
mas_find_range(struct ma_state * mas,unsigned long max)5408 void *mas_find_range(struct ma_state *mas, unsigned long max)
5409 {
5410 void *entry = NULL;
5411
5412 if (mas_find_setup(mas, max, &entry))
5413 return entry;
5414
5415 /* Retries on dead nodes handled by mas_next_slot */
5416 return mas_next_slot(mas, max, true);
5417 }
5418 EXPORT_SYMBOL_GPL(mas_find_range);
5419
5420 /**
5421 * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
5422 * @mas: The maple state
5423 * @min: The minimum index
5424 * @entry: Pointer to the entry
5425 *
5426 * Returns: True if entry is the answer, false otherwise.
5427 */
mas_find_rev_setup(struct ma_state * mas,unsigned long min,void ** entry)5428 static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
5429 void **entry)
5430 {
5431
5432 switch (mas->status) {
5433 case ma_active:
5434 goto active;
5435 case ma_start:
5436 break;
5437 case ma_pause:
5438 if (unlikely(mas->index <= min)) {
5439 mas->status = ma_underflow;
5440 return true;
5441 }
5442 mas->last = --mas->index;
5443 mas->status = ma_start;
5444 break;
5445 case ma_none:
5446 if (mas->index <= min)
5447 goto none;
5448
5449 mas->last = mas->index;
5450 mas->status = ma_start;
5451 break;
5452 case ma_overflow: /* user expects the mas to be one after where it is */
5453 if (unlikely(mas->index <= min)) {
5454 mas->status = ma_underflow;
5455 return true;
5456 }
5457
5458 mas->status = ma_active;
5459 break;
5460 case ma_underflow: /* user expects the mas to be one before where it is */
5461 if (unlikely(mas->index <= min))
5462 return true;
5463
5464 mas->status = ma_active;
5465 break;
5466 case ma_root:
5467 break;
5468 case ma_error:
5469 return true;
5470 }
5471
5472 if (mas_is_start(mas)) {
5473 /* First run or continue */
5474 if (mas->index < min)
5475 return true;
5476
5477 *entry = mas_walk(mas);
5478 if (*entry)
5479 return true;
5480 }
5481
5482 if (unlikely(mas_is_ptr(mas)))
5483 goto none;
5484
5485 if (unlikely(mas_is_none(mas))) {
5486 /*
5487 * Walked to the location, and there was nothing so the previous
5488 * location is 0.
5489 */
5490 mas->last = mas->index = 0;
5491 mas->status = ma_root;
5492 *entry = mas_root(mas);
5493 return true;
5494 }
5495
5496 active:
5497 if (mas->index < min)
5498 return true;
5499
5500 return false;
5501
5502 none:
5503 mas->status = ma_none;
5504 return true;
5505 }
5506
5507 /**
5508 * mas_find_rev: On the first call, find the first non-null entry at or below
5509 * mas->index down to %min. Otherwise find the first non-null entry below
5510 * mas->index down to %min.
5511 * @mas: The maple state
5512 * @min: The minimum value to check.
5513 *
5514 * Must hold rcu_read_lock or the write lock.
5515 * If an entry exists, last and index are updated accordingly.
5516 * May set @mas->status to ma_underflow.
5517 *
5518 * Return: The entry or %NULL.
5519 */
mas_find_rev(struct ma_state * mas,unsigned long min)5520 void *mas_find_rev(struct ma_state *mas, unsigned long min)
5521 {
5522 void *entry = NULL;
5523
5524 if (mas_find_rev_setup(mas, min, &entry))
5525 return entry;
5526
5527 /* Retries on dead nodes handled by mas_prev_slot */
5528 return mas_prev_slot(mas, min, false);
5529
5530 }
5531 EXPORT_SYMBOL_GPL(mas_find_rev);
5532
5533 /**
5534 * mas_find_range_rev: On the first call, find the first non-null entry at or
5535 * below mas->index down to %min. Otherwise advance to the previous slot after
5536 * mas->index down to %min.
5537 * @mas: The maple state
5538 * @min: The minimum value to check.
5539 *
5540 * Must hold rcu_read_lock or the write lock.
5541 * If an entry exists, last and index are updated accordingly.
5542 * May set @mas->status to ma_underflow.
5543 *
5544 * Return: The entry or %NULL.
5545 */
mas_find_range_rev(struct ma_state * mas,unsigned long min)5546 void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
5547 {
5548 void *entry = NULL;
5549
5550 if (mas_find_rev_setup(mas, min, &entry))
5551 return entry;
5552
5553 /* Retries on dead nodes handled by mas_prev_slot */
5554 return mas_prev_slot(mas, min, true);
5555 }
5556 EXPORT_SYMBOL_GPL(mas_find_range_rev);
5557
5558 /**
5559 * mas_erase() - Find the range in which index resides and erase the entire
5560 * range.
5561 * @mas: The maple state
5562 *
5563 * Must hold the write lock.
5564 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
5565 * erases that range.
5566 *
5567 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
5568 */
mas_erase(struct ma_state * mas)5569 void *mas_erase(struct ma_state *mas)
5570 {
5571 void *entry;
5572 unsigned long index = mas->index;
5573 MA_WR_STATE(wr_mas, mas, NULL);
5574
5575 if (!mas_is_active(mas) || !mas_is_start(mas))
5576 mas->status = ma_start;
5577
5578 write_retry:
5579 entry = mas_state_walk(mas);
5580 if (!entry)
5581 return NULL;
5582
5583 /* Must reset to ensure spanning writes of last slot are detected */
5584 mas_reset(mas);
5585 mas_wr_preallocate(&wr_mas, NULL);
5586 if (mas_nomem(mas, GFP_KERNEL)) {
5587 /* in case the range of entry changed when unlocked */
5588 mas->index = mas->last = index;
5589 goto write_retry;
5590 }
5591
5592 if (mas_is_err(mas))
5593 goto out;
5594
5595 mas_wr_store_entry(&wr_mas);
5596 out:
5597 mas_destroy(mas);
5598 return entry;
5599 }
5600 EXPORT_SYMBOL_GPL(mas_erase);
5601
5602 /**
5603 * mas_nomem() - Check if there was an error allocating and do the allocation
5604 * if necessary If there are allocations, then free them.
5605 * @mas: The maple state
5606 * @gfp: The GFP_FLAGS to use for allocations
5607 * Return: true on allocation, false otherwise.
5608 */
mas_nomem(struct ma_state * mas,gfp_t gfp)5609 bool mas_nomem(struct ma_state *mas, gfp_t gfp)
5610 __must_hold(mas->tree->ma_lock)
5611 {
5612 if (likely(mas->node != MA_ERROR(-ENOMEM)))
5613 return false;
5614
5615 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
5616 mtree_unlock(mas->tree);
5617 mas_alloc_nodes(mas, gfp);
5618 mtree_lock(mas->tree);
5619 } else {
5620 mas_alloc_nodes(mas, gfp);
5621 }
5622
5623 if (!mas->sheaf && !mas->alloc)
5624 return false;
5625
5626 mas->status = ma_start;
5627 return true;
5628 }
5629
maple_tree_init(void)5630 void __init maple_tree_init(void)
5631 {
5632 struct kmem_cache_args args = {
5633 .align = sizeof(struct maple_node),
5634 .sheaf_capacity = 32,
5635 };
5636
5637 maple_node_cache = kmem_cache_create("maple_node",
5638 sizeof(struct maple_node), &args,
5639 SLAB_PANIC);
5640 }
5641
5642 /**
5643 * mtree_load() - Load a value stored in a maple tree
5644 * @mt: The maple tree
5645 * @index: The index to load
5646 *
5647 * Return: the entry or %NULL
5648 */
mtree_load(struct maple_tree * mt,unsigned long index)5649 void *mtree_load(struct maple_tree *mt, unsigned long index)
5650 {
5651 MA_STATE(mas, mt, index, index);
5652 void *entry;
5653
5654 trace_ma_read(TP_FCT, &mas);
5655 rcu_read_lock();
5656 retry:
5657 entry = mas_start(&mas);
5658 if (unlikely(mas_is_none(&mas)))
5659 goto unlock;
5660
5661 if (unlikely(mas_is_ptr(&mas))) {
5662 if (index)
5663 entry = NULL;
5664
5665 goto unlock;
5666 }
5667
5668 entry = mtree_lookup_walk(&mas);
5669 if (!entry && unlikely(mas_is_start(&mas)))
5670 goto retry;
5671 unlock:
5672 rcu_read_unlock();
5673 if (xa_is_zero(entry))
5674 return NULL;
5675
5676 return entry;
5677 }
5678 EXPORT_SYMBOL(mtree_load);
5679
5680 /**
5681 * mtree_store_range() - Store an entry at a given range.
5682 * @mt: The maple tree
5683 * @index: The start of the range
5684 * @last: The end of the range
5685 * @entry: The entry to store
5686 * @gfp: The GFP_FLAGS to use for allocations
5687 *
5688 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5689 * be allocated.
5690 */
mtree_store_range(struct maple_tree * mt,unsigned long index,unsigned long last,void * entry,gfp_t gfp)5691 int mtree_store_range(struct maple_tree *mt, unsigned long index,
5692 unsigned long last, void *entry, gfp_t gfp)
5693 {
5694 MA_STATE(mas, mt, index, last);
5695 int ret = 0;
5696
5697 trace_ma_write(TP_FCT, &mas, 0, entry);
5698 if (WARN_ON_ONCE(xa_is_advanced(entry)))
5699 return -EINVAL;
5700
5701 if (index > last)
5702 return -EINVAL;
5703
5704 mtree_lock(mt);
5705 ret = mas_store_gfp(&mas, entry, gfp);
5706 mtree_unlock(mt);
5707
5708 return ret;
5709 }
5710 EXPORT_SYMBOL(mtree_store_range);
5711
5712 /**
5713 * mtree_store() - Store an entry at a given index.
5714 * @mt: The maple tree
5715 * @index: The index to store the value
5716 * @entry: The entry to store
5717 * @gfp: The GFP_FLAGS to use for allocations
5718 *
5719 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5720 * be allocated.
5721 */
mtree_store(struct maple_tree * mt,unsigned long index,void * entry,gfp_t gfp)5722 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
5723 gfp_t gfp)
5724 {
5725 return mtree_store_range(mt, index, index, entry, gfp);
5726 }
5727 EXPORT_SYMBOL(mtree_store);
5728
5729 /**
5730 * mtree_insert_range() - Insert an entry at a given range if there is no value.
5731 * @mt: The maple tree
5732 * @first: The start of the range
5733 * @last: The end of the range
5734 * @entry: The entry to store
5735 * @gfp: The GFP_FLAGS to use for allocations.
5736 *
5737 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid
5738 * request, -ENOMEM if memory could not be allocated.
5739 */
mtree_insert_range(struct maple_tree * mt,unsigned long first,unsigned long last,void * entry,gfp_t gfp)5740 int mtree_insert_range(struct maple_tree *mt, unsigned long first,
5741 unsigned long last, void *entry, gfp_t gfp)
5742 {
5743 MA_STATE(ms, mt, first, last);
5744 int ret = 0;
5745
5746 if (WARN_ON_ONCE(xa_is_advanced(entry)))
5747 return -EINVAL;
5748
5749 if (first > last)
5750 return -EINVAL;
5751
5752 mtree_lock(mt);
5753 retry:
5754 mas_insert(&ms, entry);
5755 if (mas_nomem(&ms, gfp))
5756 goto retry;
5757
5758 mtree_unlock(mt);
5759 if (mas_is_err(&ms))
5760 ret = xa_err(ms.node);
5761
5762 mas_destroy(&ms);
5763 return ret;
5764 }
5765 EXPORT_SYMBOL(mtree_insert_range);
5766
5767 /**
5768 * mtree_insert() - Insert an entry at a given index if there is no value.
5769 * @mt: The maple tree
5770 * @index : The index to store the value
5771 * @entry: The entry to store
5772 * @gfp: The GFP_FLAGS to use for allocations.
5773 *
5774 * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid
5775 * request, -ENOMEM if memory could not be allocated.
5776 */
mtree_insert(struct maple_tree * mt,unsigned long index,void * entry,gfp_t gfp)5777 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
5778 gfp_t gfp)
5779 {
5780 return mtree_insert_range(mt, index, index, entry, gfp);
5781 }
5782 EXPORT_SYMBOL(mtree_insert);
5783
mtree_alloc_range(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long size,unsigned long min,unsigned long max,gfp_t gfp)5784 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
5785 void *entry, unsigned long size, unsigned long min,
5786 unsigned long max, gfp_t gfp)
5787 {
5788 int ret = 0;
5789
5790 MA_STATE(mas, mt, 0, 0);
5791 if (!mt_is_alloc(mt))
5792 return -EINVAL;
5793
5794 if (WARN_ON_ONCE(mt_is_reserved(entry)))
5795 return -EINVAL;
5796
5797 mtree_lock(mt);
5798 retry:
5799 ret = mas_empty_area(&mas, min, max, size);
5800 if (ret)
5801 goto unlock;
5802
5803 mas_insert(&mas, entry);
5804 /*
5805 * mas_nomem() may release the lock, causing the allocated area
5806 * to be unavailable, so try to allocate a free area again.
5807 */
5808 if (mas_nomem(&mas, gfp))
5809 goto retry;
5810
5811 if (mas_is_err(&mas))
5812 ret = xa_err(mas.node);
5813 else
5814 *startp = mas.index;
5815
5816 unlock:
5817 mtree_unlock(mt);
5818 mas_destroy(&mas);
5819 return ret;
5820 }
5821 EXPORT_SYMBOL(mtree_alloc_range);
5822
5823 /**
5824 * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
5825 * @mt: The maple tree.
5826 * @startp: Pointer to ID.
5827 * @range_lo: Lower bound of range to search.
5828 * @range_hi: Upper bound of range to search.
5829 * @entry: The entry to store.
5830 * @next: Pointer to next ID to allocate.
5831 * @gfp: The GFP_FLAGS to use for allocations.
5832 *
5833 * Finds an empty entry in @mt after @next, stores the new index into
5834 * the @id pointer, stores the entry at that index, then updates @next.
5835 *
5836 * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
5837 *
5838 * Context: Any context. Takes and releases the mt.lock. May sleep if
5839 * the @gfp flags permit.
5840 *
5841 * Return: 0 if the allocation succeeded without wrapping, 1 if the
5842 * allocation succeeded after wrapping, -ENOMEM if memory could not be
5843 * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
5844 * free entries.
5845 */
mtree_alloc_cyclic(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long range_lo,unsigned long range_hi,unsigned long * next,gfp_t gfp)5846 int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
5847 void *entry, unsigned long range_lo, unsigned long range_hi,
5848 unsigned long *next, gfp_t gfp)
5849 {
5850 int ret;
5851
5852 MA_STATE(mas, mt, 0, 0);
5853
5854 if (!mt_is_alloc(mt))
5855 return -EINVAL;
5856 if (WARN_ON_ONCE(mt_is_reserved(entry)))
5857 return -EINVAL;
5858 mtree_lock(mt);
5859 ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
5860 next, gfp);
5861 mtree_unlock(mt);
5862 return ret;
5863 }
5864 EXPORT_SYMBOL(mtree_alloc_cyclic);
5865
mtree_alloc_rrange(struct maple_tree * mt,unsigned long * startp,void * entry,unsigned long size,unsigned long min,unsigned long max,gfp_t gfp)5866 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
5867 void *entry, unsigned long size, unsigned long min,
5868 unsigned long max, gfp_t gfp)
5869 {
5870 int ret = 0;
5871
5872 MA_STATE(mas, mt, 0, 0);
5873 if (!mt_is_alloc(mt))
5874 return -EINVAL;
5875
5876 if (WARN_ON_ONCE(mt_is_reserved(entry)))
5877 return -EINVAL;
5878
5879 mtree_lock(mt);
5880 retry:
5881 ret = mas_empty_area_rev(&mas, min, max, size);
5882 if (ret)
5883 goto unlock;
5884
5885 mas_insert(&mas, entry);
5886 /*
5887 * mas_nomem() may release the lock, causing the allocated area
5888 * to be unavailable, so try to allocate a free area again.
5889 */
5890 if (mas_nomem(&mas, gfp))
5891 goto retry;
5892
5893 if (mas_is_err(&mas))
5894 ret = xa_err(mas.node);
5895 else
5896 *startp = mas.index;
5897
5898 unlock:
5899 mtree_unlock(mt);
5900 mas_destroy(&mas);
5901 return ret;
5902 }
5903 EXPORT_SYMBOL(mtree_alloc_rrange);
5904
5905 /**
5906 * mtree_erase() - Find an index and erase the entire range.
5907 * @mt: The maple tree
5908 * @index: The index to erase
5909 *
5910 * Erasing is the same as a walk to an entry then a store of a NULL to that
5911 * ENTIRE range. In fact, it is implemented as such using the advanced API.
5912 *
5913 * Return: The entry stored at the @index or %NULL
5914 */
mtree_erase(struct maple_tree * mt,unsigned long index)5915 void *mtree_erase(struct maple_tree *mt, unsigned long index)
5916 {
5917 void *entry = NULL;
5918
5919 MA_STATE(mas, mt, index, index);
5920 trace_ma_op(TP_FCT, &mas);
5921
5922 mtree_lock(mt);
5923 entry = mas_erase(&mas);
5924 mtree_unlock(mt);
5925
5926 return entry;
5927 }
5928 EXPORT_SYMBOL(mtree_erase);
5929
5930 /*
5931 * mas_dup_free() - Free an incomplete duplication of a tree.
5932 * @mas: The maple state of a incomplete tree.
5933 *
5934 * The parameter @mas->node passed in indicates that the allocation failed on
5935 * this node. This function frees all nodes starting from @mas->node in the
5936 * reverse order of mas_dup_build(). There is no need to hold the source tree
5937 * lock at this time.
5938 */
mas_dup_free(struct ma_state * mas)5939 static void mas_dup_free(struct ma_state *mas)
5940 {
5941 struct maple_node *node;
5942 enum maple_type type;
5943 void __rcu **slots;
5944 unsigned char count, i;
5945
5946 /* Maybe the first node allocation failed. */
5947 if (mas_is_none(mas))
5948 return;
5949
5950 while (!mte_is_root(mas->node)) {
5951 mas_ascend(mas);
5952 if (mas->offset) {
5953 mas->offset--;
5954 do {
5955 mas_descend(mas);
5956 mas->offset = mas_data_end(mas);
5957 } while (!mte_is_leaf(mas->node));
5958
5959 mas_ascend(mas);
5960 }
5961
5962 node = mte_to_node(mas->node);
5963 type = mte_node_type(mas->node);
5964 slots = ma_slots(node, type);
5965 count = mas_data_end(mas) + 1;
5966 for (i = 0; i < count; i++)
5967 ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
5968 mt_free_bulk(count, slots);
5969 }
5970
5971 node = mte_to_node(mas->node);
5972 kfree(node);
5973 }
5974
5975 /*
5976 * mas_copy_node() - Copy a maple node and replace the parent.
5977 * @mas: The maple state of source tree.
5978 * @new_mas: The maple state of new tree.
5979 * @parent: The parent of the new node.
5980 *
5981 * Copy @mas->node to @new_mas->node, set @parent to be the parent of
5982 * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
5983 */
mas_copy_node(struct ma_state * mas,struct ma_state * new_mas,struct maple_pnode * parent)5984 static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
5985 struct maple_pnode *parent)
5986 {
5987 struct maple_node *node = mte_to_node(mas->node);
5988 struct maple_node *new_node = mte_to_node(new_mas->node);
5989 unsigned long val;
5990
5991 /* Copy the node completely. */
5992 memcpy(new_node, node, sizeof(struct maple_node));
5993 /* Update the parent node pointer. */
5994 val = (unsigned long)node->parent & MAPLE_NODE_MASK;
5995 new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
5996 }
5997
5998 /*
5999 * mas_dup_alloc() - Allocate child nodes for a maple node.
6000 * @mas: The maple state of source tree.
6001 * @new_mas: The maple state of new tree.
6002 * @gfp: The GFP_FLAGS to use for allocations.
6003 *
6004 * This function allocates child nodes for @new_mas->node during the duplication
6005 * process. If memory allocation fails, @mas is set to -ENOMEM.
6006 */
mas_dup_alloc(struct ma_state * mas,struct ma_state * new_mas,gfp_t gfp)6007 static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
6008 gfp_t gfp)
6009 {
6010 struct maple_node *node = mte_to_node(mas->node);
6011 struct maple_node *new_node = mte_to_node(new_mas->node);
6012 enum maple_type type;
6013 unsigned char count, i;
6014 void __rcu **slots;
6015 void __rcu **new_slots;
6016 unsigned long val;
6017
6018 /* Allocate memory for child nodes. */
6019 type = mte_node_type(mas->node);
6020 new_slots = ma_slots(new_node, type);
6021 count = mas->node_request = mas_data_end(mas) + 1;
6022 mas_alloc_nodes(mas, gfp);
6023 if (unlikely(mas_is_err(mas)))
6024 return;
6025
6026 slots = ma_slots(node, type);
6027 for (i = 0; i < count; i++) {
6028 val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
6029 val &= MAPLE_NODE_MASK;
6030 /*
6031 * Warning, see rcu_assign_pointer() documentation. Since this
6032 * is a duplication of a tree, there are no readers walking the
6033 * tree until after the rcu_assign_pointer() call in
6034 * mas_dup_build().
6035 */
6036 RCU_INIT_POINTER(new_slots[i],
6037 ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
6038 val));
6039 }
6040 }
6041
6042 /*
6043 * mas_dup_build() - Build a new maple tree from a source tree
6044 * @mas: The maple state of source tree, need to be in MAS_START state.
6045 * @new_mas: The maple state of new tree, need to be in MAS_START state.
6046 * @gfp: The GFP_FLAGS to use for allocations.
6047 *
6048 * This function builds a new tree in DFS preorder. If the memory allocation
6049 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
6050 * last node. mas_dup_free() will free the incomplete duplication of a tree.
6051 *
6052 * Note that the attributes of the two trees need to be exactly the same, and the
6053 * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
6054 */
mas_dup_build(struct ma_state * mas,struct ma_state * new_mas,gfp_t gfp)6055 static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
6056 gfp_t gfp)
6057 {
6058 struct maple_node *node;
6059 struct maple_pnode *parent = NULL;
6060 struct maple_enode *root;
6061 enum maple_type type;
6062
6063 if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
6064 unlikely(!mtree_empty(new_mas->tree))) {
6065 mas_set_err(mas, -EINVAL);
6066 return;
6067 }
6068
6069 root = mas_start(mas);
6070 if (mas_is_ptr(mas) || mas_is_none(mas))
6071 goto set_new_tree;
6072
6073 node = mt_alloc_one(gfp);
6074 if (!node) {
6075 new_mas->status = ma_none;
6076 mas_set_err(mas, -ENOMEM);
6077 return;
6078 }
6079
6080 type = mte_node_type(mas->node);
6081 root = mt_mk_node(node, type);
6082 new_mas->node = root;
6083 new_mas->min = 0;
6084 new_mas->max = ULONG_MAX;
6085 root = mte_mk_root(root);
6086 while (1) {
6087 mas_copy_node(mas, new_mas, parent);
6088 if (!mte_is_leaf(mas->node)) {
6089 /* Only allocate child nodes for non-leaf nodes. */
6090 mas_dup_alloc(mas, new_mas, gfp);
6091 if (unlikely(mas_is_err(mas)))
6092 goto empty_mas;
6093 } else {
6094 /*
6095 * This is the last leaf node and duplication is
6096 * completed.
6097 */
6098 if (mas->max == ULONG_MAX)
6099 goto done;
6100
6101 /* This is not the last leaf node and needs to go up. */
6102 do {
6103 mas_ascend(mas);
6104 mas_ascend(new_mas);
6105 } while (mas->offset == mas_data_end(mas));
6106
6107 /* Move to the next subtree. */
6108 mas->offset++;
6109 new_mas->offset++;
6110 }
6111
6112 mas_descend(mas);
6113 parent = ma_parent_ptr(mte_to_node(new_mas->node));
6114 mas_descend(new_mas);
6115 mas->offset = 0;
6116 new_mas->offset = 0;
6117 }
6118 done:
6119 /* Specially handle the parent of the root node. */
6120 mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
6121 set_new_tree:
6122 /* Make them the same height */
6123 new_mas->tree->ma_flags = mas->tree->ma_flags;
6124 rcu_assign_pointer(new_mas->tree->ma_root, root);
6125 empty_mas:
6126 mas_empty_nodes(mas);
6127 }
6128
6129 /**
6130 * __mt_dup(): Duplicate an entire maple tree
6131 * @mt: The source maple tree
6132 * @new: The new maple tree
6133 * @gfp: The GFP_FLAGS to use for allocations
6134 *
6135 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6136 * traversal. It uses memcpy() to copy nodes in the source tree and allocate
6137 * new child nodes in non-leaf nodes. The new node is exactly the same as the
6138 * source node except for all the addresses stored in it. It will be faster than
6139 * traversing all elements in the source tree and inserting them one by one into
6140 * the new tree.
6141 * The user needs to ensure that the attributes of the source tree and the new
6142 * tree are the same, and the new tree needs to be an empty tree, otherwise
6143 * -EINVAL will be returned.
6144 * Note that the user needs to manually lock the source tree and the new tree.
6145 *
6146 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If
6147 * the attributes of the two trees are different or the new tree is not an empty
6148 * tree.
6149 */
__mt_dup(struct maple_tree * mt,struct maple_tree * new,gfp_t gfp)6150 int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6151 {
6152 int ret = 0;
6153 MA_STATE(mas, mt, 0, 0);
6154 MA_STATE(new_mas, new, 0, 0);
6155
6156 mas_dup_build(&mas, &new_mas, gfp);
6157 if (unlikely(mas_is_err(&mas))) {
6158 ret = xa_err(mas.node);
6159 if (ret == -ENOMEM)
6160 mas_dup_free(&new_mas);
6161 }
6162
6163 return ret;
6164 }
6165 EXPORT_SYMBOL(__mt_dup);
6166
6167 /**
6168 * mtree_dup(): Duplicate an entire maple tree
6169 * @mt: The source maple tree
6170 * @new: The new maple tree
6171 * @gfp: The GFP_FLAGS to use for allocations
6172 *
6173 * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6174 * traversal. It uses memcpy() to copy nodes in the source tree and allocate
6175 * new child nodes in non-leaf nodes. The new node is exactly the same as the
6176 * source node except for all the addresses stored in it. It will be faster than
6177 * traversing all elements in the source tree and inserting them one by one into
6178 * the new tree.
6179 * The user needs to ensure that the attributes of the source tree and the new
6180 * tree are the same, and the new tree needs to be an empty tree, otherwise
6181 * -EINVAL will be returned.
6182 *
6183 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If
6184 * the attributes of the two trees are different or the new tree is not an empty
6185 * tree.
6186 */
mtree_dup(struct maple_tree * mt,struct maple_tree * new,gfp_t gfp)6187 int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6188 {
6189 int ret = 0;
6190 MA_STATE(mas, mt, 0, 0);
6191 MA_STATE(new_mas, new, 0, 0);
6192
6193 mas_lock(&new_mas);
6194 mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
6195 mas_dup_build(&mas, &new_mas, gfp);
6196 mas_unlock(&mas);
6197 if (unlikely(mas_is_err(&mas))) {
6198 ret = xa_err(mas.node);
6199 if (ret == -ENOMEM)
6200 mas_dup_free(&new_mas);
6201 }
6202
6203 mas_unlock(&new_mas);
6204 return ret;
6205 }
6206 EXPORT_SYMBOL(mtree_dup);
6207
6208 /**
6209 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6210 * @mt: The maple tree
6211 *
6212 * Note: Does not handle locking.
6213 */
__mt_destroy(struct maple_tree * mt)6214 void __mt_destroy(struct maple_tree *mt)
6215 {
6216 void *root = mt_root_locked(mt);
6217
6218 rcu_assign_pointer(mt->ma_root, NULL);
6219 if (xa_is_node(root))
6220 mte_destroy_walk(root, mt);
6221
6222 mt->ma_flags = mt_attr(mt);
6223 }
6224 EXPORT_SYMBOL_GPL(__mt_destroy);
6225
6226 /**
6227 * mtree_destroy() - Destroy a maple tree
6228 * @mt: The maple tree
6229 *
6230 * Frees all resources used by the tree. Handles locking.
6231 */
mtree_destroy(struct maple_tree * mt)6232 void mtree_destroy(struct maple_tree *mt)
6233 {
6234 mtree_lock(mt);
6235 __mt_destroy(mt);
6236 mtree_unlock(mt);
6237 }
6238 EXPORT_SYMBOL(mtree_destroy);
6239
6240 /**
6241 * mt_find() - Search from the start up until an entry is found.
6242 * @mt: The maple tree
6243 * @index: Pointer which contains the start location of the search
6244 * @max: The maximum value of the search range
6245 *
6246 * Takes RCU read lock internally to protect the search, which does not
6247 * protect the returned pointer after dropping RCU read lock.
6248 * See also: Documentation/core-api/maple_tree.rst
6249 *
6250 * In case that an entry is found @index is updated to point to the next
6251 * possible entry independent whether the found entry is occupying a
6252 * single index or a range if indices.
6253 *
6254 * Return: The entry at or after the @index or %NULL
6255 */
mt_find(struct maple_tree * mt,unsigned long * index,unsigned long max)6256 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6257 {
6258 MA_STATE(mas, mt, *index, *index);
6259 void *entry;
6260 #ifdef CONFIG_DEBUG_MAPLE_TREE
6261 unsigned long copy = *index;
6262 #endif
6263
6264 trace_ma_read(TP_FCT, &mas);
6265
6266 if ((*index) > max)
6267 return NULL;
6268
6269 rcu_read_lock();
6270 retry:
6271 entry = mas_state_walk(&mas);
6272 if (mas_is_start(&mas))
6273 goto retry;
6274
6275 if (unlikely(xa_is_zero(entry)))
6276 entry = NULL;
6277
6278 if (entry)
6279 goto unlock;
6280
6281 while (mas_is_active(&mas) && (mas.last < max)) {
6282 entry = mas_next_slot(&mas, max, false);
6283 if (likely(entry && !xa_is_zero(entry)))
6284 break;
6285 }
6286
6287 if (unlikely(xa_is_zero(entry)))
6288 entry = NULL;
6289 unlock:
6290 rcu_read_unlock();
6291 if (likely(entry)) {
6292 *index = mas.last + 1;
6293 #ifdef CONFIG_DEBUG_MAPLE_TREE
6294 if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
6295 pr_err("index not increased! %lx <= %lx\n",
6296 *index, copy);
6297 #endif
6298 }
6299
6300 return entry;
6301 }
6302 EXPORT_SYMBOL(mt_find);
6303
6304 /**
6305 * mt_find_after() - Search from the start up until an entry is found.
6306 * @mt: The maple tree
6307 * @index: Pointer which contains the start location of the search
6308 * @max: The maximum value to check
6309 *
6310 * Same as mt_find() except that it checks @index for 0 before
6311 * searching. If @index == 0, the search is aborted. This covers a wrap
6312 * around of @index to 0 in an iterator loop.
6313 *
6314 * Return: The entry at or after the @index or %NULL
6315 */
mt_find_after(struct maple_tree * mt,unsigned long * index,unsigned long max)6316 void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6317 unsigned long max)
6318 {
6319 if (!(*index))
6320 return NULL;
6321
6322 return mt_find(mt, index, max);
6323 }
6324 EXPORT_SYMBOL(mt_find_after);
6325
6326 #ifdef CONFIG_DEBUG_MAPLE_TREE
6327 atomic_t maple_tree_tests_run;
6328 EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6329 atomic_t maple_tree_tests_passed;
6330 EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6331
6332 #ifndef __KERNEL__
6333 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
mt_set_non_kernel(unsigned int val)6334 void mt_set_non_kernel(unsigned int val)
6335 {
6336 kmem_cache_set_non_kernel(maple_node_cache, val);
6337 }
6338
6339 extern void kmem_cache_set_callback(struct kmem_cache *cachep,
6340 void (*callback)(void *));
mt_set_callback(void (* callback)(void *))6341 void mt_set_callback(void (*callback)(void *))
6342 {
6343 kmem_cache_set_callback(maple_node_cache, callback);
6344 }
6345
6346 extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
mt_set_private(void * private)6347 void mt_set_private(void *private)
6348 {
6349 kmem_cache_set_private(maple_node_cache, private);
6350 }
6351
6352 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
mt_get_alloc_size(void)6353 unsigned long mt_get_alloc_size(void)
6354 {
6355 return kmem_cache_get_alloc(maple_node_cache);
6356 }
6357
6358 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
mt_zero_nr_tallocated(void)6359 void mt_zero_nr_tallocated(void)
6360 {
6361 kmem_cache_zero_nr_tallocated(maple_node_cache);
6362 }
6363
6364 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
mt_nr_tallocated(void)6365 unsigned int mt_nr_tallocated(void)
6366 {
6367 return kmem_cache_nr_tallocated(maple_node_cache);
6368 }
6369
6370 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
mt_nr_allocated(void)6371 unsigned int mt_nr_allocated(void)
6372 {
6373 return kmem_cache_nr_allocated(maple_node_cache);
6374 }
6375
mt_cache_shrink(void)6376 void mt_cache_shrink(void)
6377 {
6378 }
6379 #else
6380 /*
6381 * mt_cache_shrink() - For testing, don't use this.
6382 *
6383 * Certain testcases can trigger an OOM when combined with other memory
6384 * debugging configuration options. This function is used to reduce the
6385 * possibility of an out of memory even due to kmem_cache objects remaining
6386 * around for longer than usual.
6387 */
mt_cache_shrink(void)6388 void mt_cache_shrink(void)
6389 {
6390 kmem_cache_shrink(maple_node_cache);
6391
6392 }
6393 EXPORT_SYMBOL_GPL(mt_cache_shrink);
6394
6395 #endif /* not defined __KERNEL__ */
6396 /*
6397 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
6398 * @mas: The maple state
6399 * @offset: The offset into the slot array to fetch.
6400 *
6401 * Return: The entry stored at @offset.
6402 */
mas_get_slot(struct ma_state * mas,unsigned char offset)6403 static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
6404 unsigned char offset)
6405 {
6406 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
6407 offset);
6408 }
6409
6410 /* Depth first search, post-order */
mas_dfs_postorder(struct ma_state * mas,unsigned long max)6411 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
6412 {
6413
6414 struct maple_enode *p, *mn = mas->node;
6415 unsigned long p_min, p_max;
6416
6417 mas_next_node(mas, mas_mn(mas), max);
6418 if (!mas_is_overflow(mas))
6419 return;
6420
6421 if (mte_is_root(mn))
6422 return;
6423
6424 mas->node = mn;
6425 mas_ascend(mas);
6426 do {
6427 p = mas->node;
6428 p_min = mas->min;
6429 p_max = mas->max;
6430 mas_prev_node(mas, 0);
6431 } while (!mas_is_underflow(mas));
6432
6433 mas->node = p;
6434 mas->max = p_max;
6435 mas->min = p_min;
6436 }
6437
6438 /* Tree validations */
6439 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6440 unsigned long min, unsigned long max, unsigned int depth,
6441 enum mt_dump_format format);
mt_dump_range(unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)6442 static void mt_dump_range(unsigned long min, unsigned long max,
6443 unsigned int depth, enum mt_dump_format format)
6444 {
6445 static const char spaces[] = " ";
6446
6447 switch(format) {
6448 case mt_dump_hex:
6449 if (min == max)
6450 pr_info("%.*s%lx: ", depth * 2, spaces, min);
6451 else
6452 pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
6453 break;
6454 case mt_dump_dec:
6455 if (min == max)
6456 pr_info("%.*s%lu: ", depth * 2, spaces, min);
6457 else
6458 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
6459 }
6460 }
6461
mt_dump_entry(void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)6462 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
6463 unsigned int depth, enum mt_dump_format format)
6464 {
6465 mt_dump_range(min, max, depth, format);
6466
6467 if (xa_is_value(entry))
6468 pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry),
6469 xa_to_value(entry), entry);
6470 else if (xa_is_zero(entry))
6471 pr_cont("zero (%ld)\n", xa_to_internal(entry));
6472 else if (mt_is_reserved(entry))
6473 pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry);
6474 else
6475 pr_cont(PTR_FMT "\n", entry);
6476 }
6477
mt_dump_range64(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)6478 static void mt_dump_range64(const struct maple_tree *mt, void *entry,
6479 unsigned long min, unsigned long max, unsigned int depth,
6480 enum mt_dump_format format)
6481 {
6482 struct maple_range_64 *node = &mte_to_node(entry)->mr64;
6483 bool leaf = mte_is_leaf(entry);
6484 unsigned long first = min;
6485 int i;
6486
6487 pr_cont(" contents: ");
6488 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
6489 switch(format) {
6490 case mt_dump_hex:
6491 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
6492 break;
6493 case mt_dump_dec:
6494 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
6495 }
6496 }
6497 pr_cont(PTR_FMT "\n", node->slot[i]);
6498 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
6499 unsigned long last = max;
6500
6501 if (i < (MAPLE_RANGE64_SLOTS - 1))
6502 last = node->pivot[i];
6503 else if (!node->slot[i] && max != mt_node_max(entry))
6504 break;
6505 if (last == 0 && i > 0)
6506 break;
6507 if (leaf)
6508 mt_dump_entry(mt_slot(mt, node->slot, i),
6509 first, last, depth + 1, format);
6510 else if (node->slot[i])
6511 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6512 first, last, depth + 1, format);
6513
6514 if (last == max)
6515 break;
6516 if (last > max) {
6517 switch(format) {
6518 case mt_dump_hex:
6519 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
6520 node, last, max, i);
6521 break;
6522 case mt_dump_dec:
6523 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
6524 node, last, max, i);
6525 }
6526 }
6527 first = last + 1;
6528 }
6529 }
6530
mt_dump_arange64(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)6531 static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
6532 unsigned long min, unsigned long max, unsigned int depth,
6533 enum mt_dump_format format)
6534 {
6535 struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
6536 unsigned long first = min;
6537 int i;
6538
6539 pr_cont(" contents: ");
6540 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6541 switch (format) {
6542 case mt_dump_hex:
6543 pr_cont("%lx ", node->gap[i]);
6544 break;
6545 case mt_dump_dec:
6546 pr_cont("%lu ", node->gap[i]);
6547 }
6548 }
6549 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
6550 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
6551 switch (format) {
6552 case mt_dump_hex:
6553 pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
6554 break;
6555 case mt_dump_dec:
6556 pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
6557 }
6558 }
6559 pr_cont(PTR_FMT "\n", node->slot[i]);
6560 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6561 unsigned long last = max;
6562
6563 if (i < (MAPLE_ARANGE64_SLOTS - 1))
6564 last = node->pivot[i];
6565 else if (!node->slot[i])
6566 break;
6567 if (last == 0 && i > 0)
6568 break;
6569 if (node->slot[i])
6570 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6571 first, last, depth + 1, format);
6572
6573 if (last == max)
6574 break;
6575 if (last > max) {
6576 switch(format) {
6577 case mt_dump_hex:
6578 pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
6579 node, last, max, i);
6580 break;
6581 case mt_dump_dec:
6582 pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
6583 node, last, max, i);
6584 }
6585 }
6586 first = last + 1;
6587 }
6588 }
6589
mt_dump_node(const struct maple_tree * mt,void * entry,unsigned long min,unsigned long max,unsigned int depth,enum mt_dump_format format)6590 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6591 unsigned long min, unsigned long max, unsigned int depth,
6592 enum mt_dump_format format)
6593 {
6594 struct maple_node *node = mte_to_node(entry);
6595 unsigned int type = mte_node_type(entry);
6596 unsigned int i;
6597
6598 mt_dump_range(min, max, depth, format);
6599
6600 pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node,
6601 depth, type, node ? node->parent : NULL);
6602 switch (type) {
6603 case maple_dense:
6604 pr_cont("\n");
6605 for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
6606 if (min + i > max)
6607 pr_cont("OUT OF RANGE: ");
6608 mt_dump_entry(mt_slot(mt, node->slot, i),
6609 min + i, min + i, depth, format);
6610 }
6611 break;
6612 case maple_leaf_64:
6613 case maple_range_64:
6614 mt_dump_range64(mt, entry, min, max, depth, format);
6615 break;
6616 case maple_arange_64:
6617 mt_dump_arange64(mt, entry, min, max, depth, format);
6618 break;
6619
6620 default:
6621 pr_cont(" UNKNOWN TYPE\n");
6622 }
6623 }
6624
mt_dump(const struct maple_tree * mt,enum mt_dump_format format)6625 void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
6626 {
6627 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
6628
6629 pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n",
6630 mt, mt->ma_flags, mt_height(mt), entry);
6631 if (xa_is_node(entry))
6632 mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
6633 else if (entry)
6634 mt_dump_entry(entry, 0, 0, 0, format);
6635 else
6636 pr_info("(empty)\n");
6637 }
6638 EXPORT_SYMBOL_GPL(mt_dump);
6639
6640 /*
6641 * Calculate the maximum gap in a node and check if that's what is reported in
6642 * the parent (unless root).
6643 */
mas_validate_gaps(struct ma_state * mas)6644 static void mas_validate_gaps(struct ma_state *mas)
6645 {
6646 struct maple_enode *mte = mas->node;
6647 struct maple_node *p_mn, *node = mte_to_node(mte);
6648 enum maple_type mt = mte_node_type(mas->node);
6649 unsigned long gap = 0, max_gap = 0;
6650 unsigned long p_end, p_start = mas->min;
6651 unsigned char p_slot, offset;
6652 unsigned long *gaps = NULL;
6653 unsigned long *pivots = ma_pivots(node, mt);
6654 unsigned int i;
6655
6656 if (ma_is_dense(mt)) {
6657 for (i = 0; i < mt_slot_count(mte); i++) {
6658 if (mas_get_slot(mas, i)) {
6659 if (gap > max_gap)
6660 max_gap = gap;
6661 gap = 0;
6662 continue;
6663 }
6664 gap++;
6665 }
6666 goto counted;
6667 }
6668
6669 gaps = ma_gaps(node, mt);
6670 for (i = 0; i < mt_slot_count(mte); i++) {
6671 p_end = mas_safe_pivot(mas, pivots, i, mt);
6672
6673 if (!gaps) {
6674 if (!mas_get_slot(mas, i))
6675 gap = p_end - p_start + 1;
6676 } else {
6677 void *entry = mas_get_slot(mas, i);
6678
6679 gap = gaps[i];
6680 MT_BUG_ON(mas->tree, !entry);
6681
6682 if (gap > p_end - p_start + 1) {
6683 pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n",
6684 mas_mn(mas), i, gap, p_end, p_start,
6685 p_end - p_start + 1);
6686 MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
6687 }
6688 }
6689
6690 if (gap > max_gap)
6691 max_gap = gap;
6692
6693 p_start = p_end + 1;
6694 if (p_end >= mas->max)
6695 break;
6696 }
6697
6698 counted:
6699 if (mt == maple_arange_64) {
6700 MT_BUG_ON(mas->tree, !gaps);
6701 offset = ma_meta_gap(node);
6702 if (offset > i) {
6703 pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset);
6704 MT_BUG_ON(mas->tree, 1);
6705 }
6706
6707 if (gaps[offset] != max_gap) {
6708 pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n",
6709 node, offset, max_gap);
6710 MT_BUG_ON(mas->tree, 1);
6711 }
6712
6713 for (i++ ; i < mt_slot_count(mte); i++) {
6714 if (gaps[i] != 0) {
6715 pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n",
6716 node, i);
6717 MT_BUG_ON(mas->tree, 1);
6718 }
6719 }
6720 }
6721
6722 if (mte_is_root(mte))
6723 return;
6724
6725 p_slot = mte_parent_slot(mas->node);
6726 p_mn = mte_parent(mte);
6727 MT_BUG_ON(mas->tree, max_gap > mas->max);
6728 if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
6729 pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap);
6730 mt_dump(mas->tree, mt_dump_hex);
6731 MT_BUG_ON(mas->tree, 1);
6732 }
6733 }
6734
mas_validate_parent_slot(struct ma_state * mas)6735 static void mas_validate_parent_slot(struct ma_state *mas)
6736 {
6737 struct maple_node *parent;
6738 struct maple_enode *node;
6739 enum maple_type p_type;
6740 unsigned char p_slot;
6741 void __rcu **slots;
6742 int i;
6743
6744 if (mte_is_root(mas->node))
6745 return;
6746
6747 p_slot = mte_parent_slot(mas->node);
6748 p_type = mas_parent_type(mas, mas->node);
6749 parent = mte_parent(mas->node);
6750 slots = ma_slots(parent, p_type);
6751 MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
6752
6753 /* Check prev/next parent slot for duplicate node entry */
6754
6755 for (i = 0; i < mt_slots[p_type]; i++) {
6756 node = mas_slot(mas, slots, i);
6757 if (i == p_slot) {
6758 if (node != mas->node)
6759 pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
6760 parent, i, mas_mn(mas));
6761 MT_BUG_ON(mas->tree, node != mas->node);
6762 } else if (node == mas->node) {
6763 pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
6764 mas_mn(mas), parent, i, p_slot);
6765 MT_BUG_ON(mas->tree, node == mas->node);
6766 }
6767 }
6768 }
6769
mas_validate_child_slot(struct ma_state * mas)6770 static void mas_validate_child_slot(struct ma_state *mas)
6771 {
6772 enum maple_type type = mte_node_type(mas->node);
6773 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
6774 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
6775 struct maple_enode *child;
6776 unsigned char i;
6777
6778 if (mte_is_leaf(mas->node))
6779 return;
6780
6781 for (i = 0; i < mt_slots[type]; i++) {
6782 child = mas_slot(mas, slots, i);
6783
6784 if (!child) {
6785 pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
6786 mas_mn(mas), i);
6787 MT_BUG_ON(mas->tree, 1);
6788 }
6789
6790 if (mte_parent_slot(child) != i) {
6791 pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
6792 mas_mn(mas), i, mte_to_node(child),
6793 mte_parent_slot(child));
6794 MT_BUG_ON(mas->tree, 1);
6795 }
6796
6797 if (mte_parent(child) != mte_to_node(mas->node)) {
6798 pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
6799 mte_to_node(child), mte_parent(child),
6800 mte_to_node(mas->node));
6801 MT_BUG_ON(mas->tree, 1);
6802 }
6803
6804 if (i < mt_pivots[type] && pivots[i] == mas->max)
6805 break;
6806 }
6807 }
6808
6809 /*
6810 * Validate all pivots are within mas->min and mas->max, check metadata ends
6811 * where the maximum ends and ensure there is no slots or pivots set outside of
6812 * the end of the data.
6813 */
mas_validate_limits(struct ma_state * mas)6814 static void mas_validate_limits(struct ma_state *mas)
6815 {
6816 int i;
6817 unsigned long prev_piv = 0;
6818 enum maple_type type = mte_node_type(mas->node);
6819 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
6820 unsigned long *pivots = ma_pivots(mas_mn(mas), type);
6821
6822 for (i = 0; i < mt_slots[type]; i++) {
6823 unsigned long piv;
6824
6825 piv = mas_safe_pivot(mas, pivots, i, type);
6826
6827 if (!piv && (i != 0)) {
6828 pr_err("Missing node limit pivot at " PTR_FMT "[%u]",
6829 mas_mn(mas), i);
6830 MAS_WARN_ON(mas, 1);
6831 }
6832
6833 if (prev_piv > piv) {
6834 pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n",
6835 mas_mn(mas), i, piv, prev_piv);
6836 MAS_WARN_ON(mas, piv < prev_piv);
6837 }
6838
6839 if (piv < mas->min) {
6840 pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i,
6841 piv, mas->min);
6842 MAS_WARN_ON(mas, piv < mas->min);
6843 }
6844 if (piv > mas->max) {
6845 pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i,
6846 piv, mas->max);
6847 MAS_WARN_ON(mas, piv > mas->max);
6848 }
6849 prev_piv = piv;
6850 if (piv == mas->max)
6851 break;
6852 }
6853
6854 if (mas_data_end(mas) != i) {
6855 pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n",
6856 mas_mn(mas), mas_data_end(mas), i);
6857 MT_BUG_ON(mas->tree, 1);
6858 }
6859
6860 for (i += 1; i < mt_slots[type]; i++) {
6861 void *entry = mas_slot(mas, slots, i);
6862
6863 if (entry && (i != mt_slots[type] - 1)) {
6864 pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n",
6865 mas_mn(mas), i, entry);
6866 MT_BUG_ON(mas->tree, entry != NULL);
6867 }
6868
6869 if (i < mt_pivots[type]) {
6870 unsigned long piv = pivots[i];
6871
6872 if (!piv)
6873 continue;
6874
6875 pr_err(PTR_FMT "[%u] should not have piv %lu\n",
6876 mas_mn(mas), i, piv);
6877 MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
6878 }
6879 }
6880 }
6881
mt_validate_nulls(struct maple_tree * mt)6882 static void mt_validate_nulls(struct maple_tree *mt)
6883 {
6884 void *entry, *last = (void *)1;
6885 unsigned char offset = 0;
6886 void __rcu **slots;
6887 MA_STATE(mas, mt, 0, 0);
6888
6889 mas_start(&mas);
6890 if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
6891 return;
6892
6893 while (!mte_is_leaf(mas.node))
6894 mas_descend(&mas);
6895
6896 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
6897 do {
6898 entry = mas_slot(&mas, slots, offset);
6899 if (!last && !entry) {
6900 pr_err("Sequential nulls end at " PTR_FMT "[%u]\n",
6901 mas_mn(&mas), offset);
6902 }
6903 MT_BUG_ON(mt, !last && !entry);
6904 last = entry;
6905 if (offset == mas_data_end(&mas)) {
6906 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
6907 if (mas_is_overflow(&mas))
6908 return;
6909 offset = 0;
6910 slots = ma_slots(mte_to_node(mas.node),
6911 mte_node_type(mas.node));
6912 } else {
6913 offset++;
6914 }
6915
6916 } while (!mas_is_overflow(&mas));
6917 }
6918
6919 /*
6920 * validate a maple tree by checking:
6921 * 1. The limits (pivots are within mas->min to mas->max)
6922 * 2. The gap is correctly set in the parents
6923 */
mt_validate(struct maple_tree * mt)6924 void mt_validate(struct maple_tree *mt)
6925 __must_hold(mas->tree->ma_lock)
6926 {
6927 unsigned char end;
6928
6929 MA_STATE(mas, mt, 0, 0);
6930 mas_start(&mas);
6931 if (!mas_is_active(&mas))
6932 return;
6933
6934 while (!mte_is_leaf(mas.node))
6935 mas_descend(&mas);
6936
6937 while (!mas_is_overflow(&mas)) {
6938 MAS_WARN_ON(&mas, mte_dead_node(mas.node));
6939 end = mas_data_end(&mas);
6940 if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
6941 (!mte_is_root(mas.node)))) {
6942 pr_err("Invalid size %u of " PTR_FMT "\n",
6943 end, mas_mn(&mas));
6944 }
6945
6946 mas_validate_parent_slot(&mas);
6947 mas_validate_limits(&mas);
6948 mas_validate_child_slot(&mas);
6949 if (mt_is_alloc(mt))
6950 mas_validate_gaps(&mas);
6951 mas_dfs_postorder(&mas, ULONG_MAX);
6952 }
6953 mt_validate_nulls(mt);
6954 }
6955 EXPORT_SYMBOL_GPL(mt_validate);
6956
mas_dump(const struct ma_state * mas)6957 void mas_dump(const struct ma_state *mas)
6958 {
6959 pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ",
6960 mas->tree, mas->node);
6961 switch (mas->status) {
6962 case ma_active:
6963 pr_err("(ma_active)");
6964 break;
6965 case ma_none:
6966 pr_err("(ma_none)");
6967 break;
6968 case ma_root:
6969 pr_err("(ma_root)");
6970 break;
6971 case ma_start:
6972 pr_err("(ma_start) ");
6973 break;
6974 case ma_pause:
6975 pr_err("(ma_pause) ");
6976 break;
6977 case ma_overflow:
6978 pr_err("(ma_overflow) ");
6979 break;
6980 case ma_underflow:
6981 pr_err("(ma_underflow) ");
6982 break;
6983 case ma_error:
6984 pr_err("(ma_error) ");
6985 break;
6986 }
6987
6988 pr_err("Store Type: ");
6989 switch (mas->store_type) {
6990 case wr_invalid:
6991 pr_err("invalid store type\n");
6992 break;
6993 case wr_new_root:
6994 pr_err("new_root\n");
6995 break;
6996 case wr_store_root:
6997 pr_err("store_root\n");
6998 break;
6999 case wr_exact_fit:
7000 pr_err("exact_fit\n");
7001 break;
7002 case wr_split_store:
7003 pr_err("split_store\n");
7004 break;
7005 case wr_slot_store:
7006 pr_err("slot_store\n");
7007 break;
7008 case wr_append:
7009 pr_err("append\n");
7010 break;
7011 case wr_node_store:
7012 pr_err("node_store\n");
7013 break;
7014 case wr_spanning_store:
7015 pr_err("spanning_store\n");
7016 break;
7017 case wr_rebalance:
7018 pr_err("rebalance\n");
7019 break;
7020 }
7021
7022 pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
7023 mas->index, mas->last);
7024 pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
7025 mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
7026 mas->mas_flags);
7027 if (mas->index > mas->last)
7028 pr_err("Check index & last\n");
7029 }
7030 EXPORT_SYMBOL_GPL(mas_dump);
7031
mas_wr_dump(const struct ma_wr_state * wr_mas)7032 void mas_wr_dump(const struct ma_wr_state *wr_mas)
7033 {
7034 pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n",
7035 wr_mas->node, wr_mas->r_min, wr_mas->r_max);
7036 pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
7037 wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
7038 wr_mas->end_piv);
7039 }
7040 EXPORT_SYMBOL_GPL(mas_wr_dump);
7041
7042 #endif /* CONFIG_DEBUG_MAPLE_TREE */
7043