1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _EYTZINGER_H
3 #define _EYTZINGER_H
4 
5 #include <linux/bitops.h>
6 #include <linux/log2.h>
7 
8 #ifdef EYTZINGER_DEBUG
9 #include <linux/bug.h>
10 #define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
11 #else
12 #define EYTZINGER_BUG_ON(cond)
13 #endif
14 
15 /*
16  * Traversal for trees in eytzinger layout - a full binary tree layed out in an
17  * array.
18  *
19  * Consider using an eytzinger tree any time you would otherwise be doing binary
20  * search over an array. Binary search is a worst case scenario for branch
21  * prediction and prefetching, but in an eytzinger tree every node's children
22  * are adjacent in memory, thus we can prefetch children before knowing the
23  * result of the comparison, assuming multiple nodes fit on a cacheline.
24  *
25  * Two variants are provided, for one based indexing and zero based indexing.
26  *
27  * Zero based indexing is more convenient, but one based indexing has better
28  * alignment and thus better performance because each new level of the tree
29  * starts at a power of two, and thus if element 0 was cacheline aligned, each
30  * new level will be as well.
31  */
32 
33 static inline unsigned eytzinger1_child(unsigned i, unsigned child)
34 {
35 	EYTZINGER_BUG_ON(child > 1);
36 
37 	return (i << 1) + child;
38 }
39 
40 static inline unsigned eytzinger1_left_child(unsigned i)
41 {
42 	return eytzinger1_child(i, 0);
43 }
44 
45 static inline unsigned eytzinger1_right_child(unsigned i)
46 {
47 	return eytzinger1_child(i, 1);
48 }
49 
50 static inline unsigned eytzinger1_first(unsigned size)
51 {
52 	return size ? rounddown_pow_of_two(size) : 0;
53 }
54 
55 static inline unsigned eytzinger1_last(unsigned size)
56 {
57 	return rounddown_pow_of_two(size + 1) - 1;
58 }
59 
60 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
61 {
62 	EYTZINGER_BUG_ON(i == 0 || i > size);
63 
64 	if (eytzinger1_right_child(i) <= size) {
65 		i = eytzinger1_right_child(i);
66 
67 		i <<= __fls(size) - __fls(i);
68 		i >>= i > size;
69 	} else {
70 		i >>= ffz(i) + 1;
71 	}
72 
73 	return i;
74 }
75 
76 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
77 {
78 	EYTZINGER_BUG_ON(i == 0 || i > size);
79 
80 	if (eytzinger1_left_child(i) <= size) {
81 		i = eytzinger1_left_child(i) + 1;
82 
83 		i <<= __fls(size) - __fls(i);
84 		i -= 1;
85 		i >>= i > size;
86 	} else {
87 		i >>= __ffs(i) + 1;
88 	}
89 
90 	return i;
91 }
92 
93 static inline unsigned eytzinger1_extra(unsigned size)
94 {
95 	return size
96 		? (size + 1 - rounddown_pow_of_two(size)) << 1
97 		: 0;
98 }
99 
100 static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
101 					      unsigned extra)
102 {
103 	unsigned b = __fls(i);
104 	unsigned shift = __fls(size) - b;
105 	int s;
106 
107 	EYTZINGER_BUG_ON(!i || i > size);
108 
109 	i  ^= 1U << b;
110 	i <<= 1;
111 	i  |= 1;
112 	i <<= shift;
113 
114 	/*
115 	 * sign bit trick:
116 	 *
117 	 * if (i > extra)
118 	 *	i -= (i - extra) >> 1;
119 	 */
120 	s = extra - i;
121 	i += (s >> 1) & (s >> 31);
122 
123 	return i;
124 }
125 
126 static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
127 					       unsigned extra)
128 {
129 	unsigned shift;
130 	int s;
131 
132 	EYTZINGER_BUG_ON(!i || i > size);
133 
134 	/*
135 	 * sign bit trick:
136 	 *
137 	 * if (i > extra)
138 	 *	i += i - extra;
139 	 */
140 	s = extra - i;
141 	i -= s & (s >> 31);
142 
143 	shift = __ffs(i);
144 
145 	i >>= shift + 1;
146 	i  |= 1U << (__fls(size) - shift);
147 
148 	return i;
149 }
150 
151 static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
152 {
153 	return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
154 }
155 
156 static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
157 {
158 	return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
159 }
160 
161 #define eytzinger1_for_each(_i, _size)			\
162 	for (unsigned (_i) = eytzinger1_first((_size));	\
163 	     (_i) != 0;					\
164 	     (_i) = eytzinger1_next((_i), (_size)))
165 
166 /* Zero based indexing version: */
167 
168 static inline unsigned eytzinger0_child(unsigned i, unsigned child)
169 {
170 	EYTZINGER_BUG_ON(child > 1);
171 
172 	return (i << 1) + 1 + child;
173 }
174 
175 static inline unsigned eytzinger0_left_child(unsigned i)
176 {
177 	return eytzinger0_child(i, 0);
178 }
179 
180 static inline unsigned eytzinger0_right_child(unsigned i)
181 {
182 	return eytzinger0_child(i, 1);
183 }
184 
185 static inline unsigned eytzinger0_first(unsigned size)
186 {
187 	return eytzinger1_first(size) - 1;
188 }
189 
190 static inline unsigned eytzinger0_last(unsigned size)
191 {
192 	return eytzinger1_last(size) - 1;
193 }
194 
195 static inline unsigned eytzinger0_next(unsigned i, unsigned size)
196 {
197 	return eytzinger1_next(i + 1, size) - 1;
198 }
199 
200 static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
201 {
202 	return eytzinger1_prev(i + 1, size) - 1;
203 }
204 
205 static inline unsigned eytzinger0_extra(unsigned size)
206 {
207 	return eytzinger1_extra(size);
208 }
209 
210 static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
211 					       unsigned extra)
212 {
213 	return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
214 }
215 
216 static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
217 					       unsigned extra)
218 {
219 	return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
220 }
221 
222 static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
223 {
224 	return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
225 }
226 
227 static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
228 {
229 	return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
230 }
231 
232 #define eytzinger0_for_each(_i, _size)			\
233 	for (unsigned (_i) = eytzinger0_first((_size));	\
234 	     (_i) != -1;				\
235 	     (_i) = eytzinger0_next((_i), (_size)))
236 
237 #define eytzinger0_for_each_prev(_i, _size)		\
238 	for (unsigned (_i) = eytzinger0_last((_size));	\
239 	     (_i) != -1;				\
240 	     (_i) = eytzinger0_prev((_i), (_size)))
241 
242 /* return greatest node <= @search, or -1 if not found */
243 static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
244 				     cmp_func_t cmp, const void *search)
245 {
246 	void *base1 = base - size;
247 	unsigned n = 1;
248 
249 	while (n <= nr)
250 		n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
251 	n >>= __ffs(n) + 1;
252 	return n - 1;
253 }
254 
255 /* return smallest node > @search, or -1 if not found */
256 static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
257 				     cmp_func_t cmp, const void *search)
258 {
259 	void *base1 = base - size;
260 	unsigned n = 1;
261 
262 	while (n <= nr)
263 		n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0);
264 	n >>= __ffs(n + 1) + 1;
265 	return n - 1;
266 }
267 
268 /* return smallest node >= @search, or -1 if not found */
269 static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size,
270 				     cmp_func_t cmp, const void *search)
271 {
272 	void *base1 = base - size;
273 	unsigned n = 1;
274 
275 	while (n <= nr)
276 		n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0);
277 	n >>= __ffs(n + 1) + 1;
278 	return n - 1;
279 }
280 
281 #define eytzinger0_find(base, nr, size, _cmp, search)			\
282 ({									\
283 	size_t _size		= (size);				\
284 	void *_base1		= (void *)(base) - _size;		\
285 	const void *_search	= (search);				\
286 	size_t _nr		= (nr);					\
287 	size_t _i		= 1;					\
288 	int _res;							\
289 									\
290 	while (_i <= _nr &&						\
291 	       (_res = _cmp(_search, _base1 + _i * _size)))		\
292 		_i = eytzinger1_child(_i, _res > 0);			\
293 	_i - 1;								\
294 })
295 
296 void eytzinger0_sort_r(void *, size_t, size_t,
297 		       cmp_r_func_t, swap_r_func_t, const void *);
298 void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
299 
300 #endif /* _EYTZINGER_H */
301