1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _EYTZINGER_H 3 #define _EYTZINGER_H 4 5 #include <linux/bitops.h> 6 #include <linux/log2.h> 7 8 #ifdef EYTZINGER_DEBUG 9 #include <linux/bug.h> 10 #define EYTZINGER_BUG_ON(cond) BUG_ON(cond) 11 #else 12 #define EYTZINGER_BUG_ON(cond) 13 #endif 14 15 /* 16 * Traversal for trees in eytzinger layout - a full binary tree layed out in an 17 * array. 18 * 19 * Consider using an eytzinger tree any time you would otherwise be doing binary 20 * search over an array. Binary search is a worst case scenario for branch 21 * prediction and prefetching, but in an eytzinger tree every node's children 22 * are adjacent in memory, thus we can prefetch children before knowing the 23 * result of the comparison, assuming multiple nodes fit on a cacheline. 24 * 25 * Two variants are provided, for one based indexing and zero based indexing. 26 * 27 * Zero based indexing is more convenient, but one based indexing has better 28 * alignment and thus better performance because each new level of the tree 29 * starts at a power of two, and thus if element 0 was cacheline aligned, each 30 * new level will be as well. 31 */ 32 33 static inline unsigned eytzinger1_child(unsigned i, unsigned child) 34 { 35 EYTZINGER_BUG_ON(child > 1); 36 37 return (i << 1) + child; 38 } 39 40 static inline unsigned eytzinger1_left_child(unsigned i) 41 { 42 return eytzinger1_child(i, 0); 43 } 44 45 static inline unsigned eytzinger1_right_child(unsigned i) 46 { 47 return eytzinger1_child(i, 1); 48 } 49 50 static inline unsigned eytzinger1_first(unsigned size) 51 { 52 return size ? rounddown_pow_of_two(size) : 0; 53 } 54 55 static inline unsigned eytzinger1_last(unsigned size) 56 { 57 return rounddown_pow_of_two(size + 1) - 1; 58 } 59 60 static inline unsigned eytzinger1_next(unsigned i, unsigned size) 61 { 62 EYTZINGER_BUG_ON(i == 0 || i > size); 63 64 if (eytzinger1_right_child(i) <= size) { 65 i = eytzinger1_right_child(i); 66 67 i <<= __fls(size) - __fls(i); 68 i >>= i > size; 69 } else { 70 i >>= ffz(i) + 1; 71 } 72 73 return i; 74 } 75 76 static inline unsigned eytzinger1_prev(unsigned i, unsigned size) 77 { 78 EYTZINGER_BUG_ON(i == 0 || i > size); 79 80 if (eytzinger1_left_child(i) <= size) { 81 i = eytzinger1_left_child(i) + 1; 82 83 i <<= __fls(size) - __fls(i); 84 i -= 1; 85 i >>= i > size; 86 } else { 87 i >>= __ffs(i) + 1; 88 } 89 90 return i; 91 } 92 93 static inline unsigned eytzinger1_extra(unsigned size) 94 { 95 return size 96 ? (size + 1 - rounddown_pow_of_two(size)) << 1 97 : 0; 98 } 99 100 static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, 101 unsigned extra) 102 { 103 unsigned b = __fls(i); 104 unsigned shift = __fls(size) - b; 105 int s; 106 107 EYTZINGER_BUG_ON(!i || i > size); 108 109 i ^= 1U << b; 110 i <<= 1; 111 i |= 1; 112 i <<= shift; 113 114 /* 115 * sign bit trick: 116 * 117 * if (i > extra) 118 * i -= (i - extra) >> 1; 119 */ 120 s = extra - i; 121 i += (s >> 1) & (s >> 31); 122 123 return i; 124 } 125 126 static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, 127 unsigned extra) 128 { 129 unsigned shift; 130 int s; 131 132 EYTZINGER_BUG_ON(!i || i > size); 133 134 /* 135 * sign bit trick: 136 * 137 * if (i > extra) 138 * i += i - extra; 139 */ 140 s = extra - i; 141 i -= s & (s >> 31); 142 143 shift = __ffs(i); 144 145 i >>= shift + 1; 146 i |= 1U << (__fls(size) - shift); 147 148 return i; 149 } 150 151 static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) 152 { 153 return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); 154 } 155 156 static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) 157 { 158 return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); 159 } 160 161 #define eytzinger1_for_each(_i, _size) \ 162 for (unsigned (_i) = eytzinger1_first((_size)); \ 163 (_i) != 0; \ 164 (_i) = eytzinger1_next((_i), (_size))) 165 166 /* Zero based indexing version: */ 167 168 static inline unsigned eytzinger0_child(unsigned i, unsigned child) 169 { 170 EYTZINGER_BUG_ON(child > 1); 171 172 return (i << 1) + 1 + child; 173 } 174 175 static inline unsigned eytzinger0_left_child(unsigned i) 176 { 177 return eytzinger0_child(i, 0); 178 } 179 180 static inline unsigned eytzinger0_right_child(unsigned i) 181 { 182 return eytzinger0_child(i, 1); 183 } 184 185 static inline unsigned eytzinger0_first(unsigned size) 186 { 187 return eytzinger1_first(size) - 1; 188 } 189 190 static inline unsigned eytzinger0_last(unsigned size) 191 { 192 return eytzinger1_last(size) - 1; 193 } 194 195 static inline unsigned eytzinger0_next(unsigned i, unsigned size) 196 { 197 return eytzinger1_next(i + 1, size) - 1; 198 } 199 200 static inline unsigned eytzinger0_prev(unsigned i, unsigned size) 201 { 202 return eytzinger1_prev(i + 1, size) - 1; 203 } 204 205 static inline unsigned eytzinger0_extra(unsigned size) 206 { 207 return eytzinger1_extra(size); 208 } 209 210 static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, 211 unsigned extra) 212 { 213 return __eytzinger1_to_inorder(i + 1, size, extra) - 1; 214 } 215 216 static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, 217 unsigned extra) 218 { 219 return __inorder_to_eytzinger1(i + 1, size, extra) - 1; 220 } 221 222 static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) 223 { 224 return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); 225 } 226 227 static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) 228 { 229 return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); 230 } 231 232 #define eytzinger0_for_each(_i, _size) \ 233 for (unsigned (_i) = eytzinger0_first((_size)); \ 234 (_i) != -1; \ 235 (_i) = eytzinger0_next((_i), (_size))) 236 237 #define eytzinger0_for_each_prev(_i, _size) \ 238 for (unsigned (_i) = eytzinger0_last((_size)); \ 239 (_i) != -1; \ 240 (_i) = eytzinger0_prev((_i), (_size))) 241 242 /* return greatest node <= @search, or -1 if not found */ 243 static inline int eytzinger0_find_le(void *base, size_t nr, size_t size, 244 cmp_func_t cmp, const void *search) 245 { 246 void *base1 = base - size; 247 unsigned n = 1; 248 249 while (n <= nr) 250 n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0); 251 n >>= __ffs(n) + 1; 252 return n - 1; 253 } 254 255 /* return smallest node > @search, or -1 if not found */ 256 static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size, 257 cmp_func_t cmp, const void *search) 258 { 259 void *base1 = base - size; 260 unsigned n = 1; 261 262 while (n <= nr) 263 n = eytzinger1_child(n, cmp(base1 + n * size, search) <= 0); 264 n >>= __ffs(n + 1) + 1; 265 return n - 1; 266 } 267 268 /* return smallest node >= @search, or -1 if not found */ 269 static inline int eytzinger0_find_ge(void *base, size_t nr, size_t size, 270 cmp_func_t cmp, const void *search) 271 { 272 void *base1 = base - size; 273 unsigned n = 1; 274 275 while (n <= nr) 276 n = eytzinger1_child(n, cmp(base1 + n * size, search) < 0); 277 n >>= __ffs(n + 1) + 1; 278 return n - 1; 279 } 280 281 #define eytzinger0_find(base, nr, size, _cmp, search) \ 282 ({ \ 283 size_t _size = (size); \ 284 void *_base1 = (void *)(base) - _size; \ 285 const void *_search = (search); \ 286 size_t _nr = (nr); \ 287 size_t _i = 1; \ 288 int _res; \ 289 \ 290 while (_i <= _nr && \ 291 (_res = _cmp(_search, _base1 + _i * _size))) \ 292 _i = eytzinger1_child(_i, _res > 0); \ 293 _i - 1; \ 294 }) 295 296 void eytzinger0_sort_r(void *, size_t, size_t, 297 cmp_r_func_t, swap_r_func_t, const void *); 298 void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); 299 300 #endif /* _EYTZINGER_H */ 301