1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #ifndef _NFT_SET_PIPAPO_H 4 5 #include <linux/log2.h> 6 #include <net/ipv6.h> /* For the maximum length of a field */ 7 8 /* Count of concatenated fields depends on count of 32-bit nftables registers */ 9 #define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT 10 11 /* Restrict usage to multiple fields, make sure rbtree is used otherwise */ 12 #define NFT_PIPAPO_MIN_FIELDS 2 13 14 /* Largest supported field size */ 15 #define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) 16 #define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) 17 18 /* Bits to be grouped together in table buckets depending on set size */ 19 #define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET 20 #define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8 21 #define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4 22 #define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \ 23 BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \ 24 (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4)) 25 #define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb) 26 27 /* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the 28 * small group width, and switch to the big group width if the table gets 29 * smaller than NFT_PIPAPO_LT_SIZE_LOW. 30 * 31 * Picking 2MiB as threshold (for a single table) avoids as much as possible 32 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages, 33 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which 34 * keeps performance nice in case kvmalloc() gives us non-contiguous areas. 35 */ 36 #define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21) 37 #define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16) 38 #define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD 39 #define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \ 40 NFT_PIPAPO_LT_SIZE_HYSTERESIS 41 42 /* Fields are padded to 32 bits in input registers */ 43 #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ 44 (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) 45 #define NFT_PIPAPO_GROUPS_PADDING(f) \ 46 (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ 47 NFT_PIPAPO_GROUPS_PER_BYTE(f)) 48 49 /* Number of buckets given by 2 ^ n, with n bucket bits */ 50 #define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) 51 52 /* Each n-bit range maps to up to n * 2 rules */ 53 #define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) 54 55 /* Use the rest of mapping table buckets for rule indices, but it makes no sense 56 * to exceed 32 bits 57 */ 58 #if BITS_PER_LONG == 64 59 #define NFT_PIPAPO_MAP_TOBITS 32 60 #else 61 #define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) 62 #endif 63 64 /* ...which gives us the highest allowed index for a rule */ 65 #define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ 66 - (1UL << NFT_PIPAPO_MAP_NBITS)) 67 68 /* Definitions for vectorised implementations */ 69 #ifdef NFT_PIPAPO_ALIGN 70 #define NFT_PIPAPO_ALIGN_HEADROOM \ 71 (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) 72 #define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) 73 #else 74 #define NFT_PIPAPO_ALIGN_HEADROOM 0 75 #define NFT_PIPAPO_LT_ALIGN(lt) (lt) 76 #endif /* NFT_PIPAPO_ALIGN */ 77 78 #define nft_pipapo_for_each_field(field, index, match) \ 79 for ((field) = (match)->f, (index) = 0; \ 80 (index) < (match)->field_count; \ 81 (index)++, (field)++) 82 83 /** 84 * union nft_pipapo_map_bucket - Bucket of mapping table 85 * @to: First rule number (in next field) this rule maps to 86 * @n: Number of rules (in next field) this rule maps to 87 * @e: If there's no next field, pointer to element this rule maps to 88 */ 89 union nft_pipapo_map_bucket { 90 struct { 91 #if BITS_PER_LONG == 64 92 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); 93 u32 to; 94 95 static_assert(NFT_PIPAPO_MAP_NBITS <= 32); 96 u32 n; 97 #else 98 unsigned long to:NFT_PIPAPO_MAP_TOBITS; 99 unsigned long n:NFT_PIPAPO_MAP_NBITS; 100 #endif 101 }; 102 struct nft_pipapo_elem *e; 103 }; 104 105 /** 106 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field 107 * @rules: Number of inserted rules 108 * @bsize: Size of each bucket in lookup table, in longs 109 * @rules_alloc: Number of allocated rules, always >= rules 110 * @groups: Amount of bit groups 111 * @bb: Number of bits grouped together in lookup table buckets 112 * @lt: Lookup table: 'groups' rows of buckets 113 * @mt: Mapping table: one bucket per rule 114 */ 115 struct nft_pipapo_field { 116 unsigned int rules; 117 unsigned int bsize; 118 unsigned int rules_alloc; 119 u8 groups; 120 u8 bb; 121 unsigned long *lt; 122 union nft_pipapo_map_bucket *mt; 123 }; 124 125 /** 126 * struct nft_pipapo_scratch - percpu data used for lookup and matching 127 * @map_index: Current working bitmap index, toggled between field matches 128 * @align_off: Offset to get the originally allocated address 129 * @map: store partial matching results during lookup 130 */ 131 struct nft_pipapo_scratch { 132 u8 map_index; 133 u32 align_off; 134 unsigned long map[]; 135 }; 136 137 /** 138 * struct nft_pipapo_match - Data used for lookup and matching 139 * @field_count: Amount of fields in set 140 * @bsize_max: Maximum lookup table bucket size of all fields, in longs 141 * @scratch: Preallocated per-CPU maps for partial matching results 142 * @rcu: Matching data is swapped on commits 143 * @f: Fields, with lookup and mapping tables 144 */ 145 struct nft_pipapo_match { 146 u8 field_count; 147 unsigned int bsize_max; 148 struct nft_pipapo_scratch * __percpu *scratch; 149 struct rcu_head rcu; 150 struct nft_pipapo_field f[] __counted_by(field_count); 151 }; 152 153 /** 154 * struct nft_pipapo - Representation of a set 155 * @match: Currently in-use matching data 156 * @clone: Copy where pending insertions and deletions are kept 157 * @width: Total bytes to be matched for one packet, including padding 158 * @last_gc: Timestamp of last garbage collection run, jiffies 159 */ 160 struct nft_pipapo { 161 struct nft_pipapo_match __rcu *match; 162 struct nft_pipapo_match *clone; 163 int width; 164 unsigned long last_gc; 165 }; 166 167 struct nft_pipapo_elem; 168 169 /** 170 * struct nft_pipapo_elem - API-facing representation of single set element 171 * @priv: element placeholder 172 * @ext: nftables API extensions 173 */ 174 struct nft_pipapo_elem { 175 struct nft_elem_priv priv; 176 struct nft_set_ext ext; 177 }; 178 179 int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, 180 unsigned long *dst, 181 const union nft_pipapo_map_bucket *mt, bool match_only); 182 183 /** 184 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets 185 * @f: Field including lookup table 186 * @dst: Area to store result 187 * @data: Input data selecting table buckets 188 */ 189 static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f, 190 unsigned long *dst, 191 const u8 *data) 192 { 193 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); 194 int group; 195 196 for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) { 197 u8 v; 198 199 v = *data >> 4; 200 __bitmap_and(dst, dst, lt + v * f->bsize, 201 f->bsize * BITS_PER_LONG); 202 lt += f->bsize * NFT_PIPAPO_BUCKETS(4); 203 204 v = *data & 0x0f; 205 __bitmap_and(dst, dst, lt + v * f->bsize, 206 f->bsize * BITS_PER_LONG); 207 lt += f->bsize * NFT_PIPAPO_BUCKETS(4); 208 } 209 } 210 211 /** 212 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets 213 * @f: Field including lookup table 214 * @dst: Area to store result 215 * @data: Input data selecting table buckets 216 */ 217 static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f, 218 unsigned long *dst, 219 const u8 *data) 220 { 221 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); 222 int group; 223 224 for (group = 0; group < f->groups; group++, data++) { 225 __bitmap_and(dst, dst, lt + *data * f->bsize, 226 f->bsize * BITS_PER_LONG); 227 lt += f->bsize * NFT_PIPAPO_BUCKETS(8); 228 } 229 } 230 231 /** 232 * pipapo_estimate_size() - Estimate worst-case for set size 233 * @desc: Set description, element count and field description used here 234 * 235 * The size for this set type can vary dramatically, as it depends on the number 236 * of rules (composing netmasks) the entries expand to. We compute the worst 237 * case here. 238 * 239 * In general, for a non-ranged entry or a single composing netmask, we need 240 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that 241 * is, each input bit needs four bits of matching data), plus a bucket in the 242 * mapping table for each field. 243 * 244 * Return: worst-case set size in bytes, 0 on any overflow 245 */ 246 static u64 pipapo_estimate_size(const struct nft_set_desc *desc) 247 { 248 unsigned long entry_size; 249 u64 size; 250 int i; 251 252 for (i = 0, entry_size = 0; i < desc->field_count; i++) { 253 unsigned long rules; 254 255 if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) 256 return 0; 257 258 /* Worst-case ranges for each concatenated field: each n-bit 259 * field can expand to up to n * 2 rules in each bucket, and 260 * each rule also needs a mapping bucket. 261 */ 262 rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; 263 entry_size += rules * 264 NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) / 265 BITS_PER_BYTE; 266 entry_size += rules * sizeof(union nft_pipapo_map_bucket); 267 } 268 269 /* Rules in lookup and mapping tables are needed for each entry */ 270 size = desc->size * entry_size; 271 if (size && div_u64(size, desc->size) != entry_size) 272 return 0; 273 274 size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2; 275 276 size += sizeof(struct nft_pipapo_field) * desc->field_count; 277 278 return size; 279 } 280 281 /** 282 * pipapo_resmap_init() - Initialise result map before first use 283 * @m: Matching data, including mapping table 284 * @res_map: Result map 285 * 286 * Initialize all bits covered by the first field to one, so that after 287 * the first step, only the matching bits of the first bit group remain. 288 * 289 * If other fields have a large bitmap, set remainder of res_map to 0. 290 */ 291 static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map) 292 { 293 const struct nft_pipapo_field *f = m->f; 294 int i; 295 296 for (i = 0; i < f->bsize; i++) 297 res_map[i] = ULONG_MAX; 298 299 for (i = f->bsize; i < m->bsize_max; i++) 300 res_map[i] = 0ul; 301 } 302 #endif /* _NFT_SET_PIPAPO_H */ 303