Lines Matching +full:child +full:- +full:node
1 // SPDX-License-Identifier: GPL-2.0-only
21 /* Intermediate node */
27 struct lpm_trie_node __rcu *child[2]; member
51 * lead to more nodes containing more specific matches. Each node also stores
58 * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will
59 * stick to IP-address notation for readability though.
61 * As the trie is empty initially, the new node (1) will be places as root
62 * node, denoted as (R) in the example below. As there are no other node, both
63 * child pointers are %NULL.
65 * +----------------+
70 * +----------------+
72 * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already
73 * a node with the same data and a smaller prefix (ie, a less specific one),
74 * node (2) will become a child of (1). In child index depends on the next bit
76 * child[0] of (1):
78 * +----------------+
83 * +----------------+
85 * +----------------+
90 * +----------------+
92 * The child[1] slot of (1) could be filled with another node which has bit #17
96 * +----------------+
101 * +----------------+
103 * +----------------+ +------------------+
108 * +----------------+ +------------------+
110 * Let's add another node (4) to the game for 192.168.1.0/24. In order to place
111 * it, node (1) is looked at first, and because (4) of the semantics laid out
112 * above (bit #17 is 0), it would normally be attached to (1) as child[0].
113 * However, that slot is already allocated, so a new node is needed in between.
114 * That node does not have a value attached to it and it will never be
119 * +----------------+
124 * +----------------+
126 * +----------------+ +------------------+
129 * | value: --- | | value: 3 |
131 * +----------------+ +------------------+
133 * +----------------+ +----------------+
138 * +----------------+ +----------------+
140 * 192.168.1.1/32 would be a child of (5) etc.
142 * An intermediate node will be turned into a 'real' node on demand. In the
143 * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie.
148 * The lookup starts at the root node. If the current node matches and if there
149 * is a child that can be used to become more specific, the trie is traversed
150 * downwards. The last node in the traversal that is a non-intermediate one is
156 return !!(data[index / 8] & (1 << (7 - (index % 8)))); in extract_bit()
160 * __longest_prefix_match() - determine the longest prefix
162 * @node: The node to operate on
163 * @key: The key to compare to @node
165 * Determine the longest prefix of @node that matches the bits in @key.
169 const struct lpm_trie_node *node, in __longest_prefix_match() argument
172 u32 limit = min(node->prefixlen, key->prefixlen); in __longest_prefix_match()
183 if (trie->data_size >= 8) { in __longest_prefix_match()
184 u64 diff = be64_to_cpu(*(__be64 *)node->data ^ in __longest_prefix_match()
185 *(__be64 *)key->data); in __longest_prefix_match()
187 prefixlen = 64 - fls64(diff); in __longest_prefix_match()
196 while (trie->data_size >= i + 4) { in __longest_prefix_match()
197 u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^ in __longest_prefix_match()
198 *(__be32 *)&key->data[i]); in __longest_prefix_match()
200 prefixlen += 32 - fls(diff); in __longest_prefix_match()
208 if (trie->data_size >= i + 2) { in __longest_prefix_match()
209 u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ in __longest_prefix_match()
210 *(__be16 *)&key->data[i]); in __longest_prefix_match()
212 prefixlen += 16 - fls(diff); in __longest_prefix_match()
220 if (trie->data_size >= i + 1) { in __longest_prefix_match()
221 prefixlen += 8 - fls(node->data[i] ^ key->data[i]); in __longest_prefix_match()
231 const struct lpm_trie_node *node, in longest_prefix_match() argument
234 return __longest_prefix_match(trie, node, key); in longest_prefix_match()
241 struct lpm_trie_node *node, *found = NULL; in trie_lookup_elem() local
244 if (key->prefixlen > trie->max_prefixlen) in trie_lookup_elem()
247 /* Start walking the trie from the root node ... */ in trie_lookup_elem()
249 for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held()); in trie_lookup_elem()
250 node;) { in trie_lookup_elem()
254 /* Determine the longest prefix of @node that matches @key. in trie_lookup_elem()
258 matchlen = __longest_prefix_match(trie, node, key); in trie_lookup_elem()
259 if (matchlen == trie->max_prefixlen) { in trie_lookup_elem()
260 found = node; in trie_lookup_elem()
265 * length of @node, bail out and return the node we have seen in trie_lookup_elem()
268 if (matchlen < node->prefixlen) in trie_lookup_elem()
271 /* Consider this node as return candidate unless it is an in trie_lookup_elem()
274 if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) in trie_lookup_elem()
275 found = node; in trie_lookup_elem()
277 /* If the node match is fully satisfied, let's see if we can in trie_lookup_elem()
281 next_bit = extract_bit(key->data, node->prefixlen); in trie_lookup_elem()
282 node = rcu_dereference_check(node->child[next_bit], in trie_lookup_elem()
289 return found->data + trie->data_size; in trie_lookup_elem()
295 struct lpm_trie_node *node; in lpm_trie_node_alloc() local
297 node = bpf_mem_cache_alloc(&trie->ma); in lpm_trie_node_alloc()
299 if (!node) in lpm_trie_node_alloc()
302 node->flags = 0; in lpm_trie_node_alloc()
305 memcpy(node->data + trie->data_size, value, in lpm_trie_node_alloc()
306 trie->map.value_size); in lpm_trie_node_alloc()
308 return node; in lpm_trie_node_alloc()
314 return -ENOENT; in trie_check_add_elem()
315 if (trie->n_entries == trie->map.max_entries) in trie_check_add_elem()
316 return -ENOSPC; in trie_check_add_elem()
317 trie->n_entries++; in trie_check_add_elem()
326 struct lpm_trie_node *node, *im_node, *new_node; in trie_update_elem() local
336 return -EINVAL; in trie_update_elem()
338 if (key->prefixlen > trie->max_prefixlen) in trie_update_elem()
339 return -EINVAL; in trie_update_elem()
341 /* Allocate and fill a new node */ in trie_update_elem()
344 return -ENOMEM; in trie_update_elem()
346 ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags); in trie_update_elem()
350 new_node->prefixlen = key->prefixlen; in trie_update_elem()
351 RCU_INIT_POINTER(new_node->child[0], NULL); in trie_update_elem()
352 RCU_INIT_POINTER(new_node->child[1], NULL); in trie_update_elem()
353 memcpy(new_node->data, key->data, trie->data_size); in trie_update_elem()
355 /* Now find a slot to attach the new node. To do that, walk the tree in trie_update_elem()
356 * from the root and match as many bits as possible for each node until in trie_update_elem()
358 * an intermediate node. in trie_update_elem()
360 slot = &trie->root; in trie_update_elem()
362 while ((node = rcu_dereference(*slot))) { in trie_update_elem()
363 matchlen = longest_prefix_match(trie, node, key); in trie_update_elem()
365 if (node->prefixlen != matchlen || in trie_update_elem()
366 node->prefixlen == key->prefixlen) in trie_update_elem()
369 next_bit = extract_bit(key->data, node->prefixlen); in trie_update_elem()
370 slot = &node->child[next_bit]; in trie_update_elem()
373 /* If the slot is empty (a free child pointer or an empty root), in trie_update_elem()
376 if (!node) { in trie_update_elem()
388 if (node->prefixlen == matchlen) { in trie_update_elem()
389 if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) { in trie_update_elem()
391 ret = -EEXIST; in trie_update_elem()
400 new_node->child[0] = node->child[0]; in trie_update_elem()
401 new_node->child[1] = node->child[1]; in trie_update_elem()
404 free_node = node; in trie_update_elem()
413 /* If the new node matches the prefix completely, it must be inserted in trie_update_elem()
414 * as an ancestor. Simply insert it between @node and *@slot. in trie_update_elem()
416 if (matchlen == key->prefixlen) { in trie_update_elem()
417 next_bit = extract_bit(node->data, matchlen); in trie_update_elem()
418 rcu_assign_pointer(new_node->child[next_bit], node); in trie_update_elem()
425 trie->n_entries--; in trie_update_elem()
426 ret = -ENOMEM; in trie_update_elem()
430 im_node->prefixlen = matchlen; in trie_update_elem()
431 im_node->flags |= LPM_TREE_NODE_FLAG_IM; in trie_update_elem()
432 memcpy(im_node->data, node->data, trie->data_size); in trie_update_elem()
434 /* Now determine which child to install in which slot */ in trie_update_elem()
435 if (extract_bit(key->data, matchlen)) { in trie_update_elem()
436 rcu_assign_pointer(im_node->child[0], node); in trie_update_elem()
437 rcu_assign_pointer(im_node->child[1], new_node); in trie_update_elem()
439 rcu_assign_pointer(im_node->child[0], new_node); in trie_update_elem()
440 rcu_assign_pointer(im_node->child[1], node); in trie_update_elem()
443 /* Finally, assign the intermediate node to the determined slot */ in trie_update_elem()
447 raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags); in trie_update_elem()
450 bpf_mem_cache_free(&trie->ma, new_node); in trie_update_elem()
451 bpf_mem_cache_free_rcu(&trie->ma, free_node); in trie_update_elem()
463 struct lpm_trie_node *node, *parent; in trie_delete_elem() local
469 if (key->prefixlen > trie->max_prefixlen) in trie_delete_elem()
470 return -EINVAL; in trie_delete_elem()
472 ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags); in trie_delete_elem()
477 * track of the path we traverse. We will need to know the node in trie_delete_elem()
478 * we wish to delete, and the slot that points to the node we want in trie_delete_elem()
482 trim = &trie->root; in trie_delete_elem()
485 while ((node = rcu_dereference(*trim))) { in trie_delete_elem()
486 matchlen = longest_prefix_match(trie, node, key); in trie_delete_elem()
488 if (node->prefixlen != matchlen || in trie_delete_elem()
489 node->prefixlen == key->prefixlen) in trie_delete_elem()
492 parent = node; in trie_delete_elem()
494 next_bit = extract_bit(key->data, node->prefixlen); in trie_delete_elem()
495 trim = &node->child[next_bit]; in trie_delete_elem()
498 if (!node || node->prefixlen != key->prefixlen || in trie_delete_elem()
499 node->prefixlen != matchlen || in trie_delete_elem()
500 (node->flags & LPM_TREE_NODE_FLAG_IM)) { in trie_delete_elem()
501 ret = -ENOENT; in trie_delete_elem()
505 trie->n_entries--; in trie_delete_elem()
507 /* If the node we are removing has two children, simply mark it in trie_delete_elem()
510 if (rcu_access_pointer(node->child[0]) && in trie_delete_elem()
511 rcu_access_pointer(node->child[1])) { in trie_delete_elem()
512 node->flags |= LPM_TREE_NODE_FLAG_IM; in trie_delete_elem()
516 /* If the parent of the node we are about to delete is an intermediate in trie_delete_elem()
517 * node, and the deleted node doesn't have any children, we can delete in trie_delete_elem()
518 * the intermediate parent as well and promote its other child in trie_delete_elem()
523 if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && in trie_delete_elem()
524 !node->child[0] && !node->child[1]) { in trie_delete_elem()
525 if (node == rcu_access_pointer(parent->child[0])) in trie_delete_elem()
527 *trim2, rcu_access_pointer(parent->child[1])); in trie_delete_elem()
530 *trim2, rcu_access_pointer(parent->child[0])); in trie_delete_elem()
532 free_node = node; in trie_delete_elem()
536 /* The node we are removing has either zero or one child. If there in trie_delete_elem()
537 * is a child, move it into the removed node's slot then delete in trie_delete_elem()
538 * the node. Otherwise just clear the slot and delete the node. in trie_delete_elem()
540 if (node->child[0]) in trie_delete_elem()
541 rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); in trie_delete_elem()
542 else if (node->child[1]) in trie_delete_elem()
543 rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); in trie_delete_elem()
546 free_node = node; in trie_delete_elem()
549 raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags); in trie_delete_elem()
551 bpf_mem_cache_free_rcu(&trie->ma, free_parent); in trie_delete_elem()
552 bpf_mem_cache_free_rcu(&trie->ma, free_node); in trie_delete_elem()
560 #define LPM_VAL_SIZE_MAX (KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \
578 if (attr->max_entries == 0 || in trie_alloc()
579 !(attr->map_flags & BPF_F_NO_PREALLOC) || in trie_alloc()
580 attr->map_flags & ~LPM_CREATE_FLAG_MASK || in trie_alloc()
581 !bpf_map_flags_access_ok(attr->map_flags) || in trie_alloc()
582 attr->key_size < LPM_KEY_SIZE_MIN || in trie_alloc()
583 attr->key_size > LPM_KEY_SIZE_MAX || in trie_alloc()
584 attr->value_size < LPM_VAL_SIZE_MIN || in trie_alloc()
585 attr->value_size > LPM_VAL_SIZE_MAX) in trie_alloc()
586 return ERR_PTR(-EINVAL); in trie_alloc()
590 return ERR_PTR(-ENOMEM); in trie_alloc()
593 bpf_map_init_from_attr(&trie->map, attr); in trie_alloc()
594 trie->data_size = attr->key_size - in trie_alloc()
596 trie->max_prefixlen = trie->data_size * 8; in trie_alloc()
598 raw_res_spin_lock_init(&trie->lock); in trie_alloc()
601 leaf_size = sizeof(struct lpm_trie_node) + trie->data_size + in trie_alloc()
602 trie->map.value_size; in trie_alloc()
603 err = bpf_mem_alloc_init(&trie->ma, leaf_size, false); in trie_alloc()
606 return &trie->map; in trie_alloc()
617 struct lpm_trie_node *node; in trie_free() local
619 /* Always start at the root and walk down to a node that has no in trie_free()
620 * children. Then free that node, nullify its reference in the parent in trie_free()
625 slot = &trie->root; in trie_free()
628 node = rcu_dereference_protected(*slot, 1); in trie_free()
629 if (!node) in trie_free()
632 if (rcu_access_pointer(node->child[0])) { in trie_free()
633 slot = &node->child[0]; in trie_free()
637 if (rcu_access_pointer(node->child[1])) { in trie_free()
638 slot = &node->child[1]; in trie_free()
643 * node without waiting for the extra RCU GP. in trie_free()
645 bpf_mem_cache_raw_free(node); in trie_free()
652 bpf_mem_alloc_destroy(&trie->ma); in trie_free()
658 struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; in trie_get_next_key() local
662 int err = 0, stack_ptr = -1; in trie_get_next_key()
666 /* The get_next_key follows postorder. For the 4 node example in in trie_get_next_key()
678 search_root = rcu_dereference(trie->root); in trie_get_next_key()
680 return -ENOENT; in trie_get_next_key()
682 /* For invalid key, find the leftmost node in the trie */ in trie_get_next_key()
683 if (!key || key->prefixlen > trie->max_prefixlen) in trie_get_next_key()
686 node_stack = kmalloc_array(trie->max_prefixlen + 1, in trie_get_next_key()
690 return -ENOMEM; in trie_get_next_key()
692 /* Try to find the exact node for the given key */ in trie_get_next_key()
693 for (node = search_root; node;) { in trie_get_next_key()
694 node_stack[++stack_ptr] = node; in trie_get_next_key()
695 matchlen = longest_prefix_match(trie, node, key); in trie_get_next_key()
696 if (node->prefixlen != matchlen || in trie_get_next_key()
697 node->prefixlen == key->prefixlen) in trie_get_next_key()
700 next_bit = extract_bit(key->data, node->prefixlen); in trie_get_next_key()
701 node = rcu_dereference(node->child[next_bit]); in trie_get_next_key()
703 if (!node || node->prefixlen != matchlen || in trie_get_next_key()
704 (node->flags & LPM_TREE_NODE_FLAG_IM)) in trie_get_next_key()
707 /* The node with the exactly-matching key has been found, in trie_get_next_key()
708 * find the first node in postorder after the matched node. in trie_get_next_key()
710 node = node_stack[stack_ptr]; in trie_get_next_key()
712 parent = node_stack[stack_ptr - 1]; in trie_get_next_key()
713 if (rcu_dereference(parent->child[0]) == node) { in trie_get_next_key()
714 search_root = rcu_dereference(parent->child[1]); in trie_get_next_key()
718 if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { in trie_get_next_key()
723 node = parent; in trie_get_next_key()
724 stack_ptr--; in trie_get_next_key()
728 err = -ENOENT; in trie_get_next_key()
732 /* Find the leftmost non-intermediate node, all intermediate nodes in trie_get_next_key()
735 for (node = search_root; node;) { in trie_get_next_key()
736 if (node->flags & LPM_TREE_NODE_FLAG_IM) { in trie_get_next_key()
737 node = rcu_dereference(node->child[0]); in trie_get_next_key()
739 next_node = node; in trie_get_next_key()
740 node = rcu_dereference(node->child[0]); in trie_get_next_key()
741 if (!node) in trie_get_next_key()
742 node = rcu_dereference(next_node->child[1]); in trie_get_next_key()
746 next_key->prefixlen = next_node->prefixlen; in trie_get_next_key()
748 next_node->data, trie->data_size); in trie_get_next_key()
760 return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? in trie_check_btf()
761 -EINVAL : 0; in trie_check_btf()
769 elem_size = sizeof(struct lpm_trie_node) + trie->data_size + in trie_mem_usage()
770 trie->map.value_size; in trie_mem_usage()
771 return elem_size * READ_ONCE(trie->n_entries); in trie_mem_usage()