13241b1d3SJoe Thornber /* 23241b1d3SJoe Thornber * Copyright (C) 2011 Red Hat, Inc. 33241b1d3SJoe Thornber * 43241b1d3SJoe Thornber * This file is released under the GPL. 53241b1d3SJoe Thornber */ 63241b1d3SJoe Thornber 73241b1d3SJoe Thornber #include "dm-btree-internal.h" 83241b1d3SJoe Thornber #include "dm-space-map.h" 93241b1d3SJoe Thornber #include "dm-transaction-manager.h" 103241b1d3SJoe Thornber 111944ce60SPaul Gortmaker #include <linux/export.h> 123241b1d3SJoe Thornber #include <linux/device-mapper.h> 133241b1d3SJoe Thornber 143241b1d3SJoe Thornber #define DM_MSG_PREFIX "btree" 153241b1d3SJoe Thornber 163241b1d3SJoe Thornber /*---------------------------------------------------------------- 173241b1d3SJoe Thornber * Array manipulation 183241b1d3SJoe Thornber *--------------------------------------------------------------*/ 193241b1d3SJoe Thornber static void memcpy_disk(void *dest, const void *src, size_t len) 203241b1d3SJoe Thornber __dm_written_to_disk(src) 213241b1d3SJoe Thornber { 223241b1d3SJoe Thornber memcpy(dest, src, len); 233241b1d3SJoe Thornber __dm_unbless_for_disk(src); 243241b1d3SJoe Thornber } 253241b1d3SJoe Thornber 263241b1d3SJoe Thornber static void array_insert(void *base, size_t elt_size, unsigned nr_elts, 273241b1d3SJoe Thornber unsigned index, void *elt) 283241b1d3SJoe Thornber __dm_written_to_disk(elt) 293241b1d3SJoe Thornber { 303241b1d3SJoe Thornber if (index < nr_elts) 313241b1d3SJoe Thornber memmove(base + (elt_size * (index + 1)), 323241b1d3SJoe Thornber base + (elt_size * index), 333241b1d3SJoe Thornber (nr_elts - index) * elt_size); 343241b1d3SJoe Thornber 353241b1d3SJoe Thornber memcpy_disk(base + (elt_size * index), elt, elt_size); 363241b1d3SJoe Thornber } 373241b1d3SJoe Thornber 383241b1d3SJoe Thornber /*----------------------------------------------------------------*/ 393241b1d3SJoe Thornber 403241b1d3SJoe Thornber /* makes the assumption that no two keys are the same. 
*/ 41550929faSMikulas Patocka static int bsearch(struct btree_node *n, uint64_t key, int want_hi) 423241b1d3SJoe Thornber { 433241b1d3SJoe Thornber int lo = -1, hi = le32_to_cpu(n->header.nr_entries); 443241b1d3SJoe Thornber 453241b1d3SJoe Thornber while (hi - lo > 1) { 463241b1d3SJoe Thornber int mid = lo + ((hi - lo) / 2); 473241b1d3SJoe Thornber uint64_t mid_key = le64_to_cpu(n->keys[mid]); 483241b1d3SJoe Thornber 493241b1d3SJoe Thornber if (mid_key == key) 503241b1d3SJoe Thornber return mid; 513241b1d3SJoe Thornber 523241b1d3SJoe Thornber if (mid_key < key) 533241b1d3SJoe Thornber lo = mid; 543241b1d3SJoe Thornber else 553241b1d3SJoe Thornber hi = mid; 563241b1d3SJoe Thornber } 573241b1d3SJoe Thornber 583241b1d3SJoe Thornber return want_hi ? hi : lo; 593241b1d3SJoe Thornber } 603241b1d3SJoe Thornber 61550929faSMikulas Patocka int lower_bound(struct btree_node *n, uint64_t key) 623241b1d3SJoe Thornber { 633241b1d3SJoe Thornber return bsearch(n, key, 0); 643241b1d3SJoe Thornber } 653241b1d3SJoe Thornber 66993ceab9SJoe Thornber static int upper_bound(struct btree_node *n, uint64_t key) 67993ceab9SJoe Thornber { 68993ceab9SJoe Thornber return bsearch(n, key, 1); 69993ceab9SJoe Thornber } 70993ceab9SJoe Thornber 71550929faSMikulas Patocka void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, 723241b1d3SJoe Thornber struct dm_btree_value_type *vt) 733241b1d3SJoe Thornber { 743241b1d3SJoe Thornber uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); 753241b1d3SJoe Thornber 763241b1d3SJoe Thornber if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) 77*be500ed7SJoe Thornber dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range); 78*be500ed7SJoe Thornber 793241b1d3SJoe Thornber else if (vt->inc) 80*be500ed7SJoe Thornber vt->inc(vt->context, value_ptr(n, 0), nr_entries); 813241b1d3SJoe Thornber } 823241b1d3SJoe Thornber 83550929faSMikulas Patocka static int insert_at(size_t value_size, struct btree_node *node, unsigned index, 
843241b1d3SJoe Thornber uint64_t key, void *value) 853241b1d3SJoe Thornber __dm_written_to_disk(value) 863241b1d3SJoe Thornber { 873241b1d3SJoe Thornber uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); 883241b1d3SJoe Thornber __le64 key_le = cpu_to_le64(key); 893241b1d3SJoe Thornber 903241b1d3SJoe Thornber if (index > nr_entries || 913241b1d3SJoe Thornber index >= le32_to_cpu(node->header.max_entries)) { 923241b1d3SJoe Thornber DMERR("too many entries in btree node for insert"); 933241b1d3SJoe Thornber __dm_unbless_for_disk(value); 943241b1d3SJoe Thornber return -ENOMEM; 953241b1d3SJoe Thornber } 963241b1d3SJoe Thornber 973241b1d3SJoe Thornber __dm_bless_for_disk(&key_le); 983241b1d3SJoe Thornber 993241b1d3SJoe Thornber array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le); 1003241b1d3SJoe Thornber array_insert(value_base(node), value_size, nr_entries, index, value); 1013241b1d3SJoe Thornber node->header.nr_entries = cpu_to_le32(nr_entries + 1); 1023241b1d3SJoe Thornber 1033241b1d3SJoe Thornber return 0; 1043241b1d3SJoe Thornber } 1053241b1d3SJoe Thornber 1063241b1d3SJoe Thornber /*----------------------------------------------------------------*/ 1073241b1d3SJoe Thornber 1083241b1d3SJoe Thornber /* 1093241b1d3SJoe Thornber * We want 3n entries (for some n). This works more nicely for repeated 1103241b1d3SJoe Thornber * insert remove loops than (2n + 1). 
1113241b1d3SJoe Thornber */ 1123241b1d3SJoe Thornber static uint32_t calc_max_entries(size_t value_size, size_t block_size) 1133241b1d3SJoe Thornber { 1143241b1d3SJoe Thornber uint32_t total, n; 1153241b1d3SJoe Thornber size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */ 1163241b1d3SJoe Thornber 1173241b1d3SJoe Thornber block_size -= sizeof(struct node_header); 1183241b1d3SJoe Thornber total = block_size / elt_size; 1193241b1d3SJoe Thornber n = total / 3; /* rounds down */ 1203241b1d3SJoe Thornber 1213241b1d3SJoe Thornber return 3 * n; 1223241b1d3SJoe Thornber } 1233241b1d3SJoe Thornber 1243241b1d3SJoe Thornber int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) 1253241b1d3SJoe Thornber { 1263241b1d3SJoe Thornber int r; 1273241b1d3SJoe Thornber struct dm_block *b; 128550929faSMikulas Patocka struct btree_node *n; 1293241b1d3SJoe Thornber size_t block_size; 1303241b1d3SJoe Thornber uint32_t max_entries; 1313241b1d3SJoe Thornber 1323241b1d3SJoe Thornber r = new_block(info, &b); 1333241b1d3SJoe Thornber if (r < 0) 1343241b1d3SJoe Thornber return r; 1353241b1d3SJoe Thornber 1363241b1d3SJoe Thornber block_size = dm_bm_block_size(dm_tm_get_bm(info->tm)); 1373241b1d3SJoe Thornber max_entries = calc_max_entries(info->value_type.size, block_size); 1383241b1d3SJoe Thornber 1393241b1d3SJoe Thornber n = dm_block_data(b); 1403241b1d3SJoe Thornber memset(n, 0, block_size); 1413241b1d3SJoe Thornber n->header.flags = cpu_to_le32(LEAF_NODE); 1423241b1d3SJoe Thornber n->header.nr_entries = cpu_to_le32(0); 1433241b1d3SJoe Thornber n->header.max_entries = cpu_to_le32(max_entries); 1443241b1d3SJoe Thornber n->header.value_size = cpu_to_le32(info->value_type.size); 1453241b1d3SJoe Thornber 1463241b1d3SJoe Thornber *root = dm_block_location(b); 1474c7da06fSMikulas Patocka unlock_block(info, b); 1484c7da06fSMikulas Patocka 1494c7da06fSMikulas Patocka return 0; 1503241b1d3SJoe Thornber } 1513241b1d3SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_empty); 1523241b1d3SJoe 
/*----------------------------------------------------------------*/

/*
 * Deletion uses a recursive algorithm, since we have limited stack space
 * we explicitly manage our own stack on the heap.
 */
#define MAX_SPINE_DEPTH 64

/* One node currently being visited during deletion. */
struct frame {
	struct dm_block *b;		/* read lock held on this block */
	struct btree_node *n;		/* data of b */
	unsigned level;			/* which of info->levels this node belongs to */
	unsigned nr_children;		/* nr_entries of n */
	unsigned current_child;		/* next entry to process */
};

/* Explicit traversal stack for dm_btree_del(). */
struct del_stack {
	struct dm_btree_info *info;
	struct dm_transaction_manager *tm;
	int top;			/* index of the top frame, -1 => empty */
	struct frame spine[MAX_SPINE_DEPTH];
};

/* Point *f at the top frame; -EINVAL if the stack is empty. */
static int top_frame(struct del_stack *s, struct frame **f)
{
	if (s->top < 0) {
		DMERR("btree deletion stack empty");
		return -EINVAL;
	}

	*f = s->spine + s->top;

	return 0;
}

/* Non-zero while frames remain to be processed. */
static int unprocessed_frames(struct del_stack *s)
{
	return s->top >= 0;
}

/* Kick off read-ahead for every child block referenced by this node. */
static void prefetch_children(struct del_stack *s, struct frame *f)
{
	unsigned i;
	struct dm_block_manager *bm = dm_tm_get_bm(s->tm);

	for (i = 0; i < f->nr_children; i++)
		dm_bm_prefetch(bm, value64(f->n, i));
}

/*
 * True when this frame sits above the last level of a multi-level btree,
 * i.e. its leaf values are the roots of lower-level btrees rather than
 * user values.
 */
static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
{
	return f->level < (info->levels - 1);
}

/*
 * Push the node at block @b so that its children get visited.  A shared
 * block (refcount > 1) merely has one reference dropped; we never look
 * inside it and its children are left alone.
 */
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
	int r;
	uint32_t ref_count;

	if (s->top >= MAX_SPINE_DEPTH - 1) {
		DMERR("btree deletion stack out of memory");
		return -ENOMEM;
	}

	r = dm_tm_ref(s->tm, b, &ref_count);
	if (r)
		return r;

	if (ref_count > 1)
		/*
		 * This is a shared node, so we can just decrement its
		 * reference counter and leave the children.
		 */
		dm_tm_dec(s->tm, b);

	else {
		uint32_t flags;
		struct frame *f = s->spine + ++s->top;

		r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
		if (r) {
			/* undo the push on failure */
			s->top--;
			return r;
		}

		f->n = dm_block_data(f->b);
		f->level = level;
		f->nr_children = le32_to_cpu(f->n->header.nr_entries);
		f->current_child = 0;

		/* only nodes whose values are block pointers are prefetched */
		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
			prefetch_children(s, f);
	}

	return 0;
}

/* Drop the top frame: release our reference on its block and unlock it. */
static void pop_frame(struct del_stack *s)
{
	struct frame *f = s->spine + s->top--;

	dm_tm_dec(s->tm, dm_block_location(f->b));
	dm_tm_unlock(s->tm, f->b);
}

/* Error-path helper: unlock every block still held on the stack. */
static void unlock_all_frames(struct del_stack *s)
{
	struct frame *f;

	while (unprocessed_frames(s)) {
		f = s->spine + s->top--;
		dm_tm_unlock(s->tm, f->b);
	}
}

/*
 * Delete the btree rooted at @root: drop a reference on every block it
 * owns and call value_type.dec (if provided) on the leaf values.
 */
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
	int r;
	struct del_stack *s;

	/*
	 * dm_btree_del() is called via an ioctl, as such should be
	 * considered an FS op.  We can't recurse back into the FS, so we
	 * allocate GFP_NOFS.
	 */
	s = kmalloc(sizeof(*s), GFP_NOFS);
	if (!s)
		return -ENOMEM;
	s->info = info;
	s->tm = info->tm;
	s->top = -1;

	r = push_frame(s, root, 0);
	if (r)
		goto out;

	while (unprocessed_frames(s)) {
		uint32_t flags;
		struct frame *f;
		dm_block_t b;

		r = top_frame(s, &f);
		if (r)
			goto out;

		/* finished with this node: release it */
		if (f->current_child >= f->nr_children) {
			pop_frame(s);
			continue;
		}

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE) {
			/* descend within the same btree level */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level);
			if (r)
				goto out;

		} else if (is_internal_level(info, f)) {
			/* leaf of this level: values are next-level roots */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level + 1);
			if (r)
				goto out;

		} else {
			/* true leaf: release the values, then the node */
			if (info->value_type.dec)
				info->value_type.dec(info->value_type.context,
						     value_ptr(f->n, 0), f->nr_children);
			pop_frame(s);
		}
	}
out:
	if (r) {
		/* cleanup all frames of del_stack */
		unlock_all_frames(s);
	}
	kfree(s);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_del);

/*----------------------------------------------------------------*/

/*
 * Walk from @block down to a leaf, using @search_fn to choose an entry at
 * each node.  On success the chosen leaf key is stored in *result_key
 * and, if @v is non-NULL, @value_size bytes of its value are copied into
 * @v.  Returns -ENODATA when the search falls off either end of a node.
 */
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
			    int (*search_fn)(struct btree_node *, uint64_t),
			    uint64_t *result_key, void *v, size_t value_size)
{
	int i, r;
	uint32_t flags, nr_entries;

	do {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		i = search_fn(ro_node(s), key);

		flags = le32_to_cpu(ro_node(s)->header.flags);
		nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (i < 0 || i >= nr_entries)
			return -ENODATA;

		if (flags & INTERNAL_NODE)
			block = value64(ro_node(s), i);

	} while (!(flags & LEAF_NODE));

	*result_key = le64_to_cpu(ro_node(s)->keys[i]);
	if (v)
		memcpy(v, value_ptr(ro_node(s), i), value_size);

	return 0;
}

/*
 * Look up the value for a key tuple (one key per btree level); the value
 * is copied, still little-endian, into @value_le.  Returns -ENODATA if
 * any level lacks an exact match.
 */
int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, void *value_le)
{
	unsigned level, last_level = info->levels - 1;
	int r = -ENODATA;
	uint64_t rkey;
	__le64 internal_value_le;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		size_t size;
		void *value_p;

		if (level == last_level) {
			/* last level holds the caller's value */
			value_p = value_le;
			size = info->value_type.size;

		} else {
			/* inner levels hold the root of the next btree */
			value_p = &internal_value_le;
			size = sizeof(uint64_t);
		}

		r = btree_lookup_raw(&spine, root, keys[level],
				     lower_bound, &rkey,
				     value_p, size);

		if (!r) {
			/* lower_bound may land on a smaller key: reject */
			if (rkey != keys[level]) {
				exit_ro_spine(&spine);
				return -ENODATA;
			}
		} else {
			exit_ro_spine(&spine);
			return r;
		}

		root = le64_to_cpu(internal_value_le);
	}
	exit_ro_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_lookup);

/*
 * Recursively find the leaf entry with the smallest key >= @key beneath
 * @root (a single-level walk).  Internal nodes retry the next sibling
 * child when the first candidate subtree yields -ENODATA.
 */
static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
				       uint64_t key, uint64_t *rkey, void *value_le)
{
	int r, i;
	uint32_t flags, nr_entries;
	struct dm_block *node;
	struct btree_node *n;

	r = bn_read_lock(info, root, &node);
	if (r)
		return r;

	n = dm_block_data(node);
	flags = le32_to_cpu(n->header.flags);
	nr_entries = le32_to_cpu(n->header.nr_entries);

	if (flags & INTERNAL_NODE) {
		i = lower_bound(n, key);
		if (i < 0) {
			/*
			 * avoid early -ENODATA return when all entries are
			 * higher than the search @key.
			 */
			i = 0;
		}
		if (i >= nr_entries) {
			r = -ENODATA;
			goto out;
		}

		r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
		if (r == -ENODATA && i < (nr_entries - 1)) {
			/* the answer may live in the next child along */
			i++;
			r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
		}

	} else {
		i = upper_bound(n, key);
		if (i < 0 || i >= nr_entries) {
			r = -ENODATA;
			goto out;
		}

		*rkey = le64_to_cpu(n->keys[i]);
		memcpy(value_le, value_ptr(n, i), info->value_type.size);
	}
out:
	dm_tm_unlock(info->tm, node);
	return r;
}

/*
 * As dm_btree_lookup, but returns the entry with the lowest key that is
 * greater than or equal to the one requested; *rkey receives the actual
 * key found.  Levels above the last still require an exact match.
 */
int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
			 uint64_t *keys, uint64_t *rkey, void *value_le)
{
	unsigned level;
	int r = -ENODATA;
	__le64 internal_value_le;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	for (level = 0; level < info->levels - 1u; level++) {
		r = btree_lookup_raw(&spine, root, keys[level],
				     lower_bound, rkey,
				     &internal_value_le, sizeof(uint64_t));
		if (r)
			goto out;

		if (*rkey != keys[level]) {
			r = -ENODATA;
			goto out;
		}

		root = le64_to_cpu(internal_value_le);
	}

	r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
out:
	exit_ro_spine(&spine);
	return r;
}

EXPORT_SYMBOL_GPL(dm_btree_lookup_next);

/*----------------------------------------------------------------*/

/*
 * Copies entries from one region of a btree node to another.  The regions
 * must not overlap.
 */
static void copy_entries(struct btree_node *dest, unsigned dest_offset,
			 struct btree_node *src, unsigned src_offset,
			 unsigned count)
{
	size_t value_size = le32_to_cpu(dest->header.value_size);
	memcpy(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
	memcpy(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
}

/*
 * Moves entries from one region of a btree node to another.  The regions
 * may overlap.
 */
static void move_entries(struct btree_node *dest, unsigned dest_offset,
			 struct btree_node *src, unsigned src_offset,
			 unsigned count)
{
	/* memmove rather than memcpy: the two regions may overlap */
	size_t value_size = le32_to_cpu(dest->header.value_size);
	memmove(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
	memmove(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
}

/*
 * Erases the first 'count' entries of a btree node, shifting following
 * entries down into their place.
 */
static void shift_down(struct btree_node *n, unsigned count)
{
	move_entries(n, 0, n, count, le32_to_cpu(n->header.nr_entries) - count);
}

/*
 * Moves entries in a btree node up 'count' places, making space for
 * new entries at the start of the node.
 */
static void shift_up(struct btree_node *n, unsigned count)
{
	move_entries(n, count, n, 0, le32_to_cpu(n->header.nr_entries));
}

/*
 * Redistributes entries between two btree nodes to make them
 * have similar numbers of entries.
 */
static void redistribute2(struct btree_node *left, struct btree_node *right)
{
	unsigned nr_left = le32_to_cpu(left->header.nr_entries);
	unsigned nr_right = le32_to_cpu(right->header.nr_entries);
	unsigned total = nr_left + nr_right;
	unsigned target_left = total / 2;
	unsigned target_right = total - target_left;

	if (nr_left < target_left) {
		/* left is short: take the head of right */
		unsigned delta = target_left - nr_left;
		copy_entries(left, nr_left, right, 0, delta);
		shift_down(right, delta);
	} else if (nr_left > target_left) {
		/* left is long: prepend its tail onto right */
		unsigned delta = nr_left - target_left;
		if (nr_right)
			shift_up(right, delta);
		copy_entries(right, 0, left, target_left, delta);
	}

	left->header.nr_entries = cpu_to_le32(target_left);
	right->header.nr_entries = cpu_to_le32(target_right);
}

/*
 * Redistribute entries between three nodes.  Assumes the central
 * node is empty.
 */
static void redistribute3(struct btree_node *left, struct btree_node *center,
			  struct btree_node *right)
{
	unsigned nr_left = le32_to_cpu(left->header.nr_entries);
	unsigned nr_center = le32_to_cpu(center->header.nr_entries);
	unsigned nr_right = le32_to_cpu(right->header.nr_entries);
	unsigned total, target_left, target_center, target_right;

	BUG_ON(nr_center);

	total = nr_left + nr_right;
	target_left = total / 3;
	target_center = (total - target_left) / 2;
	target_right = (total - target_left - target_center);

	if (nr_left < target_left) {
		/* left is short: fill left and center from the head of right */
		unsigned left_short = target_left - nr_left;
		copy_entries(left, nr_left, right, 0, left_short);
		copy_entries(center, 0, right, left_short, target_center);
		shift_down(right, nr_right - target_right);

	} else if (nr_left < (target_left + target_center)) {
		/* center is fed from left's tail and right's head */
		unsigned left_to_center = nr_left - target_left;
		copy_entries(center, 0, left, target_left, left_to_center);
		copy_entries(center, left_to_center, right, 0, target_center - left_to_center);
		shift_down(right, nr_right - target_right);

	} else {
		/*
		 * left overflows into both center and right.  NOTE(review):
		 * the copy into center writes nr_left - target_left entries,
		 * which may exceed target_center; the surplus is harmless
		 * since nr_entries is set to target_center below.
		 */
		unsigned right_short = target_right - nr_right;
		shift_up(right, right_short);
		copy_entries(right, 0, left, nr_left - right_short, right_short);
		copy_entries(center, 0, left, target_left, nr_left - target_left);
	}

	left->header.nr_entries = cpu_to_le32(target_left);
	center->header.nr_entries = cpu_to_le32(target_center);
	right->header.nr_entries = cpu_to_le32(target_right);
}

/*
 * Splits a node by creating a sibling node and shifting half the nodes
 * contents across.  Assumes there is a parent node, and it has room for
 * another child.
 *
 * Before:
 *	  +--------+
 *	  | Parent |
 *	  +--------+
 *	      |
 *	      v
 *	+----------+
 *	| A ++++++ |
 *	+----------+
 *
 *
 * After:
 *		+--------+
 *		| Parent |
 *		+--------+
 *		  |	 |
 *		  v	 +------+
 *	+---------+	        |
 *	| A* +++  |	        v
 *	+---------+	  +-------+
 *			  | B +++ |
 *			  +-------+
 *
 * Where A* is a shadow of A.
 */
static int split_one_into_two(struct shadow_spine *s, unsigned parent_index,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int r;
	struct dm_block *left, *right, *parent;
	struct btree_node *ln, *rn, *pn;
	__le64 location;

	left = shadow_current(s);

	r = new_block(s->info, &right);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	rn = dm_block_data(right);

	/* the new sibling starts empty and inherits left's geometry */
	rn->header.flags = ln->header.flags;
	rn->header.nr_entries = cpu_to_le32(0);
	rn->header.max_entries = ln->header.max_entries;
	rn->header.value_size = ln->header.value_size;
	redistribute2(ln, rn);

	/* patch up the parent: add an entry pointing at the new sibling */
	parent = shadow_parent(s);
	pn = dm_block_data(parent);

	location = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&location);
	r = insert_at(sizeof(__le64), pn, parent_index + 1,
		      le64_to_cpu(rn->keys[0]), &location);
	if (r) {
		unlock_block(s->info, right);
		return r;
	}

	/* patch up the spine: keep whichever half now covers @key locked */
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}

	return 0;
}

/*
 * We often need to modify a sibling node.  This function shadows a particular
 * child of the given parent node.  Making sure to update the parent to point
 * to the new shadow.
 */
static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
			struct btree_node *parent, unsigned index,
			struct dm_block **result)
{
	int r, inc;
	dm_block_t root;
	struct btree_node *node;

	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       result, &inc);
	if (r)
		return r;

	node = dm_block_data(*result);

	/* a fresh copy was made: bump refcounts on everything it points at */
	if (inc)
		inc_children(info->tm, node, vt);

	/* repoint the parent entry at the shadow */
	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(*result));

	return 0;
}

/*
 * Splits two nodes into three.  This is more work, but results in fuller
 * nodes, so saves metadata space.
 */
 */
static int split_two_into_three(struct shadow_spine *s, unsigned parent_index,
				struct dm_btree_value_type *vt, uint64_t key)
{
	int r;
	unsigned middle_index;
	struct dm_block *left, *middle, *right, *parent;
	struct btree_node *ln, *rn, *mn, *pn;
	__le64 location;

	parent = shadow_parent(s);
	pn = dm_block_data(parent);

	/*
	 * Choose a sibling of the current node to take part in the split.
	 * The pair becomes (left, right), and a brand new node goes in the
	 * middle.
	 */
	if (parent_index == 0) {
		middle_index = 1;
		left = shadow_current(s);
		r = shadow_child(s->info, vt, pn, parent_index + 1, &right);
		if (r)
			return r;
	} else {
		middle_index = parent_index;
		right = shadow_current(s);
		r = shadow_child(s->info, vt, pn, parent_index - 1, &left);
		if (r)
			return r;
	}

	r = new_block(s->info, &middle);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	mn = dm_block_data(middle);
	rn = dm_block_data(right);

	/* The new middle node inherits the geometry of its siblings. */
	mn->header.nr_entries = cpu_to_le32(0);
	mn->header.flags = ln->header.flags;
	mn->header.max_entries = ln->header.max_entries;
	mn->header.value_size = ln->header.value_size;

	redistribute3(ln, mn, rn);

	/* patch up the parent */
	pn->keys[middle_index] = rn->keys[0];
	location = cpu_to_le64(dm_block_location(middle));
	__dm_bless_for_disk(&location);
	r = insert_at(sizeof(__le64), pn, middle_index,
		      le64_to_cpu(mn->keys[0]), &location);
	if (r) {
		/*
		 * Unlock everything except the spine's current node,
		 * which exit_shadow_spine() will release.
		 */
		if (shadow_current(s) != left)
			unlock_block(s->info, left);

		unlock_block(s->info, middle);

		if (shadow_current(s) != right)
			unlock_block(s->info, right);

		return r;
	}


	/* patch up the spine: keep only the child that covers 'key' locked */
	if (key < le64_to_cpu(mn->keys[0])) {
		unlock_block(s->info, middle);
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, left);
		unlock_block(s->info, right);
		s->nodes[1] = middle;
	} else {
		unlock_block(s->info, left);
		unlock_block(s->info, middle);
		s->nodes[1] = right;
	}

	return 0;
}

/*----------------------------------------------------------------*/

/*
 * Splits a node by creating two new children beneath the given node.
 *
 * Before:
 *	  +----------+
 *	  | A ++++++ |
 *	  +----------+
 *
 *
 * After:
 *	+------------+
 *	| A (shadow) |
 *	+------------+
 *	    |	|
 *   +------+	+----+
 *   |		     |
 *   v		     v
 * +-------+	 +-------+
 * | B +++ |	 | C +++ |
 * +-------+	 +-------+
 */
static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *new_parent;
	struct btree_node *pn, *ln, *rn;
	__le64 val;

	new_parent = shadow_current(s);

	pn = dm_block_data(new_parent);
	/* Internal nodes hold __le64 child pointers; leaves hold values. */
	size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
		sizeof(__le64) : s->info->value_type.size;

	/* create & init the left block */
	r = new_block(s->info, &left);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	nr_left = le32_to_cpu(pn->header.nr_entries) / 2;

	ln->header.flags = pn->header.flags;
	ln->header.nr_entries = cpu_to_le32(nr_left);
	ln->header.max_entries = pn->header.max_entries;
	ln->header.value_size = pn->header.value_size;
	memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
	memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);

	/* create & init the right block */
	r = new_block(s->info, &right);
	if (r < 0) {
		unlock_block(s->info, left);
		return r;
	}

	rn = dm_block_data(right);
	nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;

	rn->header.flags = pn->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = pn->header.max_entries;
	rn->header.value_size = pn->header.value_size;
	memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
	memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
	       nr_right * size);

	/*
	 * new_parent should just point to l and r now: it becomes an
	 * internal node with exactly two __le64 child pointers.
	 */
	pn->header.flags = cpu_to_le32(INTERNAL_NODE);
	pn->header.nr_entries = cpu_to_le32(2);
	pn->header.max_entries = cpu_to_le32(
		calc_max_entries(sizeof(__le64),
				 dm_bm_block_size(
					 dm_tm_get_bm(s->info->tm))));
	pn->header.value_size = cpu_to_le32(sizeof(__le64));

	val = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&val);
	pn->keys[0] = ln->keys[0];
	memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));

	val = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&val);
	pn->keys[1] = rn->keys[0];
	memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));

	unlock_block(s->info, left);
	unlock_block(s->info, right);
	return 0;
}

/*----------------------------------------------------------------*/

/*
 * Redistributes a node's entries with its left sibling.
 */
static int rebalance_left(struct shadow_spine *s, struct dm_btree_value_type *vt,
			  unsigned parent_index, uint64_t key)
{
	int r;
	struct dm_block *sib;
	struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));

	/* Shadow the left sibling so we may modify it. */
	r = shadow_child(s->info, vt, parent, parent_index - 1, &sib);
	if (r)
		return r;

	left = dm_block_data(sib);
	right = dm_block_data(shadow_current(s));
	redistribute2(left, right);
	/* The first key of the right node may have moved; fix the parent. */
	*key_ptr(parent, parent_index) = right->keys[0];

	/*
	 * If 'key' now falls in the sibling, make the sibling the spine's
	 * current node; otherwise just drop the sibling's lock.
	 */
	if (key < le64_to_cpu(right->keys[0])) {
		unlock_block(s->info, s->nodes[1]);
		s->nodes[1] = sib;
	} else {
		unlock_block(s->info, sib);
	}

	return 0;
}

/*
 * Redistributes a nodes entries with its right sibling.
 */
static int rebalance_right(struct shadow_spine *s, struct dm_btree_value_type *vt,
			   unsigned parent_index, uint64_t key)
{
	int r;
	struct dm_block *sib;
	struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));

	/* Shadow the right sibling so we may modify it. */
	r = shadow_child(s->info, vt, parent, parent_index + 1, &sib);
	if (r)
		return r;

	left = dm_block_data(shadow_current(s));
	right = dm_block_data(sib);
	redistribute2(left, right);
	/* The sibling's first key may have moved; fix the parent. */
	*key_ptr(parent, parent_index + 1) = right->keys[0];

	/*
	 * Keep locked whichever node 'key' now falls in; the spine's
	 * current node is swapped for the sibling if necessary.
	 */
	if (key < le64_to_cpu(right->keys[0])) {
		unlock_block(s->info, sib);
	} else {
		unlock_block(s->info, s->nodes[1]);
		s->nodes[1] = sib;
	}

	return 0;
}

/*
 * Returns the number of spare entries in a node.
 */
static int get_node_free_space(struct dm_btree_info *info, dm_block_t b, unsigned *space)
{
	int r;
	unsigned nr_entries;
	struct dm_block *block;
	struct btree_node *node;

	/* Read lock only: the node is merely inspected, not modified. */
	r = bn_read_lock(info, b, &block);
	if (r)
		return r;

	node = dm_block_data(block);
	nr_entries = le32_to_cpu(node->header.nr_entries);
	*space = le32_to_cpu(node->header.max_entries) - nr_entries;

	unlock_block(info, block);
	return 0;
}

/*
 * Make space in a node, either by moving some entries to a sibling,
 * or creating a new sibling node.  SPACE_THRESHOLD defines the minimum
 * number of free entries that must be in the sibling to make the move
 * worth while.  If the siblings are shared (eg, part of a snapshot),
 * then they are not touched, since this break sharing and so consume
 * more space than we save.
 */
#define SPACE_THRESHOLD 8
static int rebalance_or_split(struct shadow_spine *s, struct dm_btree_value_type *vt,
			      unsigned parent_index, uint64_t key)
{
	int r;
	struct btree_node *parent = dm_block_data(shadow_parent(s));
	unsigned nr_parent = le32_to_cpu(parent->header.nr_entries);
	unsigned free_space;
	int left_shared = 0, right_shared = 0;

	/* Should we move entries to the left sibling? */
	if (parent_index > 0) {
		dm_block_t left_b = value64(parent, parent_index - 1);
		r = dm_tm_block_is_shared(s->info->tm, left_b, &left_shared);
		if (r)
			return r;

		if (!left_shared) {
			r = get_node_free_space(s->info, left_b, &free_space);
			if (r)
				return r;

			if (free_space >= SPACE_THRESHOLD)
				return rebalance_left(s, vt, parent_index, key);
		}
	}

	/* Should we move entries to the right sibling? */
	if (parent_index < (nr_parent - 1)) {
		dm_block_t right_b = value64(parent, parent_index + 1);
		r = dm_tm_block_is_shared(s->info->tm, right_b, &right_shared);
		if (r)
			return r;

		if (!right_shared) {
			r = get_node_free_space(s->info, right_b, &free_space);
			if (r)
				return r;

			if (free_space >= SPACE_THRESHOLD)
				return rebalance_right(s, vt, parent_index, key);
		}
	}

	/*
	 * We need to split the node, normally we split two nodes
	 * into three.  But when inserting a sequence that is either
	 * monotonically increasing or decreasing it's better to split
	 * a single node into two.
	 */
	if (left_shared || right_shared || (nr_parent <= 2) ||
	    (parent_index == 0) || (parent_index + 1 == nr_parent)) {
		return split_one_into_two(s, parent_index, vt, key);
	} else {
		return split_two_into_three(s, parent_index, vt, key);
	}
}

/*
 * Does the node contain a particular key?
 */
static bool contains_key(struct btree_node *node, uint64_t key)
{
	int i = lower_bound(node, key);

	if (i >= 0 && le64_to_cpu(node->keys[i]) == key)
		return true;

	return false;
}

/*
 * In general we preemptively make sure there's a free entry in every
 * node on the spine when doing an insert.  But we can avoid that with
 * leaf nodes if we know it's an overwrite.
 */
static bool has_space_for_insert(struct btree_node *node, uint64_t key)
{
	/* Both fields are __le32, so raw equality comparison is fine. */
	if (node->header.nr_entries == node->header.max_entries) {
		if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
			/* we don't need space if it's an overwrite */
			return contains_key(node, key);
		}

		return false;
	}

	return true;
}

/*
 * Walks down one btree level per iteration, shadowing each node on the
 * way and making space where needed, until the leaf that should hold
 * 'key' is reached.  On success *index is the slot in the leaf at which
 * the value should be inserted (or overwritten).
 */
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));

		if (!has_space_for_insert(node, key)) {
			/* The root has no siblings, so it must split beneath. */
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = rebalance_or_split(s, vt, i, key);

			if (r < 0)
				return r;

			/* making space can cause the current node to change */
			node = dm_block_data(shadow_current(s));
		}

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* change the bounds on the lowest key */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	/* No exact match: the new entry goes just after the lower bound. */
	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}

/*
 * Walks down to the leaf holding 'key', shadowing every node on the way.
 * Intended for overwrites only: returns -EINVAL if the key is not
 * already present in the leaf.
 */
static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
				      uint64_t key, int *index)
{
	int r, i = -1;
	struct btree_node *node;

	*index = 0;
	for (;;) {
		r = shadow_step(s, root, &s->info->value_type);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));
		i = lower_bound(node, key);

		/* An overwrite target must already exist in every level. */
		BUG_ON(i < 0);
		BUG_ON(i >= le32_to_cpu(node->header.nr_entries));

		if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
			if (key != le64_to_cpu(node->keys[i]))
				return -EINVAL;
			break;
		}

		root = value64(node, i);
	}

	*index = i;
	return 0;
}

int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
			     uint64_t key, int *index,
			     dm_block_t *new_root, struct dm_block **leaf)
{
	int r;
	struct shadow_spine spine;

	BUG_ON(info->levels > 1);
	init_shadow_spine(&spine, info);
	r = __btree_get_overwrite_leaf(&spine, root, key, index);
	if (!r) {
		*new_root = shadow_root(&spine);
		*leaf =
shadow_current(&spine);

		/*
		 * Decrement the count so exit_shadow_spine() doesn't
		 * unlock the leaf.
		 */
		spine.count--;
	}
	exit_shadow_spine(&spine);

	return r;
}

/*
 * Is the key at 'index' absent or different from keys[level]?  If so a
 * genuine insert, rather than an overwrite, is needed.
 */
static bool need_insert(struct btree_node *node, uint64_t *keys,
			unsigned level, unsigned index)
{
	return ((index >= le32_to_cpu(node->header.nr_entries)) ||
		(le64_to_cpu(node->keys[index]) != keys[level]));
}

/*
 * Inserts 'value' at keys[] in a (possibly multi-level) btree.  If
 * 'inserted' is non-NULL it is set to 1 for a fresh insert and 0 for an
 * overwrite.  The new root is returned through *new_root.
 */
static int insert(struct dm_btree_info *info, dm_block_t root,
		  uint64_t *keys, void *value, dm_block_t *new_root,
		  int *inserted)
		  __dm_written_to_disk(value)
{
	int r;
	unsigned level, index = -1, last_level = info->levels - 1;
	dm_block_t block = root;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_type;

	init_le64_type(info->tm, &le64_type);
	init_shadow_spine(&spine, info);

	/*
	 * The upper levels of a multi-level btree hold the roots of the
	 * next level down as __le64 values.
	 */
	for (level = 0; level < (info->levels - 1); level++) {
		r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
		if (r < 0)
			goto bad;

		n = dm_block_data(shadow_current(&spine));

		if (need_insert(n, keys, level, index)) {
			dm_block_t new_tree;
			__le64 new_le;

			/* No subtree for this key yet: create an empty one. */
			r = dm_btree_empty(info, &new_tree);
			if (r < 0)
				goto bad;

			new_le = cpu_to_le64(new_tree);
			__dm_bless_for_disk(&new_le);

			r = insert_at(sizeof(uint64_t), n, index,
				      keys[level], &new_le);
			if (r)
				goto bad;
		}

		if (level < last_level)
			block = value64(n, index);
	}

	/* Bottom level: insert the caller's value. */
	r = btree_insert_raw(&spine, block, &info->value_type,
			     keys[level], &index);
	if (r < 0)
		goto bad;

	n = dm_block_data(shadow_current(&spine));

	if (need_insert(n, keys, level, index)) {
		if (inserted)
			*inserted = 1;

		r = insert_at(info->value_type.size, n, index,
			      keys[level], value);
		if (r)
			goto bad_unblessed;
	} else {
		if (inserted)
			*inserted = 0;

		/*
		 * Overwrite: release the old value's reference, unless it
		 * compares equal to the new one.
		 */
		if (info->value_type.dec &&
		    (!info->value_type.equal ||
		     !info->value_type.equal(
			     info->value_type.context,
			     value_ptr(n, index),
			     value))) {
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index), 1);
		}
		memcpy_disk(value_ptr(n, index),
			    value, info->value_type.size);
	}

	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return 0;

bad:
	/* 'value' was blessed by the caller; unbless before returning. */
	__dm_unbless_for_disk(value);
bad_unblessed:
	exit_shadow_spine(&spine);
	return r;
}

int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, void *value, dm_block_t *new_root)
		    __dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, NULL);
}
EXPORT_SYMBOL_GPL(dm_btree_insert);

int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *keys, void *value, dm_block_t *new_root,
			   int *inserted)
			   __dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, inserted);
}
EXPORT_SYMBOL_GPL(dm_btree_insert_notify);

/*----------------------------------------------------------------*/

static int
find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
	 uint64_t *result_key, dm_block_t *next_block)
{
	int i, r;
	uint32_t flags;

	/*
	 * Walk down one level per iteration, following either the highest
	 * or the lowest child, until a leaf is reached.
	 */
	do {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		flags = le32_to_cpu(ro_node(s)->header.flags);
		i = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (!i)
			return -ENODATA;
		else
			i--;

		if (find_highest)
			*result_key = le64_to_cpu(ro_node(s)->keys[i]);
		else
			*result_key = le64_to_cpu(ro_node(s)->keys[0]);

		/*
		 * Only descend when there's somewhere to go; at a leaf the
		 * value is only needed if the caller asked for next_block.
		 */
		if (next_block || flags & INTERNAL_NODE) {
			if (find_highest)
				block = value64(ro_node(s), i);
			else
				block = value64(ro_node(s), 0);
		}

	} while (flags & INTERNAL_NODE);

	if (next_block)
		*next_block = block;
	return 0;
}

/*
 * Finds the highest or lowest key for each level of a multi-level btree.
 * Returns the number of levels for which a key was found; the walk stops
 * early if a level turns out to be empty (-ENODATA from find_key).
 */
static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
			     bool find_highest, uint64_t *result_keys)
{
	int r = 0, count = 0, level;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		r = find_key(&spine, root, find_highest, result_keys + level,
			     level == info->levels - 1 ? NULL : &root);
		if (r == -ENODATA) {
			/* empty subtree: not an error, just stop early */
			r = 0;
			break;

		} else if (r)
			break;

		count++;
	}
	exit_ro_spine(&spine);

	return r ? r : count;
}

int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
			      uint64_t *result_keys)
{
	return dm_btree_find_key(info, root, true, result_keys);
}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);

int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
			     uint64_t *result_keys)
{
	return dm_btree_find_key(info, root, false, result_keys);
}
EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);

/*----------------------------------------------------------------*/

/*
 * FIXME: We shouldn't use a recursive algorithm when we have limited stack
 * space.  Also this only works for single level trees.
 */
static int walk_node(struct dm_btree_info *info, dm_block_t block,
		     int (*fn)(void *context, uint64_t *keys, void *leaf),
		     void *context)
{
	int r;
	unsigned i, nr;
	struct dm_block *node;
	struct btree_node *n;
	uint64_t keys;

	r = bn_read_lock(info, block, &node);
	if (r)
		return r;

	n = dm_block_data(node);

	nr = le32_to_cpu(n->header.nr_entries);
	for (i = 0; i < nr; i++) {
		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
			/* internal node: recurse into the child subtree */
			r = walk_node(info, value64(n, i), fn, context);
			if (r)
				goto out;
		} else {
			/* leaf: hand the key/value pair to the callback */
			keys = le64_to_cpu(*key_ptr(n, i));
			r = fn(context, &keys, value_ptr(n, i));
			if (r)
				goto out;
		}
	}

out:
	dm_tm_unlock(info->tm, node);
	return r;
}

/*
 * Calls 'fn' once for every key/value pair in a single-level btree.
 */
int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
		  int (*fn)(void *context, uint64_t *keys, void *leaf),
		  void *context)
{
	BUG_ON(info->levels > 1);
	return walk_node(info, root, fn, context);
}
EXPORT_SYMBOL_GPL(dm_btree_walk); 14517d111c81SJoe Thornber 14527d111c81SJoe Thornber /*----------------------------------------------------------------*/ 14537d111c81SJoe Thornber 14547d111c81SJoe Thornber static void prefetch_values(struct dm_btree_cursor *c) 14557d111c81SJoe Thornber { 14567d111c81SJoe Thornber unsigned i, nr; 14577d111c81SJoe Thornber __le64 value_le; 14587d111c81SJoe Thornber struct cursor_node *n = c->nodes + c->depth - 1; 14597d111c81SJoe Thornber struct btree_node *bn = dm_block_data(n->b); 14607d111c81SJoe Thornber struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm); 14617d111c81SJoe Thornber 14627d111c81SJoe Thornber BUG_ON(c->info->value_type.size != sizeof(value_le)); 14637d111c81SJoe Thornber 14647d111c81SJoe Thornber nr = le32_to_cpu(bn->header.nr_entries); 14657d111c81SJoe Thornber for (i = 0; i < nr; i++) { 14667d111c81SJoe Thornber memcpy(&value_le, value_ptr(bn, i), sizeof(value_le)); 14677d111c81SJoe Thornber dm_bm_prefetch(bm, le64_to_cpu(value_le)); 14687d111c81SJoe Thornber } 14697d111c81SJoe Thornber } 14707d111c81SJoe Thornber 14717d111c81SJoe Thornber static bool leaf_node(struct dm_btree_cursor *c) 14727d111c81SJoe Thornber { 14737d111c81SJoe Thornber struct cursor_node *n = c->nodes + c->depth - 1; 14747d111c81SJoe Thornber struct btree_node *bn = dm_block_data(n->b); 14757d111c81SJoe Thornber 14767d111c81SJoe Thornber return le32_to_cpu(bn->header.flags) & LEAF_NODE; 14777d111c81SJoe Thornber } 14787d111c81SJoe Thornber 14797d111c81SJoe Thornber static int push_node(struct dm_btree_cursor *c, dm_block_t b) 14807d111c81SJoe Thornber { 14817d111c81SJoe Thornber int r; 14827d111c81SJoe Thornber struct cursor_node *n = c->nodes + c->depth; 14837d111c81SJoe Thornber 14847d111c81SJoe Thornber if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) { 14857d111c81SJoe Thornber DMERR("couldn't push cursor node, stack depth too high"); 14867d111c81SJoe Thornber return -EINVAL; 14877d111c81SJoe Thornber } 14887d111c81SJoe Thornber 
14897d111c81SJoe Thornber r = bn_read_lock(c->info, b, &n->b); 14907d111c81SJoe Thornber if (r) 14917d111c81SJoe Thornber return r; 14927d111c81SJoe Thornber 14937d111c81SJoe Thornber n->index = 0; 14947d111c81SJoe Thornber c->depth++; 14957d111c81SJoe Thornber 14967d111c81SJoe Thornber if (c->prefetch_leaves || !leaf_node(c)) 14977d111c81SJoe Thornber prefetch_values(c); 14987d111c81SJoe Thornber 14997d111c81SJoe Thornber return 0; 15007d111c81SJoe Thornber } 15017d111c81SJoe Thornber 15027d111c81SJoe Thornber static void pop_node(struct dm_btree_cursor *c) 15037d111c81SJoe Thornber { 15047d111c81SJoe Thornber c->depth--; 15057d111c81SJoe Thornber unlock_block(c->info, c->nodes[c->depth].b); 15067d111c81SJoe Thornber } 15077d111c81SJoe Thornber 15087d111c81SJoe Thornber static int inc_or_backtrack(struct dm_btree_cursor *c) 15097d111c81SJoe Thornber { 15107d111c81SJoe Thornber struct cursor_node *n; 15117d111c81SJoe Thornber struct btree_node *bn; 15127d111c81SJoe Thornber 15137d111c81SJoe Thornber for (;;) { 15147d111c81SJoe Thornber if (!c->depth) 15157d111c81SJoe Thornber return -ENODATA; 15167d111c81SJoe Thornber 15177d111c81SJoe Thornber n = c->nodes + c->depth - 1; 15187d111c81SJoe Thornber bn = dm_block_data(n->b); 15197d111c81SJoe Thornber 15207d111c81SJoe Thornber n->index++; 15217d111c81SJoe Thornber if (n->index < le32_to_cpu(bn->header.nr_entries)) 15227d111c81SJoe Thornber break; 15237d111c81SJoe Thornber 15247d111c81SJoe Thornber pop_node(c); 15257d111c81SJoe Thornber } 15267d111c81SJoe Thornber 15277d111c81SJoe Thornber return 0; 15287d111c81SJoe Thornber } 15297d111c81SJoe Thornber 15307d111c81SJoe Thornber static int find_leaf(struct dm_btree_cursor *c) 15317d111c81SJoe Thornber { 15327d111c81SJoe Thornber int r = 0; 15337d111c81SJoe Thornber struct cursor_node *n; 15347d111c81SJoe Thornber struct btree_node *bn; 15357d111c81SJoe Thornber __le64 value_le; 15367d111c81SJoe Thornber 15377d111c81SJoe Thornber for (;;) { 15387d111c81SJoe Thornber n = 
c->nodes + c->depth - 1; 15397d111c81SJoe Thornber bn = dm_block_data(n->b); 15407d111c81SJoe Thornber 15417d111c81SJoe Thornber if (le32_to_cpu(bn->header.flags) & LEAF_NODE) 15427d111c81SJoe Thornber break; 15437d111c81SJoe Thornber 15447d111c81SJoe Thornber memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le)); 15457d111c81SJoe Thornber r = push_node(c, le64_to_cpu(value_le)); 15467d111c81SJoe Thornber if (r) { 15477d111c81SJoe Thornber DMERR("push_node failed"); 15487d111c81SJoe Thornber break; 15497d111c81SJoe Thornber } 15507d111c81SJoe Thornber } 15517d111c81SJoe Thornber 15527d111c81SJoe Thornber if (!r && (le32_to_cpu(bn->header.nr_entries) == 0)) 15537d111c81SJoe Thornber return -ENODATA; 15547d111c81SJoe Thornber 15557d111c81SJoe Thornber return r; 15567d111c81SJoe Thornber } 15577d111c81SJoe Thornber 15587d111c81SJoe Thornber int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, 15597d111c81SJoe Thornber bool prefetch_leaves, struct dm_btree_cursor *c) 15607d111c81SJoe Thornber { 15617d111c81SJoe Thornber int r; 15627d111c81SJoe Thornber 15637d111c81SJoe Thornber c->info = info; 15647d111c81SJoe Thornber c->root = root; 15657d111c81SJoe Thornber c->depth = 0; 15667d111c81SJoe Thornber c->prefetch_leaves = prefetch_leaves; 15677d111c81SJoe Thornber 15687d111c81SJoe Thornber r = push_node(c, root); 15697d111c81SJoe Thornber if (r) 15707d111c81SJoe Thornber return r; 15717d111c81SJoe Thornber 15727d111c81SJoe Thornber return find_leaf(c); 15737d111c81SJoe Thornber } 15747d111c81SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_cursor_begin); 15757d111c81SJoe Thornber 15767d111c81SJoe Thornber void dm_btree_cursor_end(struct dm_btree_cursor *c) 15777d111c81SJoe Thornber { 15787d111c81SJoe Thornber while (c->depth) 15797d111c81SJoe Thornber pop_node(c); 15807d111c81SJoe Thornber } 15817d111c81SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_cursor_end); 15827d111c81SJoe Thornber 15837d111c81SJoe Thornber int dm_btree_cursor_next(struct dm_btree_cursor 
*c) 15847d111c81SJoe Thornber { 15857d111c81SJoe Thornber int r = inc_or_backtrack(c); 15867d111c81SJoe Thornber if (!r) { 15877d111c81SJoe Thornber r = find_leaf(c); 15887d111c81SJoe Thornber if (r) 15897d111c81SJoe Thornber DMERR("find_leaf failed"); 15907d111c81SJoe Thornber } 15917d111c81SJoe Thornber 15927d111c81SJoe Thornber return r; 15937d111c81SJoe Thornber } 15947d111c81SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_cursor_next); 15957d111c81SJoe Thornber 15969b696229SJoe Thornber int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count) 15979b696229SJoe Thornber { 15989b696229SJoe Thornber int r = 0; 15999b696229SJoe Thornber 16009b696229SJoe Thornber while (count-- && !r) 16019b696229SJoe Thornber r = dm_btree_cursor_next(c); 16029b696229SJoe Thornber 16039b696229SJoe Thornber return r; 16049b696229SJoe Thornber } 16059b696229SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_cursor_skip); 16069b696229SJoe Thornber 16077d111c81SJoe Thornber int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) 16087d111c81SJoe Thornber { 16097d111c81SJoe Thornber if (c->depth) { 16107d111c81SJoe Thornber struct cursor_node *n = c->nodes + c->depth - 1; 16117d111c81SJoe Thornber struct btree_node *bn = dm_block_data(n->b); 16127d111c81SJoe Thornber 16137d111c81SJoe Thornber if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE) 16147d111c81SJoe Thornber return -EINVAL; 16157d111c81SJoe Thornber 16167d111c81SJoe Thornber *key = le64_to_cpu(*key_ptr(bn, n->index)); 16177d111c81SJoe Thornber memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size); 16187d111c81SJoe Thornber return 0; 16197d111c81SJoe Thornber 16207d111c81SJoe Thornber } else 16217d111c81SJoe Thornber return -ENODATA; 16227d111c81SJoe Thornber } 16237d111c81SJoe Thornber EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value); 1624