// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include "messages.h"
#include "ctree.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
        return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add_state(struct extent_state *state)
{
        unsigned long flags;

        spin_lock_irqsave(&leak_lock, flags);
        list_add(&state->leak_list, &states);
        spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_leak_debug_del_state(struct extent_state *state)
{
        unsigned long flags;

        spin_lock_irqsave(&leak_lock, flags);
        list_del(&state->leak_list);
        spin_unlock_irqrestore(&leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
        struct extent_state *state;

        while (!list_empty(&states)) {
                state = list_first_entry(&states, struct extent_state, leak_list);
                btrfs_err(NULL,
                          "state leak: start %llu end %llu state %u in tree %d refs %d",
                          state->start, state->end, state->state,
                          extent_state_in_tree(state),
                          refcount_read(&state->refs));
                list_del(&state->leak_list);
                WARN_ON_ONCE(1);
                kmem_cache_free(extent_state_cache, state);
        }
}

#define btrfs_debug_check_extent_io_range(tree, start, end) \
        __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
                                                       struct extent_io_tree *tree,
                                                       u64 start, u64 end)
{
        const struct btrfs_inode *inode = tree->inode;
        u64 isize;

        if (tree->owner != IO_TREE_INODE_IO)
                return;

        isize = i_size_read(&inode->vfs_inode);
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                btrfs_debug_rl(inode->root->fs_info,
                               "%s: ino %llu isize %llu odd range [%llu,%llu]",
                               caller, btrfs_ino(inode), isize, start, end);
        }
}
#else
#define btrfs_leak_debug_add_state(state)               do {} while (0)
#define btrfs_leak_debug_del_state(state)               do {} while (0)
#define btrfs_extent_state_leak_debug_check()           do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)      do {} while (0)
#endif

/* Read-only access to the inode. */
const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree)
{
        if (tree->owner == IO_TREE_INODE_IO)
                return tree->inode;
        return NULL;
}

/* For read-only access to fs_info. */
const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
{
        if (tree->owner == IO_TREE_INODE_IO)
                return tree->inode->root->fs_info;
        return tree->fs_info;
}

void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
                               struct extent_io_tree *tree, unsigned int owner)
{
        tree->state = RB_ROOT;
        spin_lock_init(&tree->lock);
        tree->fs_info = fs_info;
        tree->owner = owner;
}

/*
 * Empty an io tree, removing and freeing every extent state record from the
 * tree. This should be called once we are sure no other task can access the
 * tree anymore, so no tree updates happen after we empty the tree and there
 * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
 * set on any extent state when calling this function).
 */
void btrfs_extent_io_tree_release(struct extent_io_tree *tree)
{
        struct rb_root root;
        struct extent_state *state;
        struct extent_state *tmp;

        spin_lock(&tree->lock);
        root = tree->state;
        tree->state = RB_ROOT;
        rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
                /* Clear node to keep btrfs_free_extent_state() happy. */
                RB_CLEAR_NODE(&state->rb_node);
                ASSERT(!(state->state & EXTENT_LOCK_BITS));
                /*
                 * No need for a memory barrier here, as we are holding the tree
                 * lock and we only change the waitqueue while holding that lock
                 * (see wait_extent_bit()).
                 */
                ASSERT(!waitqueue_active(&state->wq));
                btrfs_free_extent_state(state);
                cond_resched_lock(&tree->lock);
        }
        /*
         * Should still be empty even after a reschedule, no other task should
         * be accessing the tree anymore.
         */
        ASSERT(RB_EMPTY_ROOT(&tree->state));
        spin_unlock(&tree->lock);
}
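
/*
 * Illustrative sketch (not a call site in this file): a tree's lifetime pairs
 * btrfs_extent_io_tree_init() with btrfs_extent_io_tree_release() once no
 * other task can touch the tree anymore:
 *
 *	struct extent_io_tree tree;
 *
 *	btrfs_extent_io_tree_init(fs_info, &tree, IO_TREE_FS_EXCLUDED_EXTENTS);
 *	...
 *	btrfs_extent_io_tree_release(&tree);
 *
 * IO_TREE_FS_EXCLUDED_EXTENTS is just an assumed owner value here; any owner
 * constant from extent-io-tree.h follows the same pattern.
 */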

static struct extent_state *alloc_extent_state(gfp_t mask)
{
        struct extent_state *state;

        /*
         * The given mask might not be appropriate for the slab allocator,
         * drop the unsupported bits.
         */
        mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state)
                return state;
        state->state = 0;
        RB_CLEAR_NODE(&state->rb_node);
        btrfs_leak_debug_add_state(state);
        refcount_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        trace_btrfs_alloc_extent_state(state, mask, _RET_IP_);
        return state;
}

static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
{
        if (!prealloc)
                prealloc = alloc_extent_state(GFP_ATOMIC);

        return prealloc;
}

void btrfs_free_extent_state(struct extent_state *state)
{
        if (!state)
                return;
        if (refcount_dec_and_test(&state->refs)) {
                WARN_ON(extent_state_in_tree(state));
                btrfs_leak_debug_del_state(state);
                trace_btrfs_free_extent_state(state, _RET_IP_);
                kmem_cache_free(extent_state_cache, state);
        }
}

static int add_extent_changeset(struct extent_state *state, u32 bits,
                                struct extent_changeset *changeset,
                                int set)
{
        if (!changeset)
                return 0;
        if (set && (state->state & bits) == bits)
                return 0;
        if (!set && (state->state & bits) == 0)
                return 0;
        changeset->bytes_changed += state->end - state->start + 1;

        return ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC);
}

static inline struct extent_state *next_state(struct extent_state *state)
{
        struct rb_node *next = rb_next(&state->rb_node);

        return rb_entry_safe(next, struct extent_state, rb_node);
}

static inline struct extent_state *prev_state(struct extent_state *state)
{
        struct rb_node *next = rb_prev(&state->rb_node);

        return rb_entry_safe(next, struct extent_state, rb_node);
}

/*
 * Search @tree for an entry that contains @offset or, if none exists, for the
 * first entry that starts and ends after that offset.
 *
 * @tree:       the tree to search
 * @offset:     search offset
 * @node_ret:   pointer where the new node should be anchored (used when
 *              inserting an entry in the tree)
 * @parent_ret: points to the entry that would have been the parent of the
 *              entry containing @offset
 *
 * Return a pointer to the entry that contains @offset byte address.
 *
 * If no such entry exists, return the first entry that starts and ends after
 * @offset if one exists, otherwise NULL.
 *
 * If the returned entry starts at @offset, then @node_ret and @parent_ret
 * aren't changed.
 */
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
                                                          u64 offset,
                                                          struct rb_node ***node_ret,
                                                          struct rb_node **parent_ret)
{
        struct rb_root *root = &tree->state;
        struct rb_node **node = &root->rb_node;
        struct rb_node *prev = NULL;
        struct extent_state *entry = NULL;

        while (*node) {
                prev = *node;
                entry = rb_entry(prev, struct extent_state, rb_node);

                if (offset < entry->start)
                        node = &(*node)->rb_left;
                else if (offset > entry->end)
                        node = &(*node)->rb_right;
                else
                        return entry;
        }

        if (node_ret)
                *node_ret = node;
        if (parent_ret)
                *parent_ret = prev;

        /*
         * Return either the current entry if it contains offset (it ends after
         * or at offset) or the first entry that starts and ends after offset if
         * one exists, or NULL.
         */
        while (entry && offset > entry->end)
                entry = next_state(entry);

        return entry;
}
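
/*
 * Worked example (illustrative): with entries [0, 4095] and [8192, 12287] in
 * the tree:
 *
 *	tree_search_for_insert(tree, 100, ...)   returns [0, 4095] and leaves
 *						 @node_ret/@parent_ret untouched
 *	tree_search_for_insert(tree, 5000, ...)  returns [8192, 12287] and sets
 *						 @node_ret/@parent_ret to the
 *						 insertion point for offset 5000
 *	tree_search_for_insert(tree, 20000, ...) returns NULL
 */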

/*
 * Search offset in the tree or fill neighbor rbtree node pointers.
 *
 * @tree:     the tree to search
 * @offset:   offset that should fall within an entry in @tree
 * @next_ret: pointer to the first entry whose range ends after @offset
 * @prev_ret: pointer to the first entry whose range begins before @offset
 *
 * Return a pointer to the entry that contains @offset byte address. If no
 * such entry exists, then return NULL and fill @prev_ret and @next_ret.
 * Otherwise return the found entry and leave the other pointers untouched.
 */
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
                                                  u64 offset,
                                                  struct extent_state **prev_ret,
                                                  struct extent_state **next_ret)
{
        struct rb_root *root = &tree->state;
        struct rb_node **node = &root->rb_node;
        struct extent_state *orig_prev;
        struct extent_state *entry = NULL;

        ASSERT(prev_ret);
        ASSERT(next_ret);

        while (*node) {
                entry = rb_entry(*node, struct extent_state, rb_node);

                if (offset < entry->start)
                        node = &(*node)->rb_left;
                else if (offset > entry->end)
                        node = &(*node)->rb_right;
                else
                        return entry;
        }

        orig_prev = entry;
        while (entry && offset > entry->end)
                entry = next_state(entry);
        *next_ret = entry;
        entry = orig_prev;

        while (entry && offset < entry->start)
                entry = prev_state(entry);
        *prev_ret = entry;

        return NULL;
}

/*
 * Inexact rb-tree search, return the next entry if @offset is not found.
 */
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
{
        return tree_search_for_insert(tree, offset, NULL, NULL);
}

static void __cold extent_io_tree_panic(const struct extent_io_tree *tree,
                                        const struct extent_state *state,
                                        const char *opname,
                                        int err)
{
        btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err,
                    "extent io tree error on %s state start %llu end %llu",
                    opname, state->start, state->end);
}

static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
{
        struct extent_state *prev;

        prev = prev_state(state);
        if (prev && prev->end == state->start - 1 && prev->state == state->state) {
                if (tree->owner == IO_TREE_INODE_IO)
                        btrfs_merge_delalloc_extent(tree->inode, state, prev);
                state->start = prev->start;
                rb_erase(&prev->rb_node, &tree->state);
                RB_CLEAR_NODE(&prev->rb_node);
                btrfs_free_extent_state(prev);
        }
}

static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
{
        struct extent_state *next;

        next = next_state(state);
        if (next && next->start == state->end + 1 && next->state == state->state) {
                if (tree->owner == IO_TREE_INODE_IO)
                        btrfs_merge_delalloc_extent(tree->inode, state, next);
                state->end = next->end;
                rb_erase(&next->rb_node, &tree->state);
                RB_CLEAR_NODE(&next->rb_node);
                btrfs_free_extent_state(next);
        }
}

/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in the
 * tree. Extents with EXTENT_LOCK_BITS or EXTENT_BOUNDARY in their state field
 * are not merged because the end_io handlers need to be able to do operations
 * on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
        if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
                return;

        merge_prev_state(tree, state);
        merge_next_state(tree, state);
}

static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state,
                           u32 bits, struct extent_changeset *changeset)
{
        u32 bits_to_set = bits & ~EXTENT_CTLBITS;
        int ret;

        if (tree->owner == IO_TREE_INODE_IO)
                btrfs_set_delalloc_extent(tree->inode, state, bits);

        ret = add_extent_changeset(state, bits_to_set, changeset, 1);
        BUG_ON(ret < 0);
        state->state |= bits_to_set;
}

/*
 * Insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * Returns a pointer to the struct extent_state record containing the range
 * requested for insertion, which may be the same as the given struct or it
 * may be an existing record in the tree that was expanded to accommodate the
 * requested range. In case of an extent_state different from the one that was
 * given, the latter can be freed or reused by the caller.
 *
 * On error it returns an error pointer.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static struct extent_state *insert_state(struct extent_io_tree *tree,
                                         struct extent_state *state,
                                         u32 bits,
                                         struct extent_changeset *changeset)
{
        struct rb_node **node;
        struct rb_node *parent = NULL;
        const u64 start = state->start - 1;
        const u64 end = state->end + 1;
        const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));

        set_state_bits(tree, state, bits, changeset);

        node = &tree->state.rb_node;
        while (*node) {
                struct extent_state *entry;

                parent = *node;
                entry = rb_entry(parent, struct extent_state, rb_node);

                if (state->end < entry->start) {
                        if (try_merge && end == entry->start &&
                            state->state == entry->state) {
                                if (tree->owner == IO_TREE_INODE_IO)
                                        btrfs_merge_delalloc_extent(tree->inode,
                                                                    state, entry);
                                entry->start = state->start;
                                merge_prev_state(tree, entry);
                                state->state = 0;
                                return entry;
                        }
                        node = &(*node)->rb_left;
                } else if (state->end > entry->end) {
                        if (try_merge && entry->end == start &&
                            state->state == entry->state) {
                                if (tree->owner == IO_TREE_INODE_IO)
                                        btrfs_merge_delalloc_extent(tree->inode,
                                                                    state, entry);
                                entry->end = state->end;
                                merge_next_state(tree, entry);
                                state->state = 0;
                                return entry;
                        }
                        node = &(*node)->rb_right;
                } else {
                        return ERR_PTR(-EEXIST);
                }
        }

        rb_link_node(&state->rb_node, parent, node);
        rb_insert_color(&state->rb_node, &tree->state);

        return state;
}

/*
 * Insert state to @tree to the location given by @node and @parent.
 */
static void insert_state_fast(struct extent_io_tree *tree,
                              struct extent_state *state, struct rb_node **node,
                              struct rb_node *parent, unsigned bits,
                              struct extent_changeset *changeset)
{
        set_state_bits(tree, state, bits, changeset);
        rb_link_node(&state->rb_node, parent, node);
        rb_insert_color(&state->rb_node, &tree->state);
        merge_state(tree, state);
}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
                       struct extent_state *prealloc, u64 split)
{
        struct rb_node *parent = NULL;
        struct rb_node **node;

        if (tree->owner == IO_TREE_INODE_IO)
                btrfs_split_delalloc_extent(tree->inode, orig, split);

        prealloc->start = orig->start;
        prealloc->end = split - 1;
        prealloc->state = orig->state;
        orig->start = split;

        parent = &orig->rb_node;
        node = &parent;
        while (*node) {
                struct extent_state *entry;

                parent = *node;
                entry = rb_entry(parent, struct extent_state, rb_node);

                if (prealloc->end < entry->start) {
                        node = &(*node)->rb_left;
                } else if (prealloc->end > entry->end) {
                        node = &(*node)->rb_right;
                } else {
                        btrfs_free_extent_state(prealloc);
                        return -EEXIST;
                }
        }

        rb_link_node(&prealloc->rb_node, parent, node);
        rb_insert_color(&prealloc->rb_node, &tree->state);

        return 0;
}
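
/*
 * Worked example (illustrative): splitting orig = [0, 8191] with split == 4096
 * leaves the tree with prealloc = [0, 4095] and orig = [4096, 8191], both
 * carrying the state bits that orig had before the split.
 */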

/*
 * Use this during tree iteration to avoid doing next node searches when it's
 * not needed (the current record ends at or after the target range's end).
 */
static inline struct extent_state *next_search_state(struct extent_state *state, u64 end)
{
        if (state->end < end)
                return next_state(state);

        return NULL;
}

/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                                            struct extent_state *state,
                                            u32 bits, int wake, u64 end,
                                            struct extent_changeset *changeset)
{
        struct extent_state *next;
        u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
        int ret;

        if (tree->owner == IO_TREE_INODE_IO)
                btrfs_clear_delalloc_extent(tree->inode, state, bits);

        ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
        BUG_ON(ret < 0);
        state->state &= ~bits_to_clear;
        if (wake)
                wake_up(&state->wq);
        if (state->state == 0) {
                next = next_search_state(state, end);
                if (extent_state_in_tree(state)) {
                        rb_erase(&state->rb_node, &tree->state);
                        RB_CLEAR_NODE(&state->rb_node);
                        btrfs_free_extent_state(state);
                } else {
                        WARN_ON(1);
                }
        } else {
                merge_state(tree, state);
                next = next_search_state(state, end);
        }
        return next;
}

/*
 * Detect if extent bits request NOWAIT semantics, set the gfp mask
 * accordingly and unset the EXTENT_NOWAIT bit.
 */
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
        *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
        *bits &= EXTENT_NOWAIT - 1;
}
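
/*
 * Example (illustrative): for bits == (EXTENT_DELALLOC | EXTENT_NOWAIT) this
 * yields mask == GFP_NOWAIT and bits == EXTENT_DELALLOC. The
 * "*bits &= EXTENT_NOWAIT - 1" above relies on EXTENT_NOWAIT being the
 * highest extent bit, so subtracting 1 produces a mask of all lower bits.
 */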

/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
                                     u32 bits, struct extent_state **cached_state,
                                     struct extent_changeset *changeset)
{
        struct extent_state *state;
        struct extent_state *cached;
        struct extent_state *prealloc = NULL;
        u64 last_end;
        int ret = 0;
        bool clear;
        bool wake;
        const bool delete = (bits & EXTENT_CLEAR_ALL_BITS);
        gfp_t mask;

        set_gfp_mask_from_bits(&bits, &mask);
        btrfs_debug_check_extent_io_range(tree, start, end);
        trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

        if (delete)
                bits |= ~EXTENT_CTLBITS;

        if (bits & EXTENT_DELALLOC)
                bits |= EXTENT_NORESERVE;

        wake = (bits & EXTENT_LOCK_BITS);
        clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
again:
        if (!prealloc) {
                /*
                 * Don't care for allocation failure here because we might end
                 * up not needing the pre-allocated extent state at all, which
                 * is the case if we only have in the tree extent states that
                 * cover our input range and don't cover any other range.
                 * If we end up needing a new extent state we allocate it later.
                 */
                prealloc = alloc_extent_state(mask);
        }

        spin_lock(&tree->lock);
        if (cached_state) {
                cached = *cached_state;

                if (clear) {
                        *cached_state = NULL;
                        cached_state = NULL;
                }

                if (cached && extent_state_in_tree(cached) &&
                    cached->start <= start && cached->end > start) {
                        if (clear)
                                refcount_dec(&cached->refs);
                        state = cached;
                        goto hit_next;
                }
                if (clear)
                        btrfs_free_extent_state(cached);
        }

        /* This search will find the extents that end after our range starts. */
        state = tree_search(tree, start);
        if (!state)
                goto out;
hit_next:
        if (state->start > end)
                goto out;
        WARN_ON(state->end < start);
        last_end = state->end;

        /* The state doesn't have the wanted bits, go ahead. */
        if (!(state->state & bits)) {
                state = next_search_state(state, end);
                goto next;
        }

        /*
         *     | ---- desired range ---- |
         *  | state | or
         *  | ------------- state -------------- |
         *
         * We need to split the extent we found, and may flip bits on second
         * half.
         *
         * If the extent we found extends past our range, we just split and
         * search again. It'll get split again the next time though.
         *
         * If the extent we found is inside our range, we clear the desired bit
         * on it.
         */

        if (state->start < start) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;
                ret = split_state(tree, state, prealloc, start);
                prealloc = NULL;
                if (ret) {
                        extent_io_tree_panic(tree, state, "split", ret);
                        goto out;
                }
                if (state->end <= end) {
                        state = clear_state_bit(tree, state, bits, wake, end,
                                                changeset);
                        goto next;
                }
                if (need_resched())
                        goto search_again;
                /*
                 * Fallthrough and try atomic extent state allocation if needed.
                 * If it fails we'll jump to 'search_again', retry the
                 * allocation in non-atomic mode and start the search again.
                 */
        }
        /*
         * | ---- desired range ---- |
         *                        | state |
         * We need to split the extent, and clear the bit on the first half.
         */
        if (state->start <= end && state->end > end) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;
                ret = split_state(tree, state, prealloc, end + 1);
                if (ret) {
                        extent_io_tree_panic(tree, state, "split", ret);
                        prealloc = NULL;
                        goto out;
                }

                if (wake)
                        wake_up(&state->wq);

                clear_state_bit(tree, prealloc, bits, wake, end, changeset);

                prealloc = NULL;
                goto out;
        }

        state = clear_state_bit(tree, state, bits, wake, end, changeset);
next:
        if (last_end >= end)
                goto out;
        start = last_end + 1;
        if (state && !need_resched())
                goto hit_next;

search_again:
        spin_unlock(&tree->lock);
        if (gfpflags_allow_blocking(mask))
                cond_resched();
        goto again;

out:
        spin_unlock(&tree->lock);
        btrfs_free_extent_state(prealloc);

        return ret;
}
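
/*
 * Usage sketch (illustrative, not a call site in this file): clear delalloc
 * from an inode's io tree, dropping a previously cached extent state:
 *
 *	struct extent_state *cached = NULL;
 *	int ret;
 *
 *	... (a previous operation cached an extent state in 'cached') ...
 *	ret = btrfs_clear_extent_bit_changeset(&inode->io_tree, start, end,
 *					       EXTENT_DELALLOC, &cached, NULL);
 *
 * A NULL changeset skips the byte/range accounting, and adding EXTENT_NOWAIT
 * to the bits switches allocations to GFP_NOWAIT.
 */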

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                            u32 bits, struct extent_state **cached_state)
{
        struct extent_state *state;

        btrfs_debug_check_extent_io_range(tree, start, end);

        spin_lock(&tree->lock);
again:
        /*
         * Maintain cached_state, as we may not remove it from the tree if
         * there are more bits set on this state than the bits we're waiting
         * on.
         */
        if (cached_state && *cached_state) {
                state = *cached_state;
                if (extent_state_in_tree(state) &&
                    state->start <= start && start < state->end)
                        goto process_node;
        }
        while (1) {
                /*
                 * This search will find all the extents that end after our
                 * range starts.
                 */
                state = tree_search(tree, start);
process_node:
                if (!state)
                        break;
                if (state->start > end)
                        goto out;

                if (state->state & bits) {
                        DEFINE_WAIT(wait);

                        start = state->start;
                        refcount_inc(&state->refs);
                        prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
                        spin_unlock(&tree->lock);
                        schedule();
                        spin_lock(&tree->lock);
                        finish_wait(&state->wq, &wait);
                        btrfs_free_extent_state(state);
                        goto again;
                }
                start = state->end + 1;

                if (start > end)
                        break;

                if (!cond_resched_lock(&tree->lock)) {
                        state = next_state(state);
                        goto process_node;
                }
        }
out:
        /* This state is no longer useful, clear it and free it up. */
        if (cached_state && *cached_state) {
                state = *cached_state;
                *cached_state = NULL;
                btrfs_free_extent_state(state);
        }
        spin_unlock(&tree->lock);
}

static void cache_state_if_flags(struct extent_state *state,
                                 struct extent_state **cached_ptr,
                                 unsigned flags)
{
        if (cached_ptr && !(*cached_ptr)) {
                if (!flags || (state->state & flags)) {
                        *cached_ptr = state;
                        refcount_inc(&state->refs);
                }
        }
}

static void cache_state(struct extent_state *state,
                        struct extent_state **cached_ptr)
{
        return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY);
}

/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
                                                        u64 start, u32 bits)
{
        struct extent_state *state;

        /*
         * This search will find all the extents that end after our range
         * starts.
         */
        state = tree_search(tree, start);
        while (state) {
                if (state->state & bits)
                        return state;
                state = next_state(state);
        }
        return NULL;
}

/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: If there are multiple bits set in @bits, any of them will match.
 *
 * Return true if we find something, and update @start_ret and @end_ret.
 * Return false if we found nothing.
 */
bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
                                 u64 *start_ret, u64 *end_ret, u32 bits,
                                 struct extent_state **cached_state)
{
        struct extent_state *state;
        bool ret = false;

        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
                if (state->end == start - 1 && extent_state_in_tree(state)) {
                        while ((state = next_state(state)) != NULL) {
                                if (state->state & bits)
                                        break;
                        }
                        /*
                         * If we found the next extent state, clear cached_state
                         * so that we can cache the next extent state below and
                         * avoid future calls going over the same extent state
                         * again. If we haven't found any, clear as well since
                         * it's now useless.
                         */
                        btrfs_free_extent_state(*cached_state);
                        *cached_state = NULL;
                        if (state)
                                goto got_it;
                        goto out;
                }
                btrfs_free_extent_state(*cached_state);
                *cached_state = NULL;
        }

        state = find_first_extent_bit_state(tree, start, bits);
got_it:
        if (state) {
                cache_state_if_flags(state, cached_state, 0);
                *start_ret = state->start;
                *end_ret = state->end;
                ret = true;
        }
out:
        spin_unlock(&tree->lock);
        return ret;
}
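
/*
 * Usage sketch (illustrative): walk all ranges with a bit set, letting the
 * cached state avoid restarting each search from the rb-tree root:
 *
 *	struct extent_state *cached = NULL;
 *	u64 start = 0, found_start, found_end;
 *
 *	while (btrfs_find_first_extent_bit(tree, start, &found_start,
 *					   &found_end, EXTENT_DIRTY, &cached)) {
 *		... process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 *	btrfs_free_extent_state(cached);
 *
 * EXTENT_DIRTY is just an example bit; the final free is safe because
 * btrfs_free_extent_state() accepts NULL.
 */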

/*
 * Find a contiguous area of bits
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 *
 * Returns true if we found a range with the given bits set, in which case
 * @start_ret and @end_ret are updated, or false if no range was found.
 */
bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
                                      u64 *start_ret, u64 *end_ret, u32 bits)
{
        struct extent_state *state;
        bool ret = false;

        ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES));

        spin_lock(&tree->lock);
        state = find_first_extent_bit_state(tree, start, bits);
        if (state) {
                *start_ret = state->start;
                *end_ret = state->end;
                while ((state = next_state(state)) != NULL) {
                        if (state->start > (*end_ret + 1))
                                break;
                        *end_ret = state->end;
                }
                ret = true;
        }
        spin_unlock(&tree->lock);
        return ret;
}
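
/*
 * Worked example (illustrative): if [0, 4095] and [4096, 8191] both have the
 * bit set but are currently stored as two records, a call with start == 0
 * returns *start_ret == 0 and *end_ret == 8191, i.e. the full contiguous
 * area instead of just the first record.
 */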

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. 'start' and 'end' are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
                               u64 *end, u64 max_bytes,
                               struct extent_state **cached_state)
{
        struct extent_state *state;
        u64 cur_start = *start;
        bool found = false;
        u64 total_bytes = 0;

        spin_lock(&tree->lock);

        /*
         * This search will find all the extents that end after our range
         * starts.
         */
        state = tree_search(tree, cur_start);
        if (!state) {
                *end = (u64)-1;
                goto out;
        }

        while (state) {
                if (found && (state->start != cur_start ||
                              (state->state & EXTENT_BOUNDARY))) {
                        goto out;
                }
                if (!(state->state & EXTENT_DELALLOC)) {
                        if (!found)
                                *end = state->end;
                        goto out;
                }
                if (!found) {
                        *start = state->start;
                        *cached_state = state;
                        refcount_inc(&state->refs);
                }
                found = true;
                *end = state->end;
                cur_start = state->end + 1;
                total_bytes += state->end - state->start + 1;
                if (total_bytes >= max_bytes)
                        break;
                state = next_state(state);
        }
out:
        spin_unlock(&tree->lock);
        return found;
}
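
/*
 * Usage sketch (illustrative): find how much delalloc starts in a range,
 * capped at some maximum:
 *
 *	struct extent_state *cached = NULL;
 *	u64 delalloc_start = start;
 *	u64 delalloc_end = 0;
 *	bool found;
 *
 *	found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
 *					  BTRFS_MAX_EXTENT_SIZE, &cached);
 *
 * The caller is expected to drop the reference held by 'cached' when done.
 * BTRFS_MAX_EXTENT_SIZE is an assumed cap here; the real caller of this
 * helper is the delalloc locking code in extent_io.c.
 */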

/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
 * GFP_NOWAIT.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The extent_state of the
 * existing range is returned in failed_state in this case, and the start of
 * the existing range is returned in failed_start. failed_state is used as an
 * optimization for wait_extent_bit; failed_start must be used as the source
 * of truth, as failed_state may have changed since we returned.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                          u32 bits, u64 *failed_start,
                          struct extent_state **failed_state,
                          struct extent_state **cached_state,
                          struct extent_changeset *changeset)
{
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node **p = NULL;
        struct rb_node *parent = NULL;
        int ret = 0;
        u64 last_start;
        u64 last_end;
        u32 exclusive_bits = (bits & EXTENT_LOCK_BITS);
        gfp_t mask;

        set_gfp_mask_from_bits(&bits, &mask);
        btrfs_debug_check_extent_io_range(tree, start, end);
        trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

        if (exclusive_bits)
                ASSERT(failed_start);
        else
                ASSERT(failed_start == NULL && failed_state == NULL);
again:
        if (!prealloc) {
                /*
                 * Don't care for allocation failure here because we might end
                 * up not needing the pre-allocated extent state at all, which
                 * is the case if we only have in the tree extent states that
                 * cover our input range and don't cover any other range.
                 * If we end up needing a new extent state we allocate it later.
                 */
                prealloc = alloc_extent_state(mask);
        }
        /* Optimistically preallocate the extent changeset ulist node. */
        if (changeset)
                extent_changeset_prealloc(changeset, mask);

        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
                if (state->start <= start && state->end > start &&
                    extent_state_in_tree(state))
                        goto hit_next;
        }
        /*
         * This search will find all the extents that end after our range
         * starts.
         */
        state = tree_search_for_insert(tree, start, &p, &parent);
        if (!state) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;
                prealloc->start = start;
                prealloc->end = end;
                insert_state_fast(tree, prealloc, p, parent, bits, changeset);
                cache_state(prealloc, cached_state);
                prealloc = NULL;
                goto out;
        }
hit_next:
        last_start = state->start;
        last_end = state->end;

        /*
         * | ---- desired range ---- |
         * | state |
         *
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
                if (state->state & exclusive_bits) {
                        *failed_start = state->start;
                        cache_state(state, failed_state);
                        ret = -EEXIST;
                        goto out;
                }

                set_state_bits(tree, state, bits, changeset);
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end >= end)
                        goto out;
                start = last_end + 1;
                state = next_state(state);
                if (state && state->start == start && !need_resched())
                        goto hit_next;
                goto search_again;
        }

        /*
         *     | ---- desired range ---- |
         * | state |
         *   or
         * | ------------- state -------------- |
         *
         * We need to split the extent we found, and may flip bits on second
         * half.
         *
         * If the extent we found extends past our range, we just split and
         * search again. It'll get split again the next time though.
         *
         * If the extent we found is inside our range, we set the desired bit
         * on it.
         */
        if (state->start < start) {
                if (state->state & exclusive_bits) {
                        *failed_start = start;
                        cache_state(state, failed_state);
                        ret = -EEXIST;
                        goto out;
                }

                /*
                 * If this extent already has all the bits we want set, then
                 * skip it, not necessary to split it or do anything with it.
                 */
                if ((state->state & bits) == bits) {
                        start = state->end + 1;
                        cache_state(state, cached_state);
                        goto search_again;
                }

                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;
                ret = split_state(tree, state, prealloc, start);
                if (ret)
                        extent_io_tree_panic(tree, state, "split", ret);

                prealloc = NULL;
                if (ret)
                        goto out;
                if (state->end <= end) {
                        set_state_bits(tree, state, bits, changeset);
                        cache_state(state, cached_state);
                        merge_state(tree, state);
                        if (last_end >= end)
                                goto out;
                        start = last_end + 1;
                        state = next_state(state);
                        if (state && state->start == start && !need_resched())
                                goto hit_next;
                }
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *     | state | or               | state |
         *
         * There's a hole, we need to insert something in it and ignore the
         * extent we found.
         */
        if (state->start > start) {
                struct extent_state *inserted_state;

                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;

                /*
                 * Avoid freeing 'prealloc' if it can be merged with the next
                 * extent.
                 */
                prealloc->start = start;
                if (end < last_start)
                        prealloc->end = end;
                else
                        prealloc->end = last_start - 1;

                inserted_state = insert_state(tree, prealloc, bits, changeset);
                if (IS_ERR(inserted_state)) {
                        ret = PTR_ERR(inserted_state);
                        extent_io_tree_panic(tree, prealloc, "insert", ret);
                        goto out;
                }

                cache_state(inserted_state, cached_state);
                if (inserted_state == prealloc)
                        prealloc = NULL;
                start = inserted_state->end + 1;

                /* Beyond target range, stop. */
                if (start > end)
                        goto out;

                if (need_resched())
                        goto search_again;

                state = next_search_state(inserted_state, end);
                /*
                 * If there's a next state, whether contiguous or not, we don't
                 * need to unlock and start the search again. If it's not
                 * contiguous we will end up here and try to allocate a
                 * prealloc state and insert.
                 */
                if (state)
                        goto hit_next;
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *                        | state |
         *
         * We need to split the extent, and set the bit on the first half
         */
        if (state->start <= end && state->end > end) {
                if (state->state & exclusive_bits) {
                        *failed_start = start;
                        cache_state(state, failed_state);
                        ret = -EEXIST;
                        goto out;
                }

                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        goto search_again;
                ret = split_state(tree, state, prealloc, end + 1);
                if (ret) {
                        extent_io_tree_panic(tree, state, "split", ret);
                        prealloc = NULL;
                        goto out;
                }

                set_state_bits(tree, prealloc, bits, changeset);
                cache_state(prealloc, cached_state);
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
        }

search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
        if (gfpflags_allow_blocking(mask))
                cond_resched();
        goto again;

out:
        spin_unlock(&tree->lock);
        btrfs_free_extent_state(prealloc);

        return ret;
}

int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                         u32 bits, struct extent_state **cached_state)
{
        return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL);
}
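
/*
 * Usage sketch (illustrative): mark a range delalloc and keep a cached
 * pointer to speed up a later operation on the same range:
 *
 *	struct extent_state *cached = NULL;
 *	int ret;
 *
 *	ret = btrfs_set_extent_bit(&inode->io_tree, start, end,
 *				   EXTENT_DELALLOC, &cached);
 *	...
 *	ret = btrfs_clear_extent_bit(&inode->io_tree, start, end,
 *				     EXTENT_DELALLOC, &cached);
 *
 * btrfs_clear_extent_bit() is the clearing wrapper declared in
 * extent-io-tree.h (it is also used by btrfs_try_lock_extent_bits() below).
 */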

/*
 * Convert all bits in a given range from one bit to another
 *
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable,
 * i.e. converting from say DELALLOC to DIRTY. This is not meant to be used
 * with boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                             u32 bits, u32 clear_bits,
                             struct extent_state **cached_state)
{
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node **p = NULL;
        struct rb_node *parent = NULL;
        int ret = 0;
        u64 last_start;
        u64 last_end;
        bool first_iteration = true;

        btrfs_debug_check_extent_io_range(tree, start, end);
        trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
                                       clear_bits);

again:
        if (!prealloc) {
                /*
                 * Best effort, don't worry if extent state allocation fails
                 * here for the first iteration. We might have a cached state
                 * that matches exactly the target range, in which case no
                 * extent state allocations are needed. We'll only know this
                 * after locking the tree.
                 */
                prealloc = alloc_extent_state(GFP_NOFS);
                if (!prealloc && !first_iteration)
                        return -ENOMEM;
        }

        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
                if (state->start <= start && state->end > start &&
                    extent_state_in_tree(state))
                        goto hit_next;
        }

        /*
         * This search will find all the extents that end after our range
         * starts.
         */
        state = tree_search_for_insert(tree, start, &p, &parent);
        if (!state) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        ret = -ENOMEM;
                        goto out;
                }
                prealloc->start = start;
                prealloc->end = end;
                insert_state_fast(tree, prealloc, p, parent, bits, NULL);
                cache_state(prealloc, cached_state);
                prealloc = NULL;
                goto out;
        }
hit_next:
        last_start = state->start;
        last_end = state->end;

        /*
         * | ---- desired range ---- |
         * | state |
         *
         * Just lock what we found and keep going.
         */
        if (state->start == start && state->end <= end) {
                set_state_bits(tree, state, bits, NULL);
                cache_state(state, cached_state);
                state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
                if (last_end >= end)
                        goto out;
                start = last_end + 1;
                if (state && state->start == start && !need_resched())
                        goto hit_next;
                goto search_again;
        }

        /*
         *     | ---- desired range ---- |
         * | state |
         *   or
         * | ------------- state -------------- |
         *
         * We need to split the extent we found, and may flip bits on second
         * half.
         *
         * If the extent we found extends past our range, we just split and
         * search again. It'll get split again the next time though.
         *
         * If the extent we found is inside our range, we set the desired bit
         * on it.
         */
        if (state->start < start) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        ret = -ENOMEM;
                        goto out;
                }
                ret = split_state(tree, state, prealloc, start);
                prealloc = NULL;
                if (ret) {
                        extent_io_tree_panic(tree, state, "split", ret);
                        goto out;
                }
                if (state->end <= end) {
                        set_state_bits(tree, state, bits, NULL);
                        cache_state(state, cached_state);
                        state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
                        if (last_end >= end)
                                goto out;
                        start = last_end + 1;
                        if (state && state->start == start && !need_resched())
                                goto hit_next;
                }
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *     | state | or               | state |
         *
         * There's a hole, we need to insert something in it and ignore the
         * extent we found.
         */
        if (state->start > start) {
                struct extent_state *inserted_state;

                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        ret = -ENOMEM;
                        goto out;
                }

                /*
                 * Avoid freeing 'prealloc' if it can be merged with the next
                 * extent.
                 */
                prealloc->start = start;
                if (end < last_start)
                        prealloc->end = end;
                else
                        prealloc->end = last_start - 1;

                inserted_state = insert_state(tree, prealloc, bits, NULL);
                if (IS_ERR(inserted_state)) {
                        ret = PTR_ERR(inserted_state);
                        extent_io_tree_panic(tree, prealloc, "insert", ret);
                        goto out;
                }
                cache_state(inserted_state, cached_state);
                if (inserted_state == prealloc)
                        prealloc = NULL;
                start = inserted_state->end + 1;

                /* Beyond target range, stop. */
                if (start > end)
                        goto out;

                if (need_resched())
                        goto search_again;

                state = next_search_state(inserted_state, end);
                /*
                 * If there's a next state, whether contiguous or not, we don't
                 * need to unlock and start the search again. If it's not
                 * contiguous we will end up here and try to allocate a
                 * prealloc state and insert.
                 */
                if (state)
                        goto hit_next;
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *                        | state |
         *
         * We need to split the extent, and set the bit on the first half.
         */
        if (state->start <= end && state->end > end) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        ret = -ENOMEM;
                        goto out;
                }

                ret = split_state(tree, state, prealloc, end + 1);
                if (ret) {
                        extent_io_tree_panic(tree, state, "split", ret);
                        prealloc = NULL;
                        goto out;
                }

                set_state_bits(tree, prealloc, bits, NULL);
                cache_state(prealloc, cached_state);
                clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL);
                prealloc = NULL;
                goto out;
        }

search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
        cond_resched();
        first_iteration = false;
        goto again;

out:
        spin_unlock(&tree->lock);
        btrfs_free_extent_state(prealloc);

        return ret;
}
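
/*
 * Usage sketch (illustrative, modelled on how the transaction code writes
 * marked extents; exact call sites may differ): convert one mergeable bit to
 * another over a range:
 *
 *	ret = btrfs_convert_extent_bit(dirty_pages, start, end,
 *				       EXTENT_NEED_WAIT, EXTENT_DIRTY,
 *				       &cached_state);
 *
 * This sets EXTENT_NEED_WAIT while clearing EXTENT_DIRTY in one pass over
 * the tree, instead of a separate set and clear.
 */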

/*
 * Find the first range that has @bits not set. This range could start before
 * @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
                                       u64 *start_ret, u64 *end_ret, u32 bits)
{
        struct extent_state *state;
        struct extent_state *prev = NULL, *next = NULL;

        spin_lock(&tree->lock);

        /* Find first extent with bits cleared */
        while (1) {
                state = tree_search_prev_next(tree, start, &prev, &next);
                if (!state && !next && !prev) {
                        /*
                         * Tree is completely empty, send full range and let
                         * caller deal with it
                         */
                        *start_ret = 0;
                        *end_ret = -1;
                        goto out;
                } else if (!state && !next) {
                        /*
                         * We are past the last allocated chunk, set start at
                         * the end of the last extent.
                         */
                        *start_ret = prev->end + 1;
                        *end_ret = -1;
                        goto out;
                } else if (!state) {
                        state = next;
                }

                /*
                 * At this point 'state' either contains 'start' or start is
                 * before 'state'
                 */
                if (in_range(start, state->start, state->end - state->start + 1)) {
                        if (state->state & bits) {
                                /*
                                 * |--range with bits set--|
                                 *          |
                                 *          start
                                 */
                                start = state->end + 1;
                        } else {
                                /*
                                 * 'start' falls within a range that doesn't
                                 * have the bits set, so take its start as the
                                 * beginning of the desired range
                                 *
                                 * |--range with bits cleared----|
                                 *          |
                                 *          start
                                 */
                                *start_ret = state->start;
                                break;
                        }
                } else {
                        /*
                         * |---prev range---|---hole/unset---|---node range---|
                         *                          |
                         *                        start
                         *
                         * or
                         *
                         * |---hole/unset--||--first node--|
                         *        0   |
                         *            start
                         */
                        if (prev)
                                *start_ret = prev->end + 1;
                        else
                                *start_ret = 0;
                        break;
                }
        }

        /*
         * Find the longest stretch from start until an entry which has the
         * bits set
         */
        while (state) {
                if (state->end >= start && !(state->state & bits)) {
                        *end_ret = state->end;
                } else {
                        *end_ret = state->start - 1;
                        break;
                }
                state = next_state(state);
        }
out:
        spin_unlock(&tree->lock);
}
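
/*
 * Worked example (illustrative): with a single record [4096, 8191] that has
 * the bit set, a search from offset 0 returns *start_ret == 0 and
 * *end_ret == 4095, while a search from offset 10000 returns
 * *start_ret == 8192 and *end_ret == (u64)-1, leaving it to the caller to
 * trim the end to the device size.
 */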

/*
 * Count the number of bytes in the tree that have a given bit(s) set for a
 * given range.
 *
 * @tree:         The io tree to search.
 * @start:        The start offset of the range. This value is updated to the
 *                offset of the first byte found with the given bit(s), so it
 *                can end up being bigger than the initial value.
 * @search_end:   The end offset (inclusive value) of the search range.
 * @max_bytes:    The maximum byte count we are interested in. The search stops
 *                once it reaches this count.
 * @bits:         The bits the range must have in order to be accounted for.
 *                If multiple bits are set, then only subranges that have all
 *                the bits set are accounted for.
 * @contig:       Indicate if we should ignore holes in the range or not. If
 *                this is true, then stop once we find a hole.
 * @cached_state: A cached state to be used across multiple calls to this
 *                function in order to speed up searches. Use NULL if this is
 *                called only once or if each call does not start where the
 *                previous one ended.
 *
 * Returns the total number of bytes found within the given range that have
 * all given bits set. If the returned number of bytes is greater than zero
 * then @start is updated with the offset of the first byte with the bits set.
 */
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
                           u64 *start, u64 search_end, u64 max_bytes,
                           u32 bits, bool contig,
                           struct extent_state **cached_state)
{
        struct extent_state *state = NULL;
        struct extent_state *cached;
        u64 cur_start = *start;
        u64 total_bytes = 0;
        u64 last = 0;
        int found = 0;

        if (WARN_ON(search_end < cur_start))
                return 0;

        spin_lock(&tree->lock);

        if (!cached_state || !*cached_state)
                goto search;

        cached = *cached_state;

        if (!extent_state_in_tree(cached))
                goto search;

        if (cached->start <= cur_start && cur_start <= cached->end) {
                state = cached;
        } else if (cached->start > cur_start) {
                struct extent_state *prev;

                /*
                 * The cached state starts after our search range's start. Check
                 * if the previous state record starts at or before the range we
                 * are looking for, and if so, use it - this is a common case
                 * when there are holes between records in the tree. If there is
                 * no previous state record, we can start from our cached state.
                 */
                prev = prev_state(cached);
                if (!prev)
                        state = cached;
                else if (prev->start <= cur_start && cur_start <= prev->end)
                        state = prev;
        }

        /*
         * This search will find all the extents that end after our range
         * starts.
         */
search:
        if (!state)
                state = tree_search(tree, cur_start);

        while (state) {
                if (state->start > search_end)
                        break;
                if (contig && found && state->start > last + 1)
                        break;
                if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
                                break;
                        if (!found) {
                                *start = max(cur_start, state->start);
                                found = 1;
                        }
                        last = state->end;
                } else if (contig && found) {
                        break;
                }
                state = next_state(state);
        }

        if (cached_state) {
                btrfs_free_extent_state(*cached_state);
                *cached_state = state;
                if (state)
                        refcount_inc(&state->refs);
        }

        spin_unlock(&tree->lock);

        return total_bytes;
}
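
/*
 * Usage sketch (illustrative): count all delalloc bytes in a file range,
 * updating 'found_start' to the first delalloc byte:
 *
 *	u64 found_start = range_start;
 *	u64 bytes;
 *
 *	bytes = btrfs_count_range_bits(&inode->io_tree, &found_start,
 *				       range_end, (u64)-1, EXTENT_DELALLOC,
 *				       true, NULL);
 *
 * With contig == true the count stops at the first hole, so 'bytes' covers
 * [found_start, found_start + bytes - 1] without gaps.
 */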

/*
 * Check if the single @bit exists in the given range.
 */
bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
{
        struct extent_state *state;
        bool bitset = false;

        ASSERT(is_power_of_2(bit));

        spin_lock(&tree->lock);
        state = tree_search(tree, start);
        while (state) {
                if (state->start > end)
                        break;

                if (state->state & bit) {
                        bitset = true;
                        break;
                }

                if (state->end >= end)
                        break;
                state = next_state(state);
        }
        spin_unlock(&tree->lock);
        return bitset;
}

void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
                          struct extent_state **cached_state)
{
        struct extent_state *state;

        /*
         * The cached state is currently mandatory and not used to start the
         * search, only to cache the first state record found in the range.
         */
        ASSERT(cached_state != NULL);
        ASSERT(*cached_state == NULL);

        *bits = 0;

        spin_lock(&tree->lock);
        state = tree_search(tree, start);
        if (state && state->start < end) {
                *cached_state = state;
                refcount_inc(&state->refs);
        }
        while (state) {
                if (state->start > end)
                        break;

                *bits |= state->state;

                if (state->end >= end)
                        break;

                state = next_state(state);
        }
        spin_unlock(&tree->lock);
}

/*
 * Check if the whole range [@start, @end] (inclusive) contains the single
 * @bit set.
 */
bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
                          struct extent_state *cached)
{
        struct extent_state *state;
        bool bitset = true;

        ASSERT(is_power_of_2(bit));
        ASSERT(start < end);

        spin_lock(&tree->lock);
        if (cached && extent_state_in_tree(cached) && cached->start <= start &&
            cached->end > start)
                state = cached;
        else
                state = tree_search(tree, start);
        while (state) {
                if (state->start > start) {
                        bitset = false;
                        break;
                }

                if ((state->state & bit) == 0) {
                        bitset = false;
                        break;
                }

                if (state->end >= end)
                        break;

                /* Next state must start where this one ends. */
                start = state->end + 1;
                state = next_state(state);
        }

        /* We ran out of states and were still inside of our range. */
        if (!state)
                bitset = false;
        spin_unlock(&tree->lock);
        return bitset;
}
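
/*
 * Contrast with btrfs_test_range_bit_exists() above (illustrative): if only
 * [0, 4095] has EXTENT_UPTODATE set, then
 *
 *	btrfs_test_range_bit_exists(tree, 0, 8191, EXTENT_UPTODATE) -> true
 *	btrfs_test_range_bit(tree, 0, 8191, EXTENT_UPTODATE, NULL)  -> false
 *
 * since the latter requires the bit on every byte of the range.
 */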

/* Wrappers around set/clear extent bit. */
int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                                 u32 bits, struct extent_changeset *changeset)
{
        /*
         * We don't support EXTENT_LOCK_BITS yet, as the current changeset will
         * record any bits changed, so for the EXTENT_LOCK_BITS case it will
         * either fail with -EEXIST or the changeset will record the whole
         * range.
         */
        ASSERT(!(bits & EXTENT_LOCK_BITS));

        return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}

int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                                   u32 bits, struct extent_changeset *changeset)
{
        /*
         * Don't support the EXTENT_LOCK_BITS case, for the same reason as in
         * btrfs_set_record_extent_bits().
         */
        ASSERT(!(bits & EXTENT_LOCK_BITS));

        return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset);
}

bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                                u32 bits, struct extent_state **cached)
{
        int ret;
        u64 failed_start;

        ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL);
        if (ret == -EEXIST) {
                if (failed_start > start)
                        btrfs_clear_extent_bit(tree, start, failed_start - 1,
                                               bits, cached);
                return false;
        }
        return true;
}

/*
 * Either insert or lock the state struct between start and end. If a
 * conflicting exclusive bit is already set on part of the range, wait for it
 * to clear and then retry.
 */
int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
                           struct extent_state **cached_state)
{
        struct extent_state *failed_state = NULL;
        int ret;
        u64 failed_start;

        ret = set_extent_bit(tree, start, end, bits, &failed_start,
                             &failed_state, cached_state, NULL);
        while (ret == -EEXIST) {
                if (failed_start != start)
                        btrfs_clear_extent_bit(tree, start, failed_start - 1,
                                               bits, cached_state);

                wait_extent_bit(tree, failed_start, end, bits, &failed_state);
                ret = set_extent_bit(tree, start, end, bits, &failed_start,
                                     &failed_state, cached_state, NULL);
        }
        return ret;
}
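
/*
 * Usage sketch (illustrative): the common locking pattern built on this is
 * roughly
 *
 *	struct extent_state *cached = NULL;
 *
 *	btrfs_lock_extent_bits(&inode->io_tree, start, end, EXTENT_LOCKED,
 *			       &cached);
 *	... do work on [start, end] ...
 *	btrfs_clear_extent_bit(&inode->io_tree, start, end, EXTENT_LOCKED,
 *			       &cached);
 *
 * EXTENT_LOCKED is one of the EXTENT_LOCK_BITS; the lock/unlock wrappers in
 * extent-io-tree.h are assumed to be the usual entry points rather than
 * calling this function directly.
 */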

/*
 * Get the extent state that follows the given extent state.
 * This is meant to be used in a context where we know no other tasks can
 * concurrently modify the tree.
 */
struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
                                             struct extent_state *state)
{
        struct extent_state *next;

        spin_lock(&tree->lock);
        ASSERT(extent_state_in_tree(state));
        next = next_state(state);
        if (next)
                refcount_inc(&next->refs);
        spin_unlock(&tree->lock);

        return next;
}

void __cold btrfs_extent_state_free_cachep(void)
{
        btrfs_extent_state_leak_debug_check();
        kmem_cache_destroy(extent_state_cache);
}

int __init btrfs_extent_state_init_cachep(void)
{
        extent_state_cache = kmem_cache_create("btrfs_extent_state",
                                               sizeof(struct extent_state), 0, 0,
                                               NULL);
        if (!extent_state_cache)
                return -ENOMEM;

        return 0;
}