Lines matching "cache-block-size"
1 // SPDX-License-Identifier: GPL-2.0
5 #include "block-group.h"
6 #include "space-info.h"
7 #include "disk-io.h"
8 #include "free-space-cache.h"
9 #include "free-space-tree.h"
12 #include "ref-verify.h"
14 #include "tree-log.h"
15 #include "delalloc-space.h"
27 struct btrfs_balance_control *bctl = fs_info->balance_ctl; in get_restripe_target()
34 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { in get_restripe_target()
35 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; in get_restripe_target()
37 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { in get_restripe_target()
38 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; in get_restripe_target()
40 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { in get_restripe_target()
41 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; in get_restripe_target()
56 u64 num_devices = fs_info->fs_devices->rw_devices; in btrfs_reduce_alloc_profile()
65 spin_lock(&fs_info->balance_lock); in btrfs_reduce_alloc_profile()
68 spin_unlock(&fs_info->balance_lock); in btrfs_reduce_alloc_profile()
71 spin_unlock(&fs_info->balance_lock); in btrfs_reduce_alloc_profile()
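Between the balance_lock calls above, btrfs_reduce_alloc_profile() asks get_restripe_target() for a pending conversion target; roughly, as a sketch of the elided lines (details may differ between kernel versions):

	spin_lock(&fs_info->balance_lock);
	target = get_restripe_target(fs_info, flags);
	if (target) {
		/* Pick the target profile only if it's already available */
		if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
			spin_unlock(&fs_info->balance_lock);
			return extended_to_chunk(target);
		}
	}
	spin_unlock(&fs_info->balance_lock);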
103 seq = read_seqbegin(&fs_info->profiles_lock); in btrfs_get_alloc_profile()
106 flags |= fs_info->avail_data_alloc_bits; in btrfs_get_alloc_profile()
108 flags |= fs_info->avail_system_alloc_bits; in btrfs_get_alloc_profile()
110 flags |= fs_info->avail_metadata_alloc_bits; in btrfs_get_alloc_profile()
111 } while (read_seqretry(&fs_info->profiles_lock, seq)); in btrfs_get_alloc_profile()
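/*
 * The loop above is the usual seqlock read pattern: sample the
 * avail_*_alloc_bits locklessly and retry whenever a writer (the
 * write_seqlock() users in set_avail_alloc_bits() and
 * clear_avail_alloc_bits() further down) raced with the read.
 */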
116 void btrfs_get_block_group(struct btrfs_block_group *cache) in btrfs_get_block_group() argument
118 refcount_inc(&cache->refs); in btrfs_get_block_group()
121 void btrfs_put_block_group(struct btrfs_block_group *cache) in btrfs_put_block_group() argument
123 if (refcount_dec_and_test(&cache->refs)) { in btrfs_put_block_group()
124 WARN_ON(cache->pinned > 0); in btrfs_put_block_group()
125 WARN_ON(cache->reserved > 0); in btrfs_put_block_group()
132 if (WARN_ON(!list_empty(&cache->discard_list))) in btrfs_put_block_group()
133 btrfs_discard_cancel_work(&cache->fs_info->discard_ctl, in btrfs_put_block_group()
134 cache); in btrfs_put_block_group()
139 * And it will definitely cause use-after-free when caller in btrfs_put_block_group()
144 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root)); in btrfs_put_block_group()
145 kfree(cache->free_space_ctl); in btrfs_put_block_group()
146 kfree(cache); in btrfs_put_block_group()
151 * This adds the block group to the fs_info rb tree for the block group cache
158 struct btrfs_block_group *cache; in btrfs_add_block_group_cache() local
160 ASSERT(block_group->length != 0); in btrfs_add_block_group_cache()
162 spin_lock(&info->block_group_cache_lock); in btrfs_add_block_group_cache()
163 p = &info->block_group_cache_tree.rb_node; in btrfs_add_block_group_cache()
167 cache = rb_entry(parent, struct btrfs_block_group, cache_node); in btrfs_add_block_group_cache()
168 if (block_group->start < cache->start) { in btrfs_add_block_group_cache()
169 p = &(*p)->rb_left; in btrfs_add_block_group_cache()
170 } else if (block_group->start > cache->start) { in btrfs_add_block_group_cache()
171 p = &(*p)->rb_right; in btrfs_add_block_group_cache()
173 spin_unlock(&info->block_group_cache_lock); in btrfs_add_block_group_cache()
174 return -EEXIST; in btrfs_add_block_group_cache()
178 rb_link_node(&block_group->cache_node, parent, p); in btrfs_add_block_group_cache()
179 rb_insert_color(&block_group->cache_node, in btrfs_add_block_group_cache()
180 &info->block_group_cache_tree); in btrfs_add_block_group_cache()
182 if (info->first_logical_byte > block_group->start) in btrfs_add_block_group_cache()
183 info->first_logical_byte = block_group->start; in btrfs_add_block_group_cache()
185 spin_unlock(&info->block_group_cache_lock); in btrfs_add_block_group_cache()
191 * This will return the block group at or after bytenr if contains is 0, else
192 * it will return the block group that contains the bytenr
197 struct btrfs_block_group *cache, *ret = NULL; in block_group_cache_tree_search() local
201 spin_lock(&info->block_group_cache_lock); in block_group_cache_tree_search()
202 n = info->block_group_cache_tree.rb_node; in block_group_cache_tree_search()
205 cache = rb_entry(n, struct btrfs_block_group, cache_node); in block_group_cache_tree_search()
206 end = cache->start + cache->length - 1; in block_group_cache_tree_search()
207 start = cache->start; in block_group_cache_tree_search()
210 if (!contains && (!ret || start < ret->start)) in block_group_cache_tree_search()
211 ret = cache; in block_group_cache_tree_search()
212 n = n->rb_left; in block_group_cache_tree_search()
215 ret = cache; in block_group_cache_tree_search()
218 n = n->rb_right; in block_group_cache_tree_search()
220 ret = cache; in block_group_cache_tree_search()
226 if (bytenr == 0 && info->first_logical_byte > ret->start) in block_group_cache_tree_search()
227 info->first_logical_byte = ret->start; in block_group_cache_tree_search()
229 spin_unlock(&info->block_group_cache_lock); in block_group_cache_tree_search()
235 * Return the block group that starts at or after bytenr
244 * Return the block group that contains the given bytenr
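The two helpers described by the comments above are thin wrappers around block_group_cache_tree_search(); their bodies are not part of the matched lines, so the following is a reconstruction rather than a verbatim copy:

struct btrfs_block_group *btrfs_lookup_first_block_group(
		struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 0);
}

struct btrfs_block_group *btrfs_lookup_block_group(
		struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 1);
}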
253 struct btrfs_block_group *cache) in btrfs_next_block_group() argument
255 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_next_block_group()
258 spin_lock(&fs_info->block_group_cache_lock); in btrfs_next_block_group()
260 /* If our block group was removed, we need a full search. */ in btrfs_next_block_group()
261 if (RB_EMPTY_NODE(&cache->cache_node)) { in btrfs_next_block_group()
262 const u64 next_bytenr = cache->start + cache->length; in btrfs_next_block_group()
264 spin_unlock(&fs_info->block_group_cache_lock); in btrfs_next_block_group()
265 btrfs_put_block_group(cache); in btrfs_next_block_group()
266 return btrfs_lookup_first_block_group(fs_info, next_bytenr); in btrfs_next_block_group()
268 node = rb_next(&cache->cache_node); in btrfs_next_block_group()
269 btrfs_put_block_group(cache); in btrfs_next_block_group()
271 cache = rb_entry(node, struct btrfs_block_group, cache_node); in btrfs_next_block_group()
272 btrfs_get_block_group(cache); in btrfs_next_block_group()
274 cache = NULL; in btrfs_next_block_group()
275 spin_unlock(&fs_info->block_group_cache_lock); in btrfs_next_block_group()
276 return cache; in btrfs_next_block_group()
288 spin_lock(&bg->lock); in btrfs_inc_nocow_writers()
289 if (bg->ro) in btrfs_inc_nocow_writers()
292 atomic_inc(&bg->nocow_writers); in btrfs_inc_nocow_writers()
293 spin_unlock(&bg->lock); in btrfs_inc_nocow_writers()
295 /* No put on block group, done by btrfs_dec_nocow_writers */ in btrfs_inc_nocow_writers()
308 if (atomic_dec_and_test(&bg->nocow_writers)) in btrfs_dec_nocow_writers()
309 wake_up_var(&bg->nocow_writers); in btrfs_dec_nocow_writers()
320 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); in btrfs_wait_nocow_writers()
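/*
 * bg->nocow_writers follows a simple counter-plus-waitqueue pattern:
 * btrfs_inc_nocow_writers() only takes a reference while the block group
 * is not read-only, btrfs_dec_nocow_writers() drops it and wakes waiters
 * through wake_up_var(), and btrfs_wait_nocow_writers() blocks until the
 * counter reaches zero (typically after the group has been flipped to
 * read-only).
 */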
330 if (atomic_dec_and_test(&bg->reservations)) in btrfs_dec_block_group_reservations()
331 wake_up_var(&bg->reservations); in btrfs_dec_block_group_reservations()
337 struct btrfs_space_info *space_info = bg->space_info; in btrfs_wait_block_group_reservations()
339 ASSERT(bg->ro); in btrfs_wait_block_group_reservations()
341 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) in btrfs_wait_block_group_reservations()
345 * Our block group is read only but before we set it to read only, in btrfs_wait_block_group_reservations()
350 * block group's reservations counter is incremented while a read lock in btrfs_wait_block_group_reservations()
354 down_write(&space_info->groups_sem); in btrfs_wait_block_group_reservations()
355 up_write(&space_info->groups_sem); in btrfs_wait_block_group_reservations()
357 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations)); in btrfs_wait_block_group_reservations()
361 struct btrfs_block_group *cache) in btrfs_get_caching_control() argument
365 spin_lock(&cache->lock); in btrfs_get_caching_control()
366 if (!cache->caching_ctl) { in btrfs_get_caching_control()
367 spin_unlock(&cache->lock); in btrfs_get_caching_control()
371 ctl = cache->caching_ctl; in btrfs_get_caching_control()
372 refcount_inc(&ctl->count); in btrfs_get_caching_control()
373 spin_unlock(&cache->lock); in btrfs_get_caching_control()
379 if (refcount_dec_and_test(&ctl->count)) in btrfs_put_caching_control()
384 * When we wait for progress in the block group caching, it's because our in btrfs_wait_block_group_cache_progress()
389 * up, and then it will check the block group free space numbers for our min
391 * a free extent of a given size, but this is a good start.
393 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
394 * any of the information in this block group.
396 void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, in btrfs_wait_block_group_cache_progress() argument
401 caching_ctl = btrfs_get_caching_control(cache); in btrfs_wait_block_group_cache_progress()
405 wait_event(caching_ctl->wait, btrfs_block_group_done(cache) || in btrfs_wait_block_group_cache_progress()
406 (cache->free_space_ctl->free_space >= num_bytes)); in btrfs_wait_block_group_cache_progress()
411 int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) in btrfs_wait_block_group_cache_done() argument
416 caching_ctl = btrfs_get_caching_control(cache); in btrfs_wait_block_group_cache_done()
418 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; in btrfs_wait_block_group_cache_done()
420 wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); in btrfs_wait_block_group_cache_done()
421 if (cache->cached == BTRFS_CACHE_ERROR) in btrfs_wait_block_group_cache_done()
422 ret = -EIO; in btrfs_wait_block_group_cache_done()
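A sketch of how a caller is expected to combine these helpers (illustrative control flow with assumed local variables, not code from this file):

	if (!btrfs_block_group_done(cache)) {
		ret = btrfs_cache_block_group(cache, 0);
		if (ret < 0)
			return ret;
		/* Block until at least num_bytes of free space is cached */
		btrfs_wait_block_group_cache_progress(cache, num_bytes);
		if (cache->cached == BTRFS_CACHE_ERROR)
			return -EIO;
	}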
430 struct btrfs_fs_info *fs_info = block_group->fs_info; in fragment_free_space()
431 u64 start = block_group->start; in fragment_free_space()
432 u64 len = block_group->length; in fragment_free_space()
433 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ? in fragment_free_space()
434 fs_info->nodesize : fs_info->sectorsize; in fragment_free_space()
443 len -= step; in fragment_free_space()
456 struct btrfs_fs_info *info = block_group->fs_info; in add_new_free_space()
457 u64 extent_start, extent_end, size, total_added = 0; in add_new_free_space() local
461 ret = find_first_extent_bit(&info->excluded_extents, start, in add_new_free_space()
471 size = extent_start - start; in add_new_free_space()
472 total_added += size; in add_new_free_space()
474 start, size); in add_new_free_space()
475 BUG_ON(ret); /* -ENOMEM or logic error */ in add_new_free_space()
483 size = end - start; in add_new_free_space()
484 total_added += size; in add_new_free_space()
486 size); in add_new_free_space()
487 BUG_ON(ret); /* -ENOMEM or logic error */ in add_new_free_space()
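/*
 * In short, add_new_free_space() walks info->excluded_extents between the
 * requested start and end and feeds every gap that is not excluded (for
 * example, ranges covered by super block copies are excluded) into the
 * block group's free space accounting, returning the number of bytes
 * added in total_added.
 */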
495 struct btrfs_block_group *block_group = caching_ctl->block_group; in load_extent_tree_free()
496 struct btrfs_fs_info *fs_info = block_group->fs_info; in load_extent_tree_free()
497 struct btrfs_root *extent_root = fs_info->extent_root; in load_extent_tree_free()
509 return -ENOMEM; in load_extent_tree_free()
511 last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET); in load_extent_tree_free()
516 * allocate from this block group until we've had a chance to fragment in load_extent_tree_free()
526 * root, since it's read-only in load_extent_tree_free()
528 path->skip_locking = 1; in load_extent_tree_free()
529 path->search_commit_root = 1; in load_extent_tree_free()
530 path->reada = READA_FORWARD; in load_extent_tree_free()
541 leaf = path->nodes[0]; in load_extent_tree_free()
546 last = (u64)-1; in load_extent_tree_free()
550 if (path->slots[0] < nritems) { in load_extent_tree_free()
551 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); in load_extent_tree_free()
558 rwsem_is_contended(&fs_info->commit_root_sem)) { in load_extent_tree_free()
560 caching_ctl->progress = last; in load_extent_tree_free()
562 up_read(&fs_info->commit_root_sem); in load_extent_tree_free()
563 mutex_unlock(&caching_ctl->mutex); in load_extent_tree_free()
565 mutex_lock(&caching_ctl->mutex); in load_extent_tree_free()
566 down_read(&fs_info->commit_root_sem); in load_extent_tree_free()
575 leaf = path->nodes[0]; in load_extent_tree_free()
586 caching_ctl->progress = last; in load_extent_tree_free()
591 if (key.objectid < block_group->start) { in load_extent_tree_free()
592 path->slots[0]++; in load_extent_tree_free()
596 if (key.objectid >= block_group->start + block_group->length) in load_extent_tree_free()
605 fs_info->nodesize; in load_extent_tree_free()
612 wake_up(&caching_ctl->wait); in load_extent_tree_free()
615 path->slots[0]++; in load_extent_tree_free()
620 block_group->start + block_group->length); in load_extent_tree_free()
621 caching_ctl->progress = (u64)-1; in load_extent_tree_free()
636 block_group = caching_ctl->block_group; in caching_thread()
637 fs_info = block_group->fs_info; in caching_thread()
639 mutex_lock(&caching_ctl->mutex); in caching_thread()
640 down_read(&fs_info->commit_root_sem); in caching_thread()
647 spin_lock(&block_group->lock); in caching_thread()
648 block_group->caching_ctl = NULL; in caching_thread()
649 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED; in caching_thread()
650 spin_unlock(&block_group->lock); in caching_thread()
656 spin_lock(&block_group->space_info->lock); in caching_thread()
657 spin_lock(&block_group->lock); in caching_thread()
658 bytes_used = block_group->length - block_group->used; in caching_thread()
659 block_group->space_info->bytes_used += bytes_used >> 1; in caching_thread()
660 spin_unlock(&block_group->lock); in caching_thread()
661 spin_unlock(&block_group->space_info->lock); in caching_thread()
666 caching_ctl->progress = (u64)-1; in caching_thread()
668 up_read(&fs_info->commit_root_sem); in caching_thread()
670 mutex_unlock(&caching_ctl->mutex); in caching_thread()
672 wake_up(&caching_ctl->wait); in caching_thread()
678 int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) in btrfs_cache_block_group() argument
681 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_cache_block_group()
687 return -ENOMEM; in btrfs_cache_block_group()
689 INIT_LIST_HEAD(&caching_ctl->list); in btrfs_cache_block_group()
690 mutex_init(&caching_ctl->mutex); in btrfs_cache_block_group()
691 init_waitqueue_head(&caching_ctl->wait); in btrfs_cache_block_group()
692 caching_ctl->block_group = cache; in btrfs_cache_block_group()
693 caching_ctl->progress = cache->start; in btrfs_cache_block_group()
694 refcount_set(&caching_ctl->count, 1); in btrfs_cache_block_group()
695 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); in btrfs_cache_block_group()
697 spin_lock(&cache->lock); in btrfs_cache_block_group()
700 * case where one thread starts to load the space cache info, and then in btrfs_cache_block_group()
702 * allocation while the other thread is still loading the space cache in btrfs_cache_block_group()
703 * info. The previous loop should have kept us from choosing this block in btrfs_cache_block_group()
705 * block groups we need to first check if we're doing a fast load here, in btrfs_cache_block_group()
707 * from a block group whose cache gets evicted for one reason or in btrfs_cache_block_group()
710 while (cache->cached == BTRFS_CACHE_FAST) { in btrfs_cache_block_group()
713 ctl = cache->caching_ctl; in btrfs_cache_block_group()
714 refcount_inc(&ctl->count); in btrfs_cache_block_group()
715 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); in btrfs_cache_block_group()
716 spin_unlock(&cache->lock); in btrfs_cache_block_group()
720 finish_wait(&ctl->wait, &wait); in btrfs_cache_block_group()
722 spin_lock(&cache->lock); in btrfs_cache_block_group()
725 if (cache->cached != BTRFS_CACHE_NO) { in btrfs_cache_block_group()
726 spin_unlock(&cache->lock); in btrfs_cache_block_group()
730 WARN_ON(cache->caching_ctl); in btrfs_cache_block_group()
731 cache->caching_ctl = caching_ctl; in btrfs_cache_block_group()
732 cache->cached = BTRFS_CACHE_FAST; in btrfs_cache_block_group()
733 spin_unlock(&cache->lock); in btrfs_cache_block_group()
736 mutex_lock(&caching_ctl->mutex); in btrfs_cache_block_group()
737 ret = load_free_space_cache(cache); in btrfs_cache_block_group()
739 spin_lock(&cache->lock); in btrfs_cache_block_group()
741 cache->caching_ctl = NULL; in btrfs_cache_block_group()
742 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_cache_block_group()
743 cache->last_byte_to_unpin = (u64)-1; in btrfs_cache_block_group()
744 caching_ctl->progress = (u64)-1; in btrfs_cache_block_group()
747 cache->caching_ctl = NULL; in btrfs_cache_block_group()
748 cache->cached = BTRFS_CACHE_NO; in btrfs_cache_block_group()
750 cache->cached = BTRFS_CACHE_STARTED; in btrfs_cache_block_group()
751 cache->has_caching_ctl = 1; in btrfs_cache_block_group()
754 spin_unlock(&cache->lock); in btrfs_cache_block_group()
757 btrfs_should_fragment_free_space(cache)) { in btrfs_cache_block_group()
760 spin_lock(&cache->space_info->lock); in btrfs_cache_block_group()
761 spin_lock(&cache->lock); in btrfs_cache_block_group()
762 bytes_used = cache->length - cache->used; in btrfs_cache_block_group()
763 cache->space_info->bytes_used += bytes_used >> 1; in btrfs_cache_block_group()
764 spin_unlock(&cache->lock); in btrfs_cache_block_group()
765 spin_unlock(&cache->space_info->lock); in btrfs_cache_block_group()
766 fragment_free_space(cache); in btrfs_cache_block_group()
769 mutex_unlock(&caching_ctl->mutex); in btrfs_cache_block_group()
771 wake_up(&caching_ctl->wait); in btrfs_cache_block_group()
774 btrfs_free_excluded_extents(cache); in btrfs_cache_block_group()
782 spin_lock(&cache->lock); in btrfs_cache_block_group()
784 cache->caching_ctl = NULL; in btrfs_cache_block_group()
785 cache->cached = BTRFS_CACHE_NO; in btrfs_cache_block_group()
787 cache->cached = BTRFS_CACHE_STARTED; in btrfs_cache_block_group()
788 cache->has_caching_ctl = 1; in btrfs_cache_block_group()
790 spin_unlock(&cache->lock); in btrfs_cache_block_group()
791 wake_up(&caching_ctl->wait); in btrfs_cache_block_group()
799 down_write(&fs_info->commit_root_sem); in btrfs_cache_block_group()
800 refcount_inc(&caching_ctl->count); in btrfs_cache_block_group()
801 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); in btrfs_cache_block_group()
802 up_write(&fs_info->commit_root_sem); in btrfs_cache_block_group()
804 btrfs_get_block_group(cache); in btrfs_cache_block_group()
806 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); in btrfs_cache_block_group()
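/*
 * btrfs_cache_block_group() therefore has two paths: a fast one that
 * tries load_free_space_cache() (the on-disk space cache) under
 * caching_ctl, and a slow one that queues caching_thread() on
 * fs_info->caching_workers, which ends up in load_extent_tree_free()
 * when the free space has to be rebuilt from the extent tree.
 */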
816 write_seqlock(&fs_info->profiles_lock); in clear_avail_alloc_bits()
818 fs_info->avail_data_alloc_bits &= ~extra_flags; in clear_avail_alloc_bits()
820 fs_info->avail_metadata_alloc_bits &= ~extra_flags; in clear_avail_alloc_bits()
822 fs_info->avail_system_alloc_bits &= ~extra_flags; in clear_avail_alloc_bits()
823 write_sequnlock(&fs_info->profiles_lock); in clear_avail_alloc_bits()
829 * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
832 * - RAID1C34 - same as above for RAID1C3 and RAID1C4 block groups
842 struct list_head *head = &fs_info->space_info; in clear_incompat_bg_bits()
846 down_read(&sinfo->groups_sem); in clear_incompat_bg_bits()
847 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5])) in clear_incompat_bg_bits()
849 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6])) in clear_incompat_bg_bits()
851 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3])) in clear_incompat_bg_bits()
853 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4])) in clear_incompat_bg_bits()
855 up_read(&sinfo->groups_sem); in clear_incompat_bg_bits()
868 struct btrfs_fs_info *fs_info = trans->fs_info; in remove_block_group_item()
873 root = fs_info->extent_root; in remove_block_group_item()
874 key.objectid = block_group->start; in remove_block_group_item()
876 key.offset = block_group->length; in remove_block_group_item()
878 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); in remove_block_group_item()
880 ret = -ENOENT; in remove_block_group_item()
891 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_remove_block_group()
895 struct btrfs_root *tree_root = fs_info->tree_root; in btrfs_remove_block_group()
908 BUG_ON(!block_group->ro); in btrfs_remove_block_group()
912 * Free the reserved super bytes from this block group before in btrfs_remove_block_group()
916 btrfs_free_ref_tree_range(fs_info, block_group->start, in btrfs_remove_block_group()
917 block_group->length); in btrfs_remove_block_group()
919 index = btrfs_bg_flags_to_raid_index(block_group->flags); in btrfs_remove_block_group()
920 factor = btrfs_bg_type_to_factor(block_group->flags); in btrfs_remove_block_group()
922 /* make sure this block group isn't part of an allocation cluster */ in btrfs_remove_block_group()
923 cluster = &fs_info->data_alloc_cluster; in btrfs_remove_block_group()
924 spin_lock(&cluster->refill_lock); in btrfs_remove_block_group()
926 spin_unlock(&cluster->refill_lock); in btrfs_remove_block_group()
929 * make sure this block group isn't part of a metadata in btrfs_remove_block_group()
932 cluster = &fs_info->meta_alloc_cluster; in btrfs_remove_block_group()
933 spin_lock(&cluster->refill_lock); in btrfs_remove_block_group()
935 spin_unlock(&cluster->refill_lock); in btrfs_remove_block_group()
939 ret = -ENOMEM; in btrfs_remove_block_group()
949 mutex_lock(&trans->transaction->cache_write_mutex); in btrfs_remove_block_group()
951 * Make sure our free space cache IO is done before removing the in btrfs_remove_block_group()
954 spin_lock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
955 if (!list_empty(&block_group->io_list)) { in btrfs_remove_block_group()
956 list_del_init(&block_group->io_list); in btrfs_remove_block_group()
958 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode); in btrfs_remove_block_group()
960 spin_unlock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
963 spin_lock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
966 if (!list_empty(&block_group->dirty_list)) { in btrfs_remove_block_group()
967 list_del_init(&block_group->dirty_list); in btrfs_remove_block_group()
971 spin_unlock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
972 mutex_unlock(&trans->transaction->cache_write_mutex); in btrfs_remove_block_group()
981 /* One for the block groups ref */ in btrfs_remove_block_group()
982 spin_lock(&block_group->lock); in btrfs_remove_block_group()
983 if (block_group->iref) { in btrfs_remove_block_group()
984 block_group->iref = 0; in btrfs_remove_block_group()
985 block_group->inode = NULL; in btrfs_remove_block_group()
986 spin_unlock(&block_group->lock); in btrfs_remove_block_group()
989 spin_unlock(&block_group->lock); in btrfs_remove_block_group()
997 key.offset = block_group->start; in btrfs_remove_block_group()
999 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); in btrfs_remove_block_group()
1011 spin_lock(&fs_info->block_group_cache_lock); in btrfs_remove_block_group()
1012 rb_erase(&block_group->cache_node, in btrfs_remove_block_group()
1013 &fs_info->block_group_cache_tree); in btrfs_remove_block_group()
1014 RB_CLEAR_NODE(&block_group->cache_node); in btrfs_remove_block_group()
1016 /* Once for the block groups rbtree */ in btrfs_remove_block_group()
1019 if (fs_info->first_logical_byte == block_group->start) in btrfs_remove_block_group()
1020 fs_info->first_logical_byte = (u64)-1; in btrfs_remove_block_group()
1021 spin_unlock(&fs_info->block_group_cache_lock); in btrfs_remove_block_group()
1023 down_write(&block_group->space_info->groups_sem); in btrfs_remove_block_group()
1028 list_del_init(&block_group->list); in btrfs_remove_block_group()
1029 if (list_empty(&block_group->space_info->block_groups[index])) { in btrfs_remove_block_group()
1030 kobj = block_group->space_info->block_group_kobjs[index]; in btrfs_remove_block_group()
1031 block_group->space_info->block_group_kobjs[index] = NULL; in btrfs_remove_block_group()
1032 clear_avail_alloc_bits(fs_info, block_group->flags); in btrfs_remove_block_group()
1034 up_write(&block_group->space_info->groups_sem); in btrfs_remove_block_group()
1035 clear_incompat_bg_bits(fs_info, block_group->flags); in btrfs_remove_block_group()
1041 if (block_group->has_caching_ctl) in btrfs_remove_block_group()
1043 if (block_group->cached == BTRFS_CACHE_STARTED) in btrfs_remove_block_group()
1045 if (block_group->has_caching_ctl) { in btrfs_remove_block_group()
1046 down_write(&fs_info->commit_root_sem); in btrfs_remove_block_group()
1051 &fs_info->caching_block_groups, list) in btrfs_remove_block_group()
1052 if (ctl->block_group == block_group) { in btrfs_remove_block_group()
1054 refcount_inc(&caching_ctl->count); in btrfs_remove_block_group()
1059 list_del_init(&caching_ctl->list); in btrfs_remove_block_group()
1060 up_write(&fs_info->commit_root_sem); in btrfs_remove_block_group()
1068 spin_lock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
1069 WARN_ON(!list_empty(&block_group->dirty_list)); in btrfs_remove_block_group()
1070 WARN_ON(!list_empty(&block_group->io_list)); in btrfs_remove_block_group()
1071 spin_unlock(&trans->transaction->dirty_bgs_lock); in btrfs_remove_block_group()
1075 spin_lock(&block_group->space_info->lock); in btrfs_remove_block_group()
1076 list_del_init(&block_group->ro_list); in btrfs_remove_block_group()
1079 WARN_ON(block_group->space_info->total_bytes in btrfs_remove_block_group()
1080 < block_group->length); in btrfs_remove_block_group()
1081 WARN_ON(block_group->space_info->bytes_readonly in btrfs_remove_block_group()
1082 < block_group->length); in btrfs_remove_block_group()
1083 WARN_ON(block_group->space_info->disk_total in btrfs_remove_block_group()
1084 < block_group->length * factor); in btrfs_remove_block_group()
1086 block_group->space_info->total_bytes -= block_group->length; in btrfs_remove_block_group()
1087 block_group->space_info->bytes_readonly -= block_group->length; in btrfs_remove_block_group()
1088 block_group->space_info->disk_total -= block_group->length * factor; in btrfs_remove_block_group()
1090 spin_unlock(&block_group->space_info->lock); in btrfs_remove_block_group()
1093 * Remove the free space for the block group from the free space tree in btrfs_remove_block_group()
1094 * and the block group's item from the extent tree before marking the in btrfs_remove_block_group()
1095 * block group as removed. This is to prevent races with tasks that in btrfs_remove_block_group()
1096 * freeze and unfreeze a block group, this task and another task in btrfs_remove_block_group()
1097 * allocating a new block group - the unfreeze task ends up removing in btrfs_remove_block_group()
1098 * the block group's extent map before the task calling this function in btrfs_remove_block_group()
1099 * deletes the block group item from the extent tree, allowing for in btrfs_remove_block_group()
1100 * another task to attempt to create another block group with the same in btrfs_remove_block_group()
1101 * item key (and failing with -EEXIST and a transaction abort). in btrfs_remove_block_group()
1111 spin_lock(&block_group->lock); in btrfs_remove_block_group()
1112 block_group->removed = 1; in btrfs_remove_block_group()
1114 * At this point trimming or scrub can't start on this block group, in btrfs_remove_block_group()
1115 * because we removed the block group from the rbtree in btrfs_remove_block_group()
1116 * fs_info->block_group_cache_tree so no one can find it anymore and in btrfs_remove_block_group()
1117 * even if someone already got this block group before we removed it in btrfs_remove_block_group()
1118 * from the rbtree, they have already incremented block_group->frozen - in btrfs_remove_block_group()
1123 * And we must not remove the extent map from the fs_info->mapping_tree in btrfs_remove_block_group()
1125 * ranges from being reused for a new block group. This is needed to in btrfs_remove_block_group()
1131 * allowing for new block groups to be created that can reuse the same in btrfs_remove_block_group()
1135 * is mounted with -odiscard. The same protections must remain in btrfs_remove_block_group()
1139 remove_em = (atomic_read(&block_group->frozen) == 0); in btrfs_remove_block_group()
1140 spin_unlock(&block_group->lock); in btrfs_remove_block_group()
1145 em_tree = &fs_info->mapping_tree; in btrfs_remove_block_group()
1146 write_lock(&em_tree->lock); in btrfs_remove_block_group()
1148 write_unlock(&em_tree->lock); in btrfs_remove_block_group()
1165 struct extent_map_tree *em_tree = &fs_info->mapping_tree; in btrfs_start_trans_remove_block_group()
1170 read_lock(&em_tree->lock); in btrfs_start_trans_remove_block_group()
1172 read_unlock(&em_tree->lock); in btrfs_start_trans_remove_block_group()
1173 ASSERT(em && em->start == chunk_offset); in btrfs_start_trans_remove_block_group()
1177 * to remove a block group (done at btrfs_remove_chunk() and at in btrfs_start_trans_remove_block_group()
1182 * 1 unit for deleting the block group item (located in the extent in btrfs_start_trans_remove_block_group()
1189 * In order to remove a block group we also need to reserve units in the in btrfs_start_trans_remove_block_group()
1194 map = em->map_lookup; in btrfs_start_trans_remove_block_group()
1195 num_items = 3 + map->num_stripes; in btrfs_start_trans_remove_block_group()
1198 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, in btrfs_start_trans_remove_block_group()
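As a worked example of the formula above: a two-stripe RAID1 chunk reserves 3 + 2 = 5 metadata units and a four-stripe RAID10 chunk reserves 3 + 4 = 7, the per-stripe units covering the device extent items that have to be deleted along with the block group.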
1203 * Mark block group @cache read-only, so later write won't happen to block
1204 * group @cache.
1206 * If @force is not set, this function will only mark the block group readonly
1207 * if we have enough free space (1M) in other metadata/system block groups.
1208 * If @force is set, this function will mark the block group readonly in inc_block_group_ro()
1211 * NOTE: This function doesn't care if other block groups can contain all the
1212 * data in this block group. That check should be done by relocation routine,
1215 static int inc_block_group_ro(struct btrfs_block_group *cache, int force) in inc_block_group_ro() argument
1217 struct btrfs_space_info *sinfo = cache->space_info; in inc_block_group_ro()
1219 int ret = -ENOSPC; in inc_block_group_ro()
1221 spin_lock(&sinfo->lock); in inc_block_group_ro()
1222 spin_lock(&cache->lock); in inc_block_group_ro()
1224 if (cache->ro) { in inc_block_group_ro()
1225 cache->ro++; in inc_block_group_ro()
1230 num_bytes = cache->length - cache->reserved - cache->pinned - in inc_block_group_ro()
1231 cache->bytes_super - cache->used; in inc_block_group_ro()
1239 } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) { in inc_block_group_ro()
1246 if (sinfo_used + num_bytes <= sinfo->total_bytes) in inc_block_group_ro()
1253 * leeway to allow us to mark this block group as read only. in inc_block_group_ro()
1255 if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes, in inc_block_group_ro()
1261 sinfo->bytes_readonly += num_bytes; in inc_block_group_ro()
1262 cache->ro++; in inc_block_group_ro()
1263 list_add_tail(&cache->ro_list, &sinfo->ro_bgs); in inc_block_group_ro()
1266 spin_unlock(&cache->lock); in inc_block_group_ro()
1267 spin_unlock(&sinfo->lock); in inc_block_group_ro()
1268 if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) { in inc_block_group_ro()
1269 btrfs_info(cache->fs_info, in inc_block_group_ro()
1270 "unable to make block group %llu ro", cache->start); in inc_block_group_ro()
1271 btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); in inc_block_group_ro()
1279 struct btrfs_fs_info *fs_info = bg->fs_info; in clean_pinned_extents()
1281 const u64 start = bg->start; in clean_pinned_extents()
1282 const u64 end = start + bg->length - 1; in clean_pinned_extents()
1285 spin_lock(&fs_info->trans_lock); in clean_pinned_extents()
1286 if (trans->transaction->list.prev != &fs_info->trans_list) { in clean_pinned_extents()
1287 prev_trans = list_last_entry(&trans->transaction->list, in clean_pinned_extents()
1289 refcount_inc(&prev_trans->use_count); in clean_pinned_extents()
1291 spin_unlock(&fs_info->trans_lock); in clean_pinned_extents()
1297 * transaction N - 1, and have seen a range belonging to the block in clean_pinned_extents()
1298 * group in pinned_extents before we were able to clear the whole block in clean_pinned_extents()
1300 * the block group after we unpinned it from pinned_extents and removed in clean_pinned_extents()
1303 mutex_lock(&fs_info->unused_bg_unpin_mutex); in clean_pinned_extents()
1305 ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, in clean_pinned_extents()
1311 ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, in clean_pinned_extents()
1314 mutex_unlock(&fs_info->unused_bg_unpin_mutex); in clean_pinned_extents()
1333 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) in btrfs_delete_unused_bgs()
1336 spin_lock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1337 while (!list_empty(&fs_info->unused_bgs)) { in btrfs_delete_unused_bgs()
1340 block_group = list_first_entry(&fs_info->unused_bgs, in btrfs_delete_unused_bgs()
1343 list_del_init(&block_group->bg_list); in btrfs_delete_unused_bgs()
1345 space_info = block_group->space_info; in btrfs_delete_unused_bgs()
1351 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1353 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group); in btrfs_delete_unused_bgs()
1355 mutex_lock(&fs_info->delete_unused_bgs_mutex); in btrfs_delete_unused_bgs()
1358 down_write(&space_info->groups_sem); in btrfs_delete_unused_bgs()
1361 * Async discard moves the final block group discard to be prior in btrfs_delete_unused_bgs()
1368 up_write(&space_info->groups_sem); in btrfs_delete_unused_bgs()
1370 btrfs_discard_queue_work(&fs_info->discard_ctl, in btrfs_delete_unused_bgs()
1375 spin_lock(&block_group->lock); in btrfs_delete_unused_bgs()
1376 if (block_group->reserved || block_group->pinned || in btrfs_delete_unused_bgs()
1377 block_group->used || block_group->ro || in btrfs_delete_unused_bgs()
1378 list_is_singular(&block_group->list)) { in btrfs_delete_unused_bgs()
1381 * outstanding allocations in this block group. We do in btrfs_delete_unused_bgs()
1383 * this block group. in btrfs_delete_unused_bgs()
1386 spin_unlock(&block_group->lock); in btrfs_delete_unused_bgs()
1387 up_write(&space_info->groups_sem); in btrfs_delete_unused_bgs()
1390 spin_unlock(&block_group->lock); in btrfs_delete_unused_bgs()
1394 up_write(&space_info->groups_sem); in btrfs_delete_unused_bgs()
1405 block_group->start); in btrfs_delete_unused_bgs()
1413 * We could have pending pinned extents for this block group, in btrfs_delete_unused_bgs()
1428 spin_lock(&fs_info->discard_ctl.lock); in btrfs_delete_unused_bgs()
1429 if (!list_empty(&block_group->discard_list)) { in btrfs_delete_unused_bgs()
1430 spin_unlock(&fs_info->discard_ctl.lock); in btrfs_delete_unused_bgs()
1432 btrfs_discard_queue_work(&fs_info->discard_ctl, in btrfs_delete_unused_bgs()
1436 spin_unlock(&fs_info->discard_ctl.lock); in btrfs_delete_unused_bgs()
1439 spin_lock(&space_info->lock); in btrfs_delete_unused_bgs()
1440 spin_lock(&block_group->lock); in btrfs_delete_unused_bgs()
1443 -block_group->pinned); in btrfs_delete_unused_bgs()
1444 space_info->bytes_readonly += block_group->pinned; in btrfs_delete_unused_bgs()
1445 percpu_counter_add_batch(&space_info->total_bytes_pinned, in btrfs_delete_unused_bgs()
1446 -block_group->pinned, in btrfs_delete_unused_bgs()
1448 block_group->pinned = 0; in btrfs_delete_unused_bgs()
1450 spin_unlock(&block_group->lock); in btrfs_delete_unused_bgs()
1451 spin_unlock(&space_info->lock); in btrfs_delete_unused_bgs()
1454 * In the normal path an unused block group is passed here, in btrfs_delete_unused_bgs()
1457 * before coming down the unused block group path as trimming in btrfs_delete_unused_bgs()
1474 ret = btrfs_remove_chunk(trans, block_group->start); in btrfs_delete_unused_bgs()
1483 * If we're not mounted with -odiscard, we can just forget in btrfs_delete_unused_bgs()
1484 * about this block group. Otherwise we'll need to wait in btrfs_delete_unused_bgs()
1488 spin_lock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1491 * fs_info->unused_bgs, so use a list_move operation in btrfs_delete_unused_bgs()
1492 * to add the block group to the deleted_bgs list. in btrfs_delete_unused_bgs()
1494 list_move(&block_group->bg_list, in btrfs_delete_unused_bgs()
1495 &trans->transaction->deleted_bgs); in btrfs_delete_unused_bgs()
1496 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1502 mutex_unlock(&fs_info->delete_unused_bgs_mutex); in btrfs_delete_unused_bgs()
1504 spin_lock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1506 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_delete_unused_bgs()
1511 mutex_unlock(&fs_info->delete_unused_bgs_mutex); in btrfs_delete_unused_bgs()
1518 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_mark_bg_unused()
1520 spin_lock(&fs_info->unused_bgs_lock); in btrfs_mark_bg_unused()
1521 if (list_empty(&bg->bg_list)) { in btrfs_mark_bg_unused()
1524 list_add_tail(&bg->bg_list, &fs_info->unused_bgs); in btrfs_mark_bg_unused()
1526 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_mark_bg_unused()
1540 slot = path->slots[0]; in read_bg_from_eb()
1541 leaf = path->nodes[0]; in read_bg_from_eb()
1543 em_tree = &fs_info->mapping_tree; in read_bg_from_eb()
1544 read_lock(&em_tree->lock); in read_bg_from_eb()
1545 em = lookup_extent_mapping(em_tree, key->objectid, key->offset); in read_bg_from_eb()
1546 read_unlock(&em_tree->lock); in read_bg_from_eb()
1550 key->objectid, key->offset); in read_bg_from_eb()
1551 return -ENOENT; in read_bg_from_eb()
1554 if (em->start != key->objectid || em->len != key->offset) { in read_bg_from_eb()
1556 "block group %llu len %llu mismatch with chunk %llu len %llu", in read_bg_from_eb()
1557 key->objectid, key->offset, em->start, em->len); in read_bg_from_eb()
1558 ret = -EUCLEAN; in read_bg_from_eb()
1567 if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { in read_bg_from_eb()
1569 "block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", in read_bg_from_eb()
1570 key->objectid, key->offset, flags, in read_bg_from_eb()
1571 (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type)); in read_bg_from_eb()
1572 ret = -EUCLEAN; in read_bg_from_eb()
1584 struct btrfs_root *root = fs_info->extent_root; in find_first_block_group()
1595 slot = path->slots[0]; in find_first_block_group()
1596 leaf = path->nodes[0]; in find_first_block_group()
1607 if (found_key.objectid >= key->objectid && in find_first_block_group()
1613 path->slots[0]++; in find_first_block_group()
1624 write_seqlock(&fs_info->profiles_lock); in set_avail_alloc_bits()
1626 fs_info->avail_data_alloc_bits |= extra_flags; in set_avail_alloc_bits()
1628 fs_info->avail_metadata_alloc_bits |= extra_flags; in set_avail_alloc_bits()
1630 fs_info->avail_system_alloc_bits |= extra_flags; in set_avail_alloc_bits()
1631 write_sequnlock(&fs_info->profiles_lock); in set_avail_alloc_bits()
1635 * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
1636 * @chunk_start: logical address of block group
1640 * @stripe_len: size of IO stripe for the given block group
1643 * Used primarily to exclude those portions of a block group that contain super
1644 * block copies.
1661 return -EIO; in btrfs_rmap_block()
1663 map = em->map_lookup; in btrfs_rmap_block()
1664 data_stripe_length = em->orig_block_len; in btrfs_rmap_block()
1665 io_stripe_size = map->stripe_len; in btrfs_rmap_block()
1668 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) in btrfs_rmap_block()
1669 io_stripe_size = map->stripe_len * nr_data_stripes(map); in btrfs_rmap_block()
1671 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); in btrfs_rmap_block()
1673 ret = -ENOMEM; in btrfs_rmap_block()
1677 for (i = 0; i < map->num_stripes; i++) { in btrfs_rmap_block()
1682 if (!in_range(physical, map->stripes[i].physical, in btrfs_rmap_block()
1686 stripe_nr = physical - map->stripes[i].physical; in btrfs_rmap_block()
1687 stripe_nr = div64_u64(stripe_nr, map->stripe_len); in btrfs_rmap_block()
1689 if (map->type & BTRFS_BLOCK_GROUP_RAID10) { in btrfs_rmap_block()
1690 stripe_nr = stripe_nr * map->num_stripes + i; in btrfs_rmap_block()
1691 stripe_nr = div_u64(stripe_nr, map->sub_stripes); in btrfs_rmap_block()
1692 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { in btrfs_rmap_block()
1693 stripe_nr = stripe_nr * map->num_stripes + i; in btrfs_rmap_block()
1698 * instead of map->stripe_len in btrfs_rmap_block()
1723 static int exclude_super_stripes(struct btrfs_block_group *cache) in exclude_super_stripes() argument
1725 struct btrfs_fs_info *fs_info = cache->fs_info; in exclude_super_stripes()
1731 if (cache->start < BTRFS_SUPER_INFO_OFFSET) { in exclude_super_stripes()
1732 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start; in exclude_super_stripes()
1733 cache->bytes_super += stripe_len; in exclude_super_stripes()
1734 ret = btrfs_add_excluded_extent(fs_info, cache->start, in exclude_super_stripes()
1742 ret = btrfs_rmap_block(fs_info, cache->start, in exclude_super_stripes()
1747 while (nr--) { in exclude_super_stripes()
1749 cache->start + cache->length - logical[nr]); in exclude_super_stripes()
1751 cache->bytes_super += len; in exclude_super_stripes()
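/*
 * The while (nr--) loop above runs over the hits returned by
 * btrfs_rmap_block() for one super block mirror (the mirrors live at
 * fixed physical offsets of 64KiB, 64MiB and 256GiB on each device):
 * every logical range that falls inside this block group is accounted in
 * cache->bytes_super and excluded via btrfs_add_excluded_extent() so the
 * allocator never hands it out.
 */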
1765 static void link_block_group(struct btrfs_block_group *cache) in link_block_group() argument
1767 struct btrfs_space_info *space_info = cache->space_info; in link_block_group()
1768 int index = btrfs_bg_flags_to_raid_index(cache->flags); in link_block_group()
1770 down_write(&space_info->groups_sem); in link_block_group()
1771 list_add_tail(&cache->list, &space_info->block_groups[index]); in link_block_group()
1772 up_write(&space_info->groups_sem); in link_block_group()
1778 struct btrfs_block_group *cache; in btrfs_create_block_group_cache() local
1780 cache = kzalloc(sizeof(*cache), GFP_NOFS); in btrfs_create_block_group_cache()
1781 if (!cache) in btrfs_create_block_group_cache()
1784 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), in btrfs_create_block_group_cache()
1786 if (!cache->free_space_ctl) { in btrfs_create_block_group_cache()
1787 kfree(cache); in btrfs_create_block_group_cache()
1791 cache->start = start; in btrfs_create_block_group_cache()
1793 cache->fs_info = fs_info; in btrfs_create_block_group_cache()
1794 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); in btrfs_create_block_group_cache()
1796 cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; in btrfs_create_block_group_cache()
1798 refcount_set(&cache->refs, 1); in btrfs_create_block_group_cache()
1799 spin_lock_init(&cache->lock); in btrfs_create_block_group_cache()
1800 init_rwsem(&cache->data_rwsem); in btrfs_create_block_group_cache()
1801 INIT_LIST_HEAD(&cache->list); in btrfs_create_block_group_cache()
1802 INIT_LIST_HEAD(&cache->cluster_list); in btrfs_create_block_group_cache()
1803 INIT_LIST_HEAD(&cache->bg_list); in btrfs_create_block_group_cache()
1804 INIT_LIST_HEAD(&cache->ro_list); in btrfs_create_block_group_cache()
1805 INIT_LIST_HEAD(&cache->discard_list); in btrfs_create_block_group_cache()
1806 INIT_LIST_HEAD(&cache->dirty_list); in btrfs_create_block_group_cache()
1807 INIT_LIST_HEAD(&cache->io_list); in btrfs_create_block_group_cache()
1808 btrfs_init_free_space_ctl(cache); in btrfs_create_block_group_cache()
1809 atomic_set(&cache->frozen, 0); in btrfs_create_block_group_cache()
1810 mutex_init(&cache->free_space_lock); in btrfs_create_block_group_cache()
1811 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root); in btrfs_create_block_group_cache()
1813 return cache; in btrfs_create_block_group_cache()
1817 * Iterate all chunks and verify that each of them has the corresponding block
1822 struct extent_map_tree *map_tree = &fs_info->mapping_tree; in check_chunk_block_group_mappings()
1829 read_lock(&map_tree->lock); in check_chunk_block_group_mappings()
1836 read_unlock(&map_tree->lock); in check_chunk_block_group_mappings()
1840 bg = btrfs_lookup_block_group(fs_info, em->start); in check_chunk_block_group_mappings()
1843 "chunk start=%llu len=%llu doesn't have corresponding block group", in check_chunk_block_group_mappings()
1844 em->start, em->len); in check_chunk_block_group_mappings()
1845 ret = -EUCLEAN; in check_chunk_block_group_mappings()
1849 if (bg->start != em->start || bg->length != em->len || in check_chunk_block_group_mappings()
1850 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != in check_chunk_block_group_mappings()
1851 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { in check_chunk_block_group_mappings()
1853 "chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", in check_chunk_block_group_mappings()
1854 em->start, em->len, in check_chunk_block_group_mappings()
1855 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, in check_chunk_block_group_mappings()
1856 bg->start, bg->length, in check_chunk_block_group_mappings()
1857 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); in check_chunk_block_group_mappings()
1858 ret = -EUCLEAN; in check_chunk_block_group_mappings()
1863 start = em->start + em->len; in check_chunk_block_group_mappings()
1870 static void read_block_group_item(struct btrfs_block_group *cache, in read_block_group_item() argument
1874 struct extent_buffer *leaf = path->nodes[0]; in read_block_group_item()
1876 int slot = path->slots[0]; in read_block_group_item()
1878 cache->length = key->offset; in read_block_group_item()
1882 cache->used = btrfs_stack_block_group_used(&bgi); in read_block_group_item()
1883 cache->flags = btrfs_stack_block_group_flags(&bgi); in read_block_group_item()
1891 struct btrfs_block_group *cache; in read_one_block_group() local
1896 ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY); in read_one_block_group()
1898 cache = btrfs_create_block_group_cache(info, key->objectid); in read_one_block_group()
1899 if (!cache) in read_one_block_group()
1900 return -ENOMEM; in read_one_block_group()
1902 read_block_group_item(cache, path, key); in read_one_block_group()
1904 set_free_space_tree_thresholds(cache); in read_one_block_group()
1908 * When we mount with old space cache, we need to in read_one_block_group()
1912 * truncate the old free space cache inode and in read_one_block_group()
1915 * the new space cache info onto disk. in read_one_block_group()
1918 cache->disk_cache_state = BTRFS_DC_CLEAR; in read_one_block_group()
1920 if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && in read_one_block_group()
1921 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { in read_one_block_group()
1923 "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", in read_one_block_group()
1924 cache->start); in read_one_block_group()
1925 ret = -EINVAL; in read_one_block_group()
1934 ret = exclude_super_stripes(cache); in read_one_block_group()
1937 btrfs_free_excluded_extents(cache); in read_one_block_group()
1947 if (cache->length == cache->used) { in read_one_block_group()
1948 cache->last_byte_to_unpin = (u64)-1; in read_one_block_group()
1949 cache->cached = BTRFS_CACHE_FINISHED; in read_one_block_group()
1950 btrfs_free_excluded_extents(cache); in read_one_block_group()
1951 } else if (cache->used == 0) { in read_one_block_group()
1952 cache->last_byte_to_unpin = (u64)-1; in read_one_block_group()
1953 cache->cached = BTRFS_CACHE_FINISHED; in read_one_block_group()
1954 add_new_free_space(cache, cache->start, in read_one_block_group()
1955 cache->start + cache->length); in read_one_block_group()
1956 btrfs_free_excluded_extents(cache); in read_one_block_group()
1959 ret = btrfs_add_block_group_cache(info, cache); in read_one_block_group()
1961 btrfs_remove_free_space_cache(cache); in read_one_block_group()
1964 trace_btrfs_add_block_group(info, cache, 0); in read_one_block_group()
1965 btrfs_update_space_info(info, cache->flags, cache->length, in read_one_block_group()
1966 cache->used, cache->bytes_super, &space_info); in read_one_block_group()
1968 cache->space_info = space_info; in read_one_block_group()
1970 link_block_group(cache); in read_one_block_group()
1972 set_avail_alloc_bits(info, cache->flags); in read_one_block_group()
1973 if (btrfs_chunk_readonly(info, cache->start)) { in read_one_block_group()
1974 inc_block_group_ro(cache, 1); in read_one_block_group()
1975 } else if (cache->used == 0) { in read_one_block_group()
1976 ASSERT(list_empty(&cache->bg_list)); in read_one_block_group()
1978 btrfs_discard_queue_work(&info->discard_ctl, cache); in read_one_block_group()
1980 btrfs_mark_bg_unused(cache); in read_one_block_group()
1984 btrfs_put_block_group(cache); in read_one_block_group()
1992 struct btrfs_block_group *cache; in btrfs_read_block_groups() local
2003 return -ENOMEM; in btrfs_read_block_groups()
2005 cache_gen = btrfs_super_cache_generation(info->super_copy); in btrfs_read_block_groups()
2007 btrfs_super_generation(info->super_copy) != cache_gen) in btrfs_read_block_groups()
2019 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); in btrfs_read_block_groups()
2029 list_for_each_entry(space_info, &info->space_info, list) { in btrfs_read_block_groups()
2033 if (list_empty(&space_info->block_groups[i])) in btrfs_read_block_groups()
2035 cache = list_first_entry(&space_info->block_groups[i], in btrfs_read_block_groups()
2038 btrfs_sysfs_add_block_group_type(cache); in btrfs_read_block_groups()
2041 if (!(btrfs_get_alloc_profile(info, space_info->flags) & in btrfs_read_block_groups()
2048 * Avoid allocating from un-mirrored block groups if there are in btrfs_read_block_groups()
2049 * mirrored block groups. in btrfs_read_block_groups()
2051 list_for_each_entry(cache, in btrfs_read_block_groups()
2052 &space_info->block_groups[BTRFS_RAID_RAID0], in btrfs_read_block_groups()
2054 inc_block_group_ro(cache, 1); in btrfs_read_block_groups()
2055 list_for_each_entry(cache, in btrfs_read_block_groups()
2056 &space_info->block_groups[BTRFS_RAID_SINGLE], in btrfs_read_block_groups()
2058 inc_block_group_ro(cache, 1); in btrfs_read_block_groups()
2071 struct btrfs_fs_info *fs_info = trans->fs_info; in insert_block_group_item()
2076 spin_lock(&block_group->lock); in insert_block_group_item()
2077 btrfs_set_stack_block_group_used(&bgi, block_group->used); in insert_block_group_item()
2080 btrfs_set_stack_block_group_flags(&bgi, block_group->flags); in insert_block_group_item()
2081 key.objectid = block_group->start; in insert_block_group_item()
2083 key.offset = block_group->length; in insert_block_group_item()
2084 spin_unlock(&block_group->lock); in insert_block_group_item()
2086 root = fs_info->extent_root; in insert_block_group_item()
2092 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_create_pending_block_groups()
2096 if (!trans->can_flush_pending_bgs) in btrfs_create_pending_block_groups()
2099 while (!list_empty(&trans->new_bgs)) { in btrfs_create_pending_block_groups()
2102 block_group = list_first_entry(&trans->new_bgs, in btrfs_create_pending_block_groups()
2108 index = btrfs_bg_flags_to_raid_index(block_group->flags); in btrfs_create_pending_block_groups()
2113 ret = btrfs_finish_chunk_alloc(trans, block_group->start, in btrfs_create_pending_block_groups()
2114 block_group->length); in btrfs_create_pending_block_groups()
2125 if (block_group->space_info->block_group_kobjs[index] == NULL) in btrfs_create_pending_block_groups()
2131 list_del_init(&block_group->bg_list); in btrfs_create_pending_block_groups()
2137 u64 type, u64 chunk_offset, u64 size) in btrfs_make_block_group() argument
2139 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_make_block_group()
2140 struct btrfs_block_group *cache; in btrfs_make_block_group() local
2145 cache = btrfs_create_block_group_cache(fs_info, chunk_offset); in btrfs_make_block_group()
2146 if (!cache) in btrfs_make_block_group()
2147 return -ENOMEM; in btrfs_make_block_group()
2149 cache->length = size; in btrfs_make_block_group()
2150 set_free_space_tree_thresholds(cache); in btrfs_make_block_group()
2151 cache->used = bytes_used; in btrfs_make_block_group()
2152 cache->flags = type; in btrfs_make_block_group()
2153 cache->last_byte_to_unpin = (u64)-1; in btrfs_make_block_group()
2154 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_make_block_group()
2155 cache->needs_free_space = 1; in btrfs_make_block_group()
2156 ret = exclude_super_stripes(cache); in btrfs_make_block_group()
2159 btrfs_free_excluded_extents(cache); in btrfs_make_block_group()
2160 btrfs_put_block_group(cache); in btrfs_make_block_group()
2164 add_new_free_space(cache, chunk_offset, chunk_offset + size); in btrfs_make_block_group()
2166 btrfs_free_excluded_extents(cache); in btrfs_make_block_group()
2169 if (btrfs_should_fragment_free_space(cache)) { in btrfs_make_block_group()
2170 u64 new_bytes_used = size - bytes_used; in btrfs_make_block_group()
2173 fragment_free_space(cache); in btrfs_make_block_group()
2178 * assigned to our block group. We want our bg to be added to the rbtree in btrfs_make_block_group()
2179 * with its ->space_info set. in btrfs_make_block_group()
2181 cache->space_info = btrfs_find_space_info(fs_info, cache->flags); in btrfs_make_block_group()
2182 ASSERT(cache->space_info); in btrfs_make_block_group()
2184 ret = btrfs_add_block_group_cache(fs_info, cache); in btrfs_make_block_group()
2186 btrfs_remove_free_space_cache(cache); in btrfs_make_block_group()
2187 btrfs_put_block_group(cache); in btrfs_make_block_group()
2192 * Now that our block group has its ->space_info set and is inserted in in btrfs_make_block_group()
2195 trace_btrfs_add_block_group(fs_info, cache, 1); in btrfs_make_block_group()
2196 btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, in btrfs_make_block_group()
2197 cache->bytes_super, &cache->space_info); in btrfs_make_block_group()
2200 link_block_group(cache); in btrfs_make_block_group()
2202 list_add_tail(&cache->bg_list, &trans->new_bgs); in btrfs_make_block_group()
2203 trans->delayed_ref_updates++; in btrfs_make_block_group()
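/*
 * Note that btrfs_make_block_group() does not write the block group item
 * itself: the new group is parked on trans->new_bgs and the item is
 * inserted later from btrfs_create_pending_block_groups() (which is what
 * insert_block_group_item() above exists for).
 */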
2211 * Mark one block group RO, can be called several times for the same block
2214 * @cache: the destination block group
2215 * @do_chunk_alloc: whether need to do chunk pre-allocation, this is to
2217 * block group RO.
2219 int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, in btrfs_inc_block_group_ro() argument
2222 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_inc_block_group_ro()
2228 trans = btrfs_join_transaction(fs_info->extent_root); in btrfs_inc_block_group_ro()
2233 * we're not allowed to set block groups readonly after the dirty in btrfs_inc_block_group_ro()
2234 * block groups cache has started writing. If it already started, in btrfs_inc_block_group_ro()
2237 mutex_lock(&fs_info->ro_block_group_mutex); in btrfs_inc_block_group_ro()
2238 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { in btrfs_inc_block_group_ro()
2239 u64 transid = trans->transid; in btrfs_inc_block_group_ro()
2241 mutex_unlock(&fs_info->ro_block_group_mutex); in btrfs_inc_block_group_ro()
2253 * corresponding block group with the new raid level. in btrfs_inc_block_group_ro()
2255 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); in btrfs_inc_block_group_ro()
2256 if (alloc_flags != cache->flags) { in btrfs_inc_block_group_ro()
2263 if (ret == -ENOSPC) in btrfs_inc_block_group_ro()
2270 ret = inc_block_group_ro(cache, 0); in btrfs_inc_block_group_ro()
2275 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags); in btrfs_inc_block_group_ro()
2279 ret = inc_block_group_ro(cache, 0); in btrfs_inc_block_group_ro()
2281 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { in btrfs_inc_block_group_ro()
2282 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); in btrfs_inc_block_group_ro()
2283 mutex_lock(&fs_info->chunk_mutex); in btrfs_inc_block_group_ro()
2285 mutex_unlock(&fs_info->chunk_mutex); in btrfs_inc_block_group_ro()
2288 mutex_unlock(&fs_info->ro_block_group_mutex); in btrfs_inc_block_group_ro()
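/*
 * Roughly, the @do_chunk_alloc switch separates the two kinds of callers
 * of this helper: relocation passes true so that a replacement chunk is
 * pre-allocated and the flip to read-only cannot fail for lack of space,
 * while scrub passes false and tolerates a possible -ENOSPC (a summary of
 * the in-tree callers, not something visible in the matched lines).
 */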
2294 void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) in btrfs_dec_block_group_ro() argument
2296 struct btrfs_space_info *sinfo = cache->space_info; in btrfs_dec_block_group_ro()
2299 BUG_ON(!cache->ro); in btrfs_dec_block_group_ro()
2301 spin_lock(&sinfo->lock); in btrfs_dec_block_group_ro()
2302 spin_lock(&cache->lock); in btrfs_dec_block_group_ro()
2303 if (!--cache->ro) { in btrfs_dec_block_group_ro()
2304 num_bytes = cache->length - cache->reserved - in btrfs_dec_block_group_ro()
2305 cache->pinned - cache->bytes_super - cache->used; in btrfs_dec_block_group_ro()
2306 sinfo->bytes_readonly -= num_bytes; in btrfs_dec_block_group_ro()
2307 list_del_init(&cache->ro_list); in btrfs_dec_block_group_ro()
2309 spin_unlock(&cache->lock); in btrfs_dec_block_group_ro()
2310 spin_unlock(&sinfo->lock); in btrfs_dec_block_group_ro()
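/*
 * A minimal caller sketch for the RO pairing above (illustrative, not taken
 * from this file; the function name is made up and error handling around the
 * actual work is elided). Every successful btrfs_inc_block_group_ro() must be
 * balanced by btrfs_dec_block_group_ro() once the read-only window is over.
 */
static int demo_with_bg_read_only(struct btrfs_block_group *bg)
{
	int ret;

	/* Pre-allocate a chunk if needed so marking RO doesn't starve writers. */
	ret = btrfs_inc_block_group_ro(bg, true);
	if (ret)
		return ret;

	/* ... scrub/relocate while nothing new is allocated from this group ... */

	btrfs_dec_block_group_ro(bg);
	return 0;
}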
2315 struct btrfs_block_group *cache) in update_block_group_item() argument
2317 struct btrfs_fs_info *fs_info = trans->fs_info; in update_block_group_item()
2319 struct btrfs_root *root = fs_info->extent_root; in update_block_group_item()
2325 key.objectid = cache->start; in update_block_group_item()
2327 key.offset = cache->length; in update_block_group_item()
2332 ret = -ENOENT; in update_block_group_item()
2336 leaf = path->nodes[0]; in update_block_group_item()
2337 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); in update_block_group_item()
2338 btrfs_set_stack_block_group_used(&bgi, cache->used); in update_block_group_item()
2341 btrfs_set_stack_block_group_flags(&bgi, cache->flags); in update_block_group_item()
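/*
 * Sketch of the search key implied by the two key assignments above (the
 * key.type line is not part of this match list; BTRFS_BLOCK_GROUP_ITEM_KEY
 * is assumed):
 *
 *	struct btrfs_key key = {
 *		.objectid = cache->start,		/* logical start of the block group */
 *		.type	  = BTRFS_BLOCK_GROUP_ITEM_KEY,	/* item type in the extent tree     */
 *		.offset	  = cache->length,		/* block group length in bytes      */
 *	};
 *
 * The item payload (struct btrfs_block_group_item) then carries the used
 * byte count and the flags written via the setters above.
 */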
2354 struct btrfs_fs_info *fs_info = block_group->fs_info; in cache_save_setup()
2355 struct btrfs_root *root = fs_info->tree_root; in cache_save_setup()
2365 * If this block group is smaller than 100 megs, don't bother caching the in cache_save_setup()
2366 * block group. in cache_save_setup()
2368 if (block_group->length < (100 * SZ_1M)) { in cache_save_setup()
2369 spin_lock(&block_group->lock); in cache_save_setup()
2370 block_group->disk_cache_state = BTRFS_DC_WRITTEN; in cache_save_setup()
2371 spin_unlock(&block_group->lock); in cache_save_setup()
2379 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { in cache_save_setup()
2389 if (block_group->ro) in cache_save_setup()
2400 * from here on out we know not to trust this cache when we load up next in cache_save_setup()
2403 BTRFS_I(inode)->generation = 0; in cache_save_setup()
2408 * super cache generation to 0 so we know to invalidate the in cache_save_setup()
2409 * cache, but then we'd have to keep track of the block groups in cache_save_setup()
2410 * that fail this way so we know we _have_ to reset this cache in cache_save_setup()
2411 * before the next commit or risk reading stale cache. So to in cache_save_setup()
2422 if (block_group->cache_generation == trans->transid && in cache_save_setup()
2430 &fs_info->global_block_rsv); in cache_save_setup()
2439 spin_lock(&block_group->lock); in cache_save_setup()
2440 if (block_group->cached != BTRFS_CACHE_FINISHED || in cache_save_setup()
2449 spin_unlock(&block_group->lock); in cache_save_setup()
2452 spin_unlock(&block_group->lock); in cache_save_setup()
2455 * We hit an ENOSPC when setting up the cache in this transaction, just in cache_save_setup()
2456 * skip doing the setup, we've already cleared the cache so we're safe. in cache_save_setup()
2458 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) { in cache_save_setup()
2459 ret = -ENOSPC; in cache_save_setup()
2464 * Try to preallocate enough space based on how big the block group is. in cache_save_setup()
2467 * cache. in cache_save_setup()
2469 num_pages = div_u64(block_group->length, SZ_256M); in cache_save_setup()
2485 * Our cache requires contiguous chunks so that we don't modify a bunch in cache_save_setup()
2486 * of metadata or split extents when writing the cache out, which means in cache_save_setup()
2489 * other block groups for this transaction, maybe we'll unpin enough in cache_save_setup()
2494 else if (ret == -ENOSPC) in cache_save_setup()
2495 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); in cache_save_setup()
2502 spin_lock(&block_group->lock); in cache_save_setup()
2504 block_group->cache_generation = trans->transid; in cache_save_setup()
2505 block_group->disk_cache_state = dcs; in cache_save_setup()
2506 spin_unlock(&block_group->lock); in cache_save_setup()
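/*
 * A rough sketch of the preallocation sizing hinted at above. The clamping
 * and multiplication lines are not part of this match list, so treat the
 * exact factors as an assumption: roughly 16 pages of space cache are
 * preallocated per 256MiB of block group, with a minimum of one unit.
 */
static u64 demo_cache_prealloc_bytes(u64 bg_length)
{
	u64 num_pages = div_u64(bg_length, SZ_256M);

	if (!num_pages)
		num_pages = 1;

	return num_pages * 16 * PAGE_SIZE;
}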
2514 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_setup_space_cache()
2515 struct btrfs_block_group *cache, *tmp; in btrfs_setup_space_cache() local
2516 struct btrfs_transaction *cur_trans = trans->transaction; in btrfs_setup_space_cache()
2519 if (list_empty(&cur_trans->dirty_bgs) || in btrfs_setup_space_cache()
2525 return -ENOMEM; in btrfs_setup_space_cache()
2527 /* Could add new block groups, use _safe just in case */ in btrfs_setup_space_cache()
2528 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, in btrfs_setup_space_cache()
2530 if (cache->disk_cache_state == BTRFS_DC_CLEAR) in btrfs_setup_space_cache()
2531 cache_save_setup(cache, trans, path); in btrfs_setup_space_cache()
2539 * Transaction commit does final block group cache writeback during a critical
2541 * order for the cache to actually match the block group, but can introduce a
2544 * So, btrfs_start_dirty_block_groups is here to kick off block group cache IO.
2545 * There's a chance we'll have to redo some of it if the block group changes
2547 * getting rid of the easy block groups while we're still allowing others to
2552 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_start_dirty_block_groups()
2553 struct btrfs_block_group *cache; in btrfs_start_dirty_block_groups() local
2554 struct btrfs_transaction *cur_trans = trans->transaction; in btrfs_start_dirty_block_groups()
2559 struct list_head *io = &cur_trans->io_bgs; in btrfs_start_dirty_block_groups()
2563 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2564 if (list_empty(&cur_trans->dirty_bgs)) { in btrfs_start_dirty_block_groups()
2565 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2568 list_splice_init(&cur_trans->dirty_bgs, &dirty); in btrfs_start_dirty_block_groups()
2569 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2572 /* Make sure all the block groups on our dirty list actually exist */ in btrfs_start_dirty_block_groups()
2578 return -ENOMEM; in btrfs_start_dirty_block_groups()
2583 * removal of empty block groups deleting this block group while we are in btrfs_start_dirty_block_groups()
2584 * writing out the cache in btrfs_start_dirty_block_groups()
2586 mutex_lock(&trans->transaction->cache_write_mutex); in btrfs_start_dirty_block_groups()
2590 cache = list_first_entry(&dirty, struct btrfs_block_group, in btrfs_start_dirty_block_groups()
2593 * This can happen if something re-dirties a block group that in btrfs_start_dirty_block_groups()
2597 if (!list_empty(&cache->io_list)) { in btrfs_start_dirty_block_groups()
2598 list_del_init(&cache->io_list); in btrfs_start_dirty_block_groups()
2599 btrfs_wait_cache_io(trans, cache, path); in btrfs_start_dirty_block_groups()
2600 btrfs_put_block_group(cache); in btrfs_start_dirty_block_groups()
2605 * btrfs_wait_cache_io uses the cache->dirty_list to decide if in btrfs_start_dirty_block_groups()
2612 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2613 list_del_init(&cache->dirty_list); in btrfs_start_dirty_block_groups()
2614 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2618 cache_save_setup(cache, trans, path); in btrfs_start_dirty_block_groups()
2620 if (cache->disk_cache_state == BTRFS_DC_SETUP) { in btrfs_start_dirty_block_groups()
2621 cache->io_ctl.inode = NULL; in btrfs_start_dirty_block_groups()
2622 ret = btrfs_write_out_cache(trans, cache, path); in btrfs_start_dirty_block_groups()
2623 if (ret == 0 && cache->io_ctl.inode) { in btrfs_start_dirty_block_groups()
2632 list_add_tail(&cache->io_list, io); in btrfs_start_dirty_block_groups()
2635 * If we failed to write the cache, the in btrfs_start_dirty_block_groups()
2642 ret = update_block_group_item(trans, path, cache); in btrfs_start_dirty_block_groups()
2644 * Our block group might still be attached to the list in btrfs_start_dirty_block_groups()
2645 * of new block groups in the transaction handle of some in btrfs_start_dirty_block_groups()
2646 * other task (struct btrfs_trans_handle->new_bgs). This in btrfs_start_dirty_block_groups()
2647 * means its block group item isn't yet in the extent in btrfs_start_dirty_block_groups()
2652 if (ret == -ENOENT) { in btrfs_start_dirty_block_groups()
2654 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2655 if (list_empty(&cache->dirty_list)) { in btrfs_start_dirty_block_groups()
2656 list_add_tail(&cache->dirty_list, in btrfs_start_dirty_block_groups()
2657 &cur_trans->dirty_bgs); in btrfs_start_dirty_block_groups()
2658 btrfs_get_block_group(cache); in btrfs_start_dirty_block_groups()
2661 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2667 /* If it's not on the io list, we need to put the block group */ in btrfs_start_dirty_block_groups()
2669 btrfs_put_block_group(cache); in btrfs_start_dirty_block_groups()
2678 * us from writing caches for block groups that are going to be in btrfs_start_dirty_block_groups()
2681 mutex_unlock(&trans->transaction->cache_write_mutex); in btrfs_start_dirty_block_groups()
2682 mutex_lock(&trans->transaction->cache_write_mutex); in btrfs_start_dirty_block_groups()
2684 mutex_unlock(&trans->transaction->cache_write_mutex); in btrfs_start_dirty_block_groups()
2693 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2694 list_splice_init(&cur_trans->dirty_bgs, &dirty); in btrfs_start_dirty_block_groups()
2696 * dirty_bgs_lock protects us from concurrent block group in btrfs_start_dirty_block_groups()
2700 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2703 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_start_dirty_block_groups()
2714 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_write_dirty_block_groups()
2715 struct btrfs_block_group *cache; in btrfs_write_dirty_block_groups() local
2716 struct btrfs_transaction *cur_trans = trans->transaction; in btrfs_write_dirty_block_groups()
2720 struct list_head *io = &cur_trans->io_bgs; in btrfs_write_dirty_block_groups()
2725 return -ENOMEM; in btrfs_write_dirty_block_groups()
2730 * transaction's list of dirty block groups. These tasks correspond to in btrfs_write_dirty_block_groups()
2732 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can in btrfs_write_dirty_block_groups()
2733 * allocate new block groups as a result of COWing nodes of the root in btrfs_write_dirty_block_groups()
2742 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2743 while (!list_empty(&cur_trans->dirty_bgs)) { in btrfs_write_dirty_block_groups()
2744 cache = list_first_entry(&cur_trans->dirty_bgs, in btrfs_write_dirty_block_groups()
2749 * This can happen if cache_save_setup re-dirties a block group in btrfs_write_dirty_block_groups()
2753 if (!list_empty(&cache->io_list)) { in btrfs_write_dirty_block_groups()
2754 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2755 list_del_init(&cache->io_list); in btrfs_write_dirty_block_groups()
2756 btrfs_wait_cache_io(trans, cache, path); in btrfs_write_dirty_block_groups()
2757 btrfs_put_block_group(cache); in btrfs_write_dirty_block_groups()
2758 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2765 list_del_init(&cache->dirty_list); in btrfs_write_dirty_block_groups()
2766 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2769 cache_save_setup(cache, trans, path); in btrfs_write_dirty_block_groups()
2773 (unsigned long) -1); in btrfs_write_dirty_block_groups()
2775 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { in btrfs_write_dirty_block_groups()
2776 cache->io_ctl.inode = NULL; in btrfs_write_dirty_block_groups()
2777 ret = btrfs_write_out_cache(trans, cache, path); in btrfs_write_dirty_block_groups()
2778 if (ret == 0 && cache->io_ctl.inode) { in btrfs_write_dirty_block_groups()
2781 list_add_tail(&cache->io_list, io); in btrfs_write_dirty_block_groups()
2784 * If we failed to write the cache, the in btrfs_write_dirty_block_groups()
2791 ret = update_block_group_item(trans, path, cache); in btrfs_write_dirty_block_groups()
2794 * created a new block group while updating a free space in btrfs_write_dirty_block_groups()
2795 * cache's inode (at inode.c:btrfs_finish_ordered_io()) in btrfs_write_dirty_block_groups()
2797 * which case the new block group is still attached to in btrfs_write_dirty_block_groups()
2799 * finished yet (no block group item in the extent tree in btrfs_write_dirty_block_groups()
2805 if (ret == -ENOENT) { in btrfs_write_dirty_block_groups()
2806 wait_event(cur_trans->writer_wait, in btrfs_write_dirty_block_groups()
2807 atomic_read(&cur_trans->num_writers) == 1); in btrfs_write_dirty_block_groups()
2808 ret = update_block_group_item(trans, path, cache); in btrfs_write_dirty_block_groups()
2814 /* If it's not on the io list, we need to put the block group */ in btrfs_write_dirty_block_groups()
2816 btrfs_put_block_group(cache); in btrfs_write_dirty_block_groups()
2818 spin_lock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2820 spin_unlock(&cur_trans->dirty_bgs_lock); in btrfs_write_dirty_block_groups()
2827 cache = list_first_entry(io, struct btrfs_block_group, in btrfs_write_dirty_block_groups()
2829 list_del_init(&cache->io_list); in btrfs_write_dirty_block_groups()
2830 btrfs_wait_cache_io(trans, cache, path); in btrfs_write_dirty_block_groups()
2831 btrfs_put_block_group(cache); in btrfs_write_dirty_block_groups()
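/*
 * Sketch of how the two writeback passes above fit into a transaction commit
 * (illustrative only; the wrapper name is made up, the real sequencing lives
 * in transaction.c): the "start" pass runs early, outside the critical
 * section, and may be repeated; the "write" pass runs once writers are
 * excluded, and io_bgs is then drained with btrfs_wait_cache_io() before the
 * superblocks go out.
 */
static int demo_commit_bg_writeback(struct btrfs_trans_handle *trans)
{
	int ret;

	/* Early, cheap pass: most block group caches get written here. */
	ret = btrfs_start_dirty_block_groups(trans);
	if (ret)
		return ret;

	/* ... enter the critical part of the commit: no new writers ... */

	/* Final pass: anything that was re-dirtied since the first pass. */
	return btrfs_write_dirty_block_groups(trans);
}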
2841 struct btrfs_fs_info *info = trans->fs_info; in btrfs_update_block_group()
2842 struct btrfs_block_group *cache = NULL; in btrfs_update_block_group() local
2849 /* Block accounting for super block */ in btrfs_update_block_group()
2850 spin_lock(&info->delalloc_root_lock); in btrfs_update_block_group()
2851 old_val = btrfs_super_bytes_used(info->super_copy); in btrfs_update_block_group()
2855 old_val -= num_bytes; in btrfs_update_block_group()
2856 btrfs_set_super_bytes_used(info->super_copy, old_val); in btrfs_update_block_group()
2857 spin_unlock(&info->delalloc_root_lock); in btrfs_update_block_group()
2860 cache = btrfs_lookup_block_group(info, bytenr); in btrfs_update_block_group()
2861 if (!cache) { in btrfs_update_block_group()
2862 ret = -ENOENT; in btrfs_update_block_group()
2865 factor = btrfs_bg_type_to_factor(cache->flags); in btrfs_update_block_group()
2868 * If this block group has free space cache written out, we in btrfs_update_block_group()
2871 * space back to the block group, otherwise we will leak space. in btrfs_update_block_group()
2873 if (!alloc && !btrfs_block_group_done(cache)) in btrfs_update_block_group()
2874 btrfs_cache_block_group(cache, 1); in btrfs_update_block_group()
2876 byte_in_group = bytenr - cache->start; in btrfs_update_block_group()
2877 WARN_ON(byte_in_group > cache->length); in btrfs_update_block_group()
2879 spin_lock(&cache->space_info->lock); in btrfs_update_block_group()
2880 spin_lock(&cache->lock); in btrfs_update_block_group()
2883 cache->disk_cache_state < BTRFS_DC_CLEAR) in btrfs_update_block_group()
2884 cache->disk_cache_state = BTRFS_DC_CLEAR; in btrfs_update_block_group()
2886 old_val = cache->used; in btrfs_update_block_group()
2887 num_bytes = min(total, cache->length - byte_in_group); in btrfs_update_block_group()
2890 cache->used = old_val; in btrfs_update_block_group()
2891 cache->reserved -= num_bytes; in btrfs_update_block_group()
2892 cache->space_info->bytes_reserved -= num_bytes; in btrfs_update_block_group()
2893 cache->space_info->bytes_used += num_bytes; in btrfs_update_block_group()
2894 cache->space_info->disk_used += num_bytes * factor; in btrfs_update_block_group()
2895 spin_unlock(&cache->lock); in btrfs_update_block_group()
2896 spin_unlock(&cache->space_info->lock); in btrfs_update_block_group()
2898 old_val -= num_bytes; in btrfs_update_block_group()
2899 cache->used = old_val; in btrfs_update_block_group()
2900 cache->pinned += num_bytes; in btrfs_update_block_group()
2902 cache->space_info, num_bytes); in btrfs_update_block_group()
2903 cache->space_info->bytes_used -= num_bytes; in btrfs_update_block_group()
2904 cache->space_info->disk_used -= num_bytes * factor; in btrfs_update_block_group()
2905 spin_unlock(&cache->lock); in btrfs_update_block_group()
2906 spin_unlock(&cache->space_info->lock); in btrfs_update_block_group()
2909 &cache->space_info->total_bytes_pinned, in btrfs_update_block_group()
2912 set_extent_dirty(&trans->transaction->pinned_extents, in btrfs_update_block_group()
2913 bytenr, bytenr + num_bytes - 1, in btrfs_update_block_group()
2917 spin_lock(&trans->transaction->dirty_bgs_lock); in btrfs_update_block_group()
2918 if (list_empty(&cache->dirty_list)) { in btrfs_update_block_group()
2919 list_add_tail(&cache->dirty_list, in btrfs_update_block_group()
2920 &trans->transaction->dirty_bgs); in btrfs_update_block_group()
2921 trans->delayed_ref_updates++; in btrfs_update_block_group()
2922 btrfs_get_block_group(cache); in btrfs_update_block_group()
2924 spin_unlock(&trans->transaction->dirty_bgs_lock); in btrfs_update_block_group()
2927 * No longer have used bytes in this block group, queue it for in btrfs_update_block_group()
2928 * deletion. We do this after adding the block group to the in btrfs_update_block_group()
2930 * cache writeout. in btrfs_update_block_group()
2934 btrfs_mark_bg_unused(cache); in btrfs_update_block_group()
2937 btrfs_put_block_group(cache); in btrfs_update_block_group()
2938 total -= num_bytes; in btrfs_update_block_group()
2942 /* Modified block groups are accounted for in the delayed_refs_rsv. */ in btrfs_update_block_group()
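/*
 * Summary of the accounting moves above for a single extent of num_bytes
 * (derived from the code in this function; factor covers RAID duplication):
 *
 *	alloc:	cache->used	+= num_bytes
 *		cache->reserved	-= num_bytes
 *		space_info:	bytes_reserved -= num_bytes,
 *				bytes_used     += num_bytes,
 *				disk_used      += num_bytes * factor
 *
 *	free:	cache->used	-= num_bytes
 *		cache->pinned	+= num_bytes
 *		space_info:	bytes_used     -= num_bytes,
 *				disk_used      -= num_bytes * factor
 *		the range is also marked dirty in the transaction's
 *		pinned_extents tree and only truly freed at commit time.
 */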
2948 * btrfs_add_reserved_bytes - update the block_group and space info counters
2949 * @cache: The cache we are manipulating
2956 * reservation and the block group has become read only, we cannot make the
2957 * reservation and return -EAGAIN; otherwise this function always succeeds.
2959 int btrfs_add_reserved_bytes(struct btrfs_block_group *cache, in btrfs_add_reserved_bytes() argument
2962 struct btrfs_space_info *space_info = cache->space_info; in btrfs_add_reserved_bytes()
2965 spin_lock(&space_info->lock); in btrfs_add_reserved_bytes()
2966 spin_lock(&cache->lock); in btrfs_add_reserved_bytes()
2967 if (cache->ro) { in btrfs_add_reserved_bytes()
2968 ret = -EAGAIN; in btrfs_add_reserved_bytes()
2970 cache->reserved += num_bytes; in btrfs_add_reserved_bytes()
2971 space_info->bytes_reserved += num_bytes; in btrfs_add_reserved_bytes()
2972 trace_btrfs_space_reservation(cache->fs_info, "space_info", in btrfs_add_reserved_bytes()
2973 space_info->flags, num_bytes, 1); in btrfs_add_reserved_bytes()
2974 btrfs_space_info_update_bytes_may_use(cache->fs_info, in btrfs_add_reserved_bytes()
2975 space_info, -ram_bytes); in btrfs_add_reserved_bytes()
2977 cache->delalloc_bytes += num_bytes; in btrfs_add_reserved_bytes()
2984 btrfs_try_granting_tickets(cache->fs_info, space_info); in btrfs_add_reserved_bytes()
2986 spin_unlock(&cache->lock); in btrfs_add_reserved_bytes()
2987 spin_unlock(&space_info->lock); in btrfs_add_reserved_bytes()
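/*
 * Caller-side sketch for the -EAGAIN case above (illustrative only; the real
 * consumer is the extent allocator, and the parameter order is assumed from
 * the prototype): a read-only block group just means "try the next
 * candidate", it is not a hard failure.
 */
static bool demo_try_reserve(struct btrfs_block_group *bg, u64 ram_bytes,
			     u64 num_bytes, int delalloc)
{
	int ret = btrfs_add_reserved_bytes(bg, ram_bytes, num_bytes, delalloc);

	if (ret == -EAGAIN)
		return false;	/* block group went read-only, pick another */

	return ret == 0;
}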
2992 * btrfs_free_reserved_bytes - update the block_group and space info counters
2993 * @cache: The cache we are manipulating
3002 void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, in btrfs_free_reserved_bytes() argument
3005 struct btrfs_space_info *space_info = cache->space_info; in btrfs_free_reserved_bytes()
3007 spin_lock(&space_info->lock); in btrfs_free_reserved_bytes()
3008 spin_lock(&cache->lock); in btrfs_free_reserved_bytes()
3009 if (cache->ro) in btrfs_free_reserved_bytes()
3010 space_info->bytes_readonly += num_bytes; in btrfs_free_reserved_bytes()
3011 cache->reserved -= num_bytes; in btrfs_free_reserved_bytes()
3012 space_info->bytes_reserved -= num_bytes; in btrfs_free_reserved_bytes()
3013 space_info->max_extent_size = 0; in btrfs_free_reserved_bytes()
3016 cache->delalloc_bytes -= num_bytes; in btrfs_free_reserved_bytes()
3017 spin_unlock(&cache->lock); in btrfs_free_reserved_bytes()
3019 btrfs_try_granting_tickets(cache->fs_info, space_info); in btrfs_free_reserved_bytes()
3020 spin_unlock(&space_info->lock); in btrfs_free_reserved_bytes()
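/*
 * Lifecycle of the "reserved" counter touched by the two helpers above
 * (reading aid, not code from this file):
 *
 *	btrfs_add_reserved_bytes()	extent chosen by the allocator
 *		(bytes_may_use -> bytes_reserved)
 *	then either:
 *	  the extent really gets used
 *		-> btrfs_update_block_group(..., alloc=1)
 *		   (reserved -> used, see the summary above)
 *	  or the reservation is dropped (error, unused prealloc, ...)
 *		-> btrfs_free_reserved_bytes()
 *		   (reserved -> free again, or counted as bytes_readonly
 *		    if the block group went RO in the meantime)
 */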
3025 struct list_head *head = &info->space_info; in force_metadata_allocation()
3029 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) in force_metadata_allocation()
3030 found->force_alloc = CHUNK_ALLOC_FORCE; in force_metadata_allocation()
3045 * about 1% of the FS size. in should_alloc_chunk()
3048 thresh = btrfs_super_total_bytes(fs_info->super_copy); in should_alloc_chunk()
3051 if (sinfo->total_bytes - bytes_used < thresh) in should_alloc_chunk()
3055 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8)) in should_alloc_chunk()
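/*
 * Worked example of the two thresholds above, assuming the usual reading of
 * div_factor(x, 8) as 80% of x (the helper itself is not in this match list):
 *
 *	FS size 1TiB -> thresh is about 10GiB (roughly 1% of the FS)
 *
 *	space_info total 100GiB, used 95GiB:
 *		free = 100GiB - 95GiB = 5GiB < 10GiB	-> worth allocating
 *	space_info total 100GiB, used 60GiB:
 *		60GiB + 2MiB < 80GiB (80% of total)	-> no need yet, return 0
 */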
3062 u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type); in btrfs_force_chunk_alloc()
3069 *   - with CHUNK_ALLOC_FORCE: return 1 if it successfully allocates a chunk,
3070 *     or an error (including -ENOSPC) otherwise.
3072 *   - without CHUNK_ALLOC_FORCE: return 0 if it doesn't need to allocate a new chunk,
3073 *     return 1 if it successfully allocates a chunk,
3074 *     or an error (including -ENOSPC) otherwise.
3079 struct btrfs_fs_info *fs_info = trans->fs_info; in btrfs_chunk_alloc()
3085 /* Don't re-enter if we're already allocating a chunk */ in btrfs_chunk_alloc()
3086 if (trans->allocating_chunk) in btrfs_chunk_alloc()
3087 return -ENOSPC; in btrfs_chunk_alloc()
3093 spin_lock(&space_info->lock); in btrfs_chunk_alloc()
3094 if (force < space_info->force_alloc) in btrfs_chunk_alloc()
3095 force = space_info->force_alloc; in btrfs_chunk_alloc()
3097 if (space_info->full) { in btrfs_chunk_alloc()
3100 ret = -ENOSPC; in btrfs_chunk_alloc()
3103 spin_unlock(&space_info->lock); in btrfs_chunk_alloc()
3106 spin_unlock(&space_info->lock); in btrfs_chunk_alloc()
3108 } else if (space_info->chunk_alloc) { in btrfs_chunk_alloc()
3110 * Someone is already allocating, so we need to block in btrfs_chunk_alloc()
3116 spin_unlock(&space_info->lock); in btrfs_chunk_alloc()
3117 mutex_lock(&fs_info->chunk_mutex); in btrfs_chunk_alloc()
3118 mutex_unlock(&fs_info->chunk_mutex); in btrfs_chunk_alloc()
3121 space_info->chunk_alloc = 1; in btrfs_chunk_alloc()
3123 spin_unlock(&space_info->lock); in btrfs_chunk_alloc()
3129 mutex_lock(&fs_info->chunk_mutex); in btrfs_chunk_alloc()
3130 trans->allocating_chunk = true; in btrfs_chunk_alloc()
3144 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { in btrfs_chunk_alloc()
3145 fs_info->data_chunk_allocations++; in btrfs_chunk_alloc()
3146 if (!(fs_info->data_chunk_allocations % in btrfs_chunk_alloc()
3147 fs_info->metadata_ratio)) in btrfs_chunk_alloc()
3158 trans->allocating_chunk = false; in btrfs_chunk_alloc()
3160 spin_lock(&space_info->lock); in btrfs_chunk_alloc()
3162 if (ret == -ENOSPC) in btrfs_chunk_alloc()
3163 space_info->full = 1; in btrfs_chunk_alloc()
3168 space_info->max_extent_size = 0; in btrfs_chunk_alloc()
3171 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; in btrfs_chunk_alloc()
3173 space_info->chunk_alloc = 0; in btrfs_chunk_alloc()
3174 spin_unlock(&space_info->lock); in btrfs_chunk_alloc()
3175 mutex_unlock(&fs_info->chunk_mutex); in btrfs_chunk_alloc()
3177 * When we allocate a new chunk we reserve space in the chunk block in btrfs_chunk_alloc()
3183 * large number of new block groups to create in our transaction in btrfs_chunk_alloc()
3184 * handle's new_bgs list to avoid exhausting the chunk block reserve in btrfs_chunk_alloc()
3185 * in extreme cases - like having a single transaction create many new in btrfs_chunk_alloc()
3186 * block groups when starting to write out the free space caches of all in btrfs_chunk_alloc()
3187 * the block groups that were made dirty during the lifetime of the in btrfs_chunk_alloc()
3190 if (trans->chunk_bytes_reserved >= (u64)SZ_2M) in btrfs_chunk_alloc()
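/*
 * Caller sketch for the return convention documented above (illustrative; the
 * wrapper name is made up, CHUNK_ALLOC_NO_FORCE/CHUNK_ALLOC_FORCE are the
 * force levels used elsewhere in this file):
 */
static int demo_maybe_alloc_data_chunk(struct btrfs_trans_handle *trans,
				       bool force)
{
	u64 flags = btrfs_get_alloc_profile(trans->fs_info,
					    BTRFS_BLOCK_GROUP_DATA);
	int ret = btrfs_chunk_alloc(trans, flags,
				    force ? CHUNK_ALLOC_FORCE
					  : CHUNK_ALLOC_NO_FORCE);

	if (ret == 1)		/* a new chunk was allocated */
		return 0;
	if (ret == 0)		/* nothing needed to be done */
		return 0;

	return ret;		/* -ENOSPC or another error */
}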
3202 num_dev = fs_info->fs_devices->rw_devices; in get_profile_num_devs()
3212 struct btrfs_fs_info *fs_info = trans->fs_info; in check_system_chunk()
3221 * atomic and race-free space reservation in the chunk block reserve. in check_system_chunk()
3223 lockdep_assert_held(&fs_info->chunk_mutex); in check_system_chunk()
3226 spin_lock(&info->lock); in check_system_chunk()
3227 left = info->total_bytes - btrfs_space_info_used(info, true); in check_system_chunk()
3228 spin_unlock(&info->lock); in check_system_chunk()
3255 ret = btrfs_block_rsv_add(fs_info->chunk_root, in check_system_chunk()
3256 &fs_info->chunk_block_rsv, in check_system_chunk()
3259 trans->chunk_bytes_reserved += thresh; in check_system_chunk()
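/*
 * Condensed view of check_system_chunk() as listed above (the threshold
 * computation itself is not in this match list, so the exact formula is an
 * assumption): with chunk_mutex held, compare what is left in the SYSTEM
 * space_info against the metadata needed to update the chunk tree for
 * num_dev device items plus one new chunk item. If it does not fit,
 * allocate a new SYSTEM chunk first, then move the reservation into
 * fs_info->chunk_block_rsv so the chunk tree updates cannot fail later.
 */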
3274 spin_lock(&block_group->lock); in btrfs_put_block_group_cache()
3275 if (block_group->iref) in btrfs_put_block_group_cache()
3277 spin_unlock(&block_group->lock); in btrfs_put_block_group_cache()
3287 inode = block_group->inode; in btrfs_put_block_group_cache()
3288 block_group->iref = 0; in btrfs_put_block_group_cache()
3289 block_group->inode = NULL; in btrfs_put_block_group_cache()
3290 spin_unlock(&block_group->lock); in btrfs_put_block_group_cache()
3291 ASSERT(block_group->io_ctl.inode == NULL); in btrfs_put_block_group_cache()
3293 last = block_group->start + block_group->length; in btrfs_put_block_group_cache()
3299 * Must be called only after stopping all workers, since we could have block
3301 * freed the block groups before stopping them.
3310 down_write(&info->commit_root_sem); in btrfs_free_block_groups()
3311 while (!list_empty(&info->caching_block_groups)) { in btrfs_free_block_groups()
3312 caching_ctl = list_entry(info->caching_block_groups.next, in btrfs_free_block_groups()
3314 list_del(&caching_ctl->list); in btrfs_free_block_groups()
3317 up_write(&info->commit_root_sem); in btrfs_free_block_groups()
3319 spin_lock(&info->unused_bgs_lock); in btrfs_free_block_groups()
3320 while (!list_empty(&info->unused_bgs)) { in btrfs_free_block_groups()
3321 block_group = list_first_entry(&info->unused_bgs, in btrfs_free_block_groups()
3324 list_del_init(&block_group->bg_list); in btrfs_free_block_groups()
3327 spin_unlock(&info->unused_bgs_lock); in btrfs_free_block_groups()
3329 spin_lock(&info->block_group_cache_lock); in btrfs_free_block_groups()
3330 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { in btrfs_free_block_groups()
3333 rb_erase(&block_group->cache_node, in btrfs_free_block_groups()
3334 &info->block_group_cache_tree); in btrfs_free_block_groups()
3335 RB_CLEAR_NODE(&block_group->cache_node); in btrfs_free_block_groups()
3336 spin_unlock(&info->block_group_cache_lock); in btrfs_free_block_groups()
3338 down_write(&block_group->space_info->groups_sem); in btrfs_free_block_groups()
3339 list_del(&block_group->list); in btrfs_free_block_groups()
3340 up_write(&block_group->space_info->groups_sem); in btrfs_free_block_groups()
3343 * We haven't cached this block group, which means we could in btrfs_free_block_groups()
3344 * possibly have excluded extents on this block group. in btrfs_free_block_groups()
3346 if (block_group->cached == BTRFS_CACHE_NO || in btrfs_free_block_groups()
3347 block_group->cached == BTRFS_CACHE_ERROR) in btrfs_free_block_groups()
3351 ASSERT(block_group->cached != BTRFS_CACHE_STARTED); in btrfs_free_block_groups()
3352 ASSERT(list_empty(&block_group->dirty_list)); in btrfs_free_block_groups()
3353 ASSERT(list_empty(&block_group->io_list)); in btrfs_free_block_groups()
3354 ASSERT(list_empty(&block_group->bg_list)); in btrfs_free_block_groups()
3355 ASSERT(refcount_read(&block_group->refs) == 1); in btrfs_free_block_groups()
3358 spin_lock(&info->block_group_cache_lock); in btrfs_free_block_groups()
3360 spin_unlock(&info->block_group_cache_lock); in btrfs_free_block_groups()
3364 while (!list_empty(&info->space_info)) { in btrfs_free_block_groups()
3365 space_info = list_entry(info->space_info.next, in btrfs_free_block_groups()
3373 if (WARN_ON(space_info->bytes_pinned > 0 || in btrfs_free_block_groups()
3374 space_info->bytes_reserved > 0 || in btrfs_free_block_groups()
3375 space_info->bytes_may_use > 0)) in btrfs_free_block_groups()
3377 WARN_ON(space_info->reclaim_size > 0); in btrfs_free_block_groups()
3378 list_del(&space_info->list); in btrfs_free_block_groups()
3384 void btrfs_freeze_block_group(struct btrfs_block_group *cache) in btrfs_freeze_block_group() argument
3386 atomic_inc(&cache->frozen); in btrfs_freeze_block_group()
3391 struct btrfs_fs_info *fs_info = block_group->fs_info; in btrfs_unfreeze_block_group()
3396 spin_lock(&block_group->lock); in btrfs_unfreeze_block_group()
3397 cleanup = (atomic_dec_and_test(&block_group->frozen) && in btrfs_unfreeze_block_group()
3398 block_group->removed); in btrfs_unfreeze_block_group()
3399 spin_unlock(&block_group->lock); in btrfs_unfreeze_block_group()
3402 em_tree = &fs_info->mapping_tree; in btrfs_unfreeze_block_group()
3403 write_lock(&em_tree->lock); in btrfs_unfreeze_block_group()
3404 em = lookup_extent_mapping(em_tree, block_group->start, in btrfs_unfreeze_block_group()
3408 write_unlock(&em_tree->lock); in btrfs_unfreeze_block_group()
3416 * tasks trimming this block group may have left one entry each. in btrfs_unfreeze_block_group()
3419 __btrfs_remove_free_space_cache(block_group->free_space_ctl); in btrfs_unfreeze_block_group()
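/*
 * A minimal pairing sketch for the freeze/unfreeze helpers above (not from
 * this file; the caller name is made up). Freezing defers the final cleanup
 * of a removed block group, its extent map and its free space cache, until
 * the last freezer drops its count here.
 */
static void demo_trim_frozen(struct btrfs_block_group *bg)
{
	btrfs_freeze_block_group(bg);

	/* ... trim/discard the free space of this block group ... */

	btrfs_unfreeze_block_group(bg);
}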