Lines Matching +full:i-cache-block-size (matches shown from fs/btrfs/zoned.c)
1 // SPDX-License-Identifier: GPL-2.0
12 #include "rcu-string.h"
13 #include "disk-io.h"
14 #include "block-group.h"
15 #include "dev-replace.h"
16 #include "space-info.h"
24 #define WP_MISSING_DEV ((u64)-1)
26 #define WP_CONVENTIONAL ((u64)-2)
31 * - primary superblock: 0B (zone 0)
32 * - first copy: 512G (zone starting at that offset)
33 * - second copy: 4T (zone starting at that offset)
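The three copies listed above land in fixed zones computed by sb_zone_number() later in this file (the 1ULL << (SHIFT - zone_size_shift) cases). Below is a minimal userspace sketch of that mapping, assuming a power-of-two zone size and the 512G/4T offsets quoted here; the constants and helper name are illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>

/* Assumption: shifts are log2 of the 512GiB and 4TiB offsets listed above. */
#define SB_LOG_FIRST_SHIFT	39	/* 512GiB */
#define SB_LOG_SECOND_SHIFT	42	/* 4TiB   */

/* Zone index holding superblock copy @mirror for a device whose zone size is
 * (1 << zone_size_shift) bytes. Sketch of the sb_zone_number() arithmetic. */
static uint64_t sb_zone_number_sketch(int zone_size_shift, int mirror)
{
	switch (mirror) {
	case 0: return 0;						/* zone 0 */
	case 1: return 1ULL << (SB_LOG_FIRST_SHIFT - zone_size_shift);
	case 2: return 1ULL << (SB_LOG_SECOND_SHIFT - zone_size_shift);
	}
	return 0;
}

int main(void)
{
	/* e.g. 256MiB zones (shift 28): copies land in zones 0, 2048, 16384 */
	for (int mirror = 0; mirror < 3; mirror++)
		printf("mirror %d -> zone %llu\n", mirror,
		       (unsigned long long)sb_zone_number_sketch(28, mirror));
	return 0;
}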
48 * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
49 * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
50 * - 1 zone for tree-log dedicated block group
51 * - 1 zone for relocation
56 * Minimum / maximum supported zone size. Currently, SMR disks have a zone
57 * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range.
58 * We do not expect the zone size to become larger than 8GiB or smaller than
71 return (zone->cond == BLK_ZONE_COND_FULL) || in sb_zone_is_full()
72 (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); in sb_zone_is_full()
91 for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in sb_write_pointer() local
92 ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL); in sb_write_pointer()
93 empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY); in sb_write_pointer()
94 full[i] = sb_zone_is_full(&zones[i]); in sb_write_pointer()
117 return -ENOENT; in sb_write_pointer()
120 struct address_space *mapping = bdev->bd_mapping; in sb_write_pointer()
124 for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in sb_write_pointer() local
125 u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT; in sb_write_pointer()
126 u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - in sb_write_pointer()
129 page[i] = read_cache_page_gfp(mapping, in sb_write_pointer()
131 if (IS_ERR(page[i])) { in sb_write_pointer()
132 if (i == 1) in sb_write_pointer()
134 return PTR_ERR(page[i]); in sb_write_pointer()
136 super[i] = page_address(page[i]); in sb_write_pointer()
145 for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) in sb_write_pointer() local
146 btrfs_release_disk_super(super[i]); in sb_write_pointer()
152 return -EUCLEAN; in sb_write_pointer()
168 case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; in sb_zone_number()
169 case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; in sb_zone_number()
186 return (u64)zone_number << zone_info->zone_size_shift; in zone_start_physical()
190 * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
197 const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; in emulate_report_zones()
198 sector_t bdev_size = bdev_nr_sectors(device->bdev); in emulate_report_zones()
199 unsigned int i; in emulate_report_zones() local
202 for (i = 0; i < nr_zones; i++) { in emulate_report_zones()
203 zones[i].start = i * zone_sectors + pos; in emulate_report_zones()
204 zones[i].len = zone_sectors; in emulate_report_zones()
205 zones[i].capacity = zone_sectors; in emulate_report_zones()
206 zones[i].wp = zones[i].start + zone_sectors; in emulate_report_zones()
207 zones[i].type = BLK_ZONE_TYPE_CONVENTIONAL; in emulate_report_zones()
208 zones[i].cond = BLK_ZONE_COND_NOT_WP; in emulate_report_zones()
210 if (zones[i].wp >= bdev_size) { in emulate_report_zones()
211 i++; in emulate_report_zones()
216 return i; in emulate_report_zones()
222 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_get_dev_zones()
228 if (!bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zones()
234 /* Check cache */ in btrfs_get_dev_zones()
235 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
236 unsigned int i; in btrfs_get_dev_zones() local
239 ASSERT(IS_ALIGNED(pos, zinfo->zone_size)); in btrfs_get_dev_zones()
240 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
245 *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno); in btrfs_get_dev_zones()
247 for (i = 0; i < *nr_zones; i++) { in btrfs_get_dev_zones()
250 zone_info = &zinfo->zone_cache[zno + i]; in btrfs_get_dev_zones()
251 if (!zone_info->len) in btrfs_get_dev_zones()
255 if (i == *nr_zones) { in btrfs_get_dev_zones()
256 /* Cache hit on all the zones */ in btrfs_get_dev_zones()
257 memcpy(zones, zinfo->zone_cache + zno, in btrfs_get_dev_zones()
258 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
263 ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, in btrfs_get_dev_zones()
266 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zones()
268 pos, rcu_str_deref(device->name), in btrfs_get_dev_zones()
269 device->devid); in btrfs_get_dev_zones()
274 return -EIO; in btrfs_get_dev_zones()
276 /* Populate cache */ in btrfs_get_dev_zones()
277 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
278 u32 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
280 memcpy(zinfo->zone_cache + zno, zones, in btrfs_get_dev_zones()
281 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
287 /* The emulated zone size is determined from the size of a device extent */
291 struct btrfs_root *root = fs_info->dev_root; in calculate_emulated_zone_size()
303 return -ENOMEM; in calculate_emulated_zone_size()
309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { in calculate_emulated_zone_size()
315 return -EUCLEAN; in calculate_emulated_zone_size()
318 leaf = path->nodes[0]; in calculate_emulated_zone_size()
319 dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); in calculate_emulated_zone_size()
320 fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); in calculate_emulated_zone_size()
326 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_get_dev_zone_info_all_devices()
330 /* fs_info->zone_size might not be set yet. Use the incompat flag here. */ in btrfs_get_dev_zone_info_all_devices()
334 mutex_lock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
335 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_get_dev_zone_info_all_devices()
337 if (!device->bdev) in btrfs_get_dev_zone_info_all_devices()
344 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
351 struct btrfs_fs_info *fs_info = device->fs_info; in btrfs_get_dev_zone_info()
353 struct block_device *bdev = device->bdev; in btrfs_get_dev_zone_info()
359 unsigned int i, nreported = 0, nr_zones; in btrfs_get_dev_zone_info() local
371 if (device->zone_info) in btrfs_get_dev_zone_info()
376 return -ENOMEM; in btrfs_get_dev_zone_info()
378 device->zone_info = zone_info; in btrfs_get_dev_zone_info()
381 if (!fs_info->zone_size) { in btrfs_get_dev_zone_info()
387 ASSERT(fs_info->zone_size); in btrfs_get_dev_zone_info()
388 zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_get_dev_zone_info()
394 zone_info->zone_size = zone_sectors << SECTOR_SHIFT; in btrfs_get_dev_zone_info()
396 /* We reject devices with a zone size larger than 8GiB */ in btrfs_get_dev_zone_info()
397 if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { in btrfs_get_dev_zone_info()
399 "zoned: %s: zone size %llu larger than supported maximum %llu", in btrfs_get_dev_zone_info()
400 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
401 zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); in btrfs_get_dev_zone_info()
402 ret = -EINVAL; in btrfs_get_dev_zone_info()
404 } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { in btrfs_get_dev_zone_info()
406 "zoned: %s: zone size %llu smaller than supported minimum %u", in btrfs_get_dev_zone_info()
407 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
408 zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); in btrfs_get_dev_zone_info()
409 ret = -EINVAL; in btrfs_get_dev_zone_info()
414 zone_info->zone_size_shift = ilog2(zone_info->zone_size); in btrfs_get_dev_zone_info()
415 zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); in btrfs_get_dev_zone_info()
417 zone_info->nr_zones++; in btrfs_get_dev_zone_info()
423 rcu_str_deref(device->name), max_active_zones, in btrfs_get_dev_zone_info()
425 ret = -EINVAL; in btrfs_get_dev_zone_info()
428 zone_info->max_active_zones = max_active_zones; in btrfs_get_dev_zone_info()
430 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
431 if (!zone_info->seq_zones) { in btrfs_get_dev_zone_info()
432 ret = -ENOMEM; in btrfs_get_dev_zone_info()
436 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
437 if (!zone_info->empty_zones) { in btrfs_get_dev_zone_info()
438 ret = -ENOMEM; in btrfs_get_dev_zone_info()
442 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
443 if (!zone_info->active_zones) { in btrfs_get_dev_zone_info()
444 ret = -ENOMEM; in btrfs_get_dev_zone_info()
450 ret = -ENOMEM; in btrfs_get_dev_zone_info()
455 * Enable zone cache only for a zoned device. On a non-zoned device, we in btrfs_get_dev_zone_info()
457 * use the cache. in btrfs_get_dev_zone_info()
459 if (populate_cache && bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zone_info()
460 zone_info->zone_cache = vcalloc(zone_info->nr_zones, in btrfs_get_dev_zone_info()
462 if (!zone_info->zone_cache) { in btrfs_get_dev_zone_info()
463 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
464 "zoned: failed to allocate zone cache for %s", in btrfs_get_dev_zone_info()
465 rcu_str_deref(device->name)); in btrfs_get_dev_zone_info()
466 ret = -ENOMEM; in btrfs_get_dev_zone_info()
480 for (i = 0; i < nr_zones; i++) { in btrfs_get_dev_zone_info()
481 if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) in btrfs_get_dev_zone_info()
482 __set_bit(nreported, zone_info->seq_zones); in btrfs_get_dev_zone_info()
483 switch (zones[i].cond) { in btrfs_get_dev_zone_info()
485 __set_bit(nreported, zone_info->empty_zones); in btrfs_get_dev_zone_info()
490 __set_bit(nreported, zone_info->active_zones); in btrfs_get_dev_zone_info()
496 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; in btrfs_get_dev_zone_info()
499 if (nreported != zone_info->nr_zones) { in btrfs_get_dev_zone_info()
500 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
502 rcu_str_deref(device->name), nreported, in btrfs_get_dev_zone_info()
503 zone_info->nr_zones); in btrfs_get_dev_zone_info()
504 ret = -EIO; in btrfs_get_dev_zone_info()
510 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
512 nactive, rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
514 ret = -EIO; in btrfs_get_dev_zone_info()
517 atomic_set(&zone_info->active_zones_left, in btrfs_get_dev_zone_info()
518 max_active_zones - nactive); in btrfs_get_dev_zone_info()
519 set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); in btrfs_get_dev_zone_info()
524 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { in btrfs_get_dev_zone_info()
527 int sb_pos = BTRFS_NR_SB_LOG_ZONES * i; in btrfs_get_dev_zone_info()
529 sb_zone = sb_zone_number(zone_info->zone_size_shift, i); in btrfs_get_dev_zone_info()
530 if (sb_zone + 1 >= zone_info->nr_zones) in btrfs_get_dev_zone_info()
535 &zone_info->sb_zones[sb_pos], in btrfs_get_dev_zone_info()
541 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
542 "zoned: failed to read super block log zone info at devid %llu zone %u", in btrfs_get_dev_zone_info()
543 device->devid, sb_zone); in btrfs_get_dev_zone_info()
544 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
552 if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == in btrfs_get_dev_zone_info()
556 ret = sb_write_pointer(device->bdev, in btrfs_get_dev_zone_info()
557 &zone_info->sb_zones[sb_pos], &sb_wp); in btrfs_get_dev_zone_info()
558 if (ret != -ENOENT && ret) { in btrfs_get_dev_zone_info()
559 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
560 "zoned: super block log zone corrupted devid %llu zone %u", in btrfs_get_dev_zone_info()
561 device->devid, sb_zone); in btrfs_get_dev_zone_info()
562 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
571 model = "host-managed zoned"; in btrfs_get_dev_zone_info()
579 "%s block device %s, %u %szones of %llu bytes", in btrfs_get_dev_zone_info()
580 model, rcu_str_deref(device->name), zone_info->nr_zones, in btrfs_get_dev_zone_info()
581 emulated, zone_info->zone_size); in btrfs_get_dev_zone_info()
593 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_destroy_dev_zone_info()
598 bitmap_free(zone_info->active_zones); in btrfs_destroy_dev_zone_info()
599 bitmap_free(zone_info->seq_zones); in btrfs_destroy_dev_zone_info()
600 bitmap_free(zone_info->empty_zones); in btrfs_destroy_dev_zone_info()
601 vfree(zone_info->zone_cache); in btrfs_destroy_dev_zone_info()
603 device->zone_info = NULL; in btrfs_destroy_dev_zone_info()
610 zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); in btrfs_clone_dev_zone_info()
614 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
615 if (!zone_info->seq_zones) in btrfs_clone_dev_zone_info()
618 bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, in btrfs_clone_dev_zone_info()
619 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
621 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
622 if (!zone_info->empty_zones) in btrfs_clone_dev_zone_info()
625 bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, in btrfs_clone_dev_zone_info()
626 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
628 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
629 if (!zone_info->active_zones) in btrfs_clone_dev_zone_info()
632 bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, in btrfs_clone_dev_zone_info()
633 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
634 zone_info->zone_cache = NULL; in btrfs_clone_dev_zone_info()
639 bitmap_free(zone_info->seq_zones); in btrfs_clone_dev_zone_info()
640 bitmap_free(zone_info->empty_zones); in btrfs_clone_dev_zone_info()
641 bitmap_free(zone_info->active_zones); in btrfs_clone_dev_zone_info()
653 return ret ? ret : -EIO; in btrfs_get_dev_zone()
662 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_for_zoned_device()
663 if (device->bdev && bdev_is_zoned(device->bdev)) { in btrfs_check_for_zoned_device()
666 device->bdev); in btrfs_check_for_zoned_device()
667 return -EINVAL; in btrfs_check_for_zoned_device()
676 struct queue_limits *lim = &fs_info->limits; in btrfs_check_zoned_mode()
682 * Host-Managed devices can't be used without the ZONED flag. With the in btrfs_check_zoned_mode()
690 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_zoned_mode()
691 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_check_zoned_mode()
693 if (!device->bdev) in btrfs_check_zoned_mode()
697 zone_size = zone_info->zone_size; in btrfs_check_zoned_mode()
698 } else if (zone_info->zone_size != zone_size) { in btrfs_check_zoned_mode()
700 "zoned: unequal block device zone sizes: have %llu found %llu", in btrfs_check_zoned_mode()
701 zone_info->zone_size, zone_size); in btrfs_check_zoned_mode()
702 return -EINVAL; in btrfs_check_zoned_mode()
706 * With the zoned emulation, we can have non-zoned device on the in btrfs_check_zoned_mode()
708 * append size. in btrfs_check_zoned_mode()
710 if (bdev_is_zoned(device->bdev)) in btrfs_check_zoned_mode()
711 blk_stack_limits(lim, bdev_limits(device->bdev), 0); in btrfs_check_zoned_mode()
727 "zoned: zone size %llu not aligned to stripe %u", in btrfs_check_zoned_mode()
729 return -EINVAL; in btrfs_check_zoned_mode()
733 btrfs_err(fs_info, "zoned: mixed block groups not supported"); in btrfs_check_zoned_mode()
734 return -EINVAL; in btrfs_check_zoned_mode()
737 fs_info->zone_size = zone_size; in btrfs_check_zoned_mode()
745 fs_info->max_zone_append_size = ALIGN_DOWN( in btrfs_check_zoned_mode()
746 min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
747 (u64)lim->max_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
748 (u64)lim->max_segments << PAGE_SHIFT), in btrfs_check_zoned_mode()
749 fs_info->sectorsize); in btrfs_check_zoned_mode()
750 fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; in btrfs_check_zoned_mode()
752 fs_info->max_extent_size = min_not_zero(fs_info->max_extent_size, in btrfs_check_zoned_mode()
753 fs_info->max_zone_append_size); in btrfs_check_zoned_mode()
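For reference, here is a standalone sketch of the max_zone_append_size calculation above; the queue limit values are invented, and ALIGN_DOWN assumes a power-of-two sectorsize as in the kernel macro.

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SHIFT   12		/* assumption: 4KiB pages */
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	/* Hypothetical stacked queue limits after blk_stack_limits(). */
	uint64_t max_zone_append_sectors = 1024;	/* 512KiB  */
	uint64_t max_sectors = 2560;			/* 1280KiB */
	uint64_t max_segments = 128;			/* 512KiB  */
	uint64_t sectorsize = 4096;

	uint64_t max_zone_append_size = ALIGN_DOWN(
		min3_u64(max_zone_append_sectors << SECTOR_SHIFT,
			 max_sectors << SECTOR_SHIFT,
			 max_segments << PAGE_SHIFT),
		sectorsize);

	printf("max_zone_append_size = %llu bytes\n",
	       (unsigned long long)max_zone_append_size);	/* 524288 */
	return 0;
}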
756 * Check mount options here, because we might change fs_info->zoned in btrfs_check_zoned_mode()
757 * from fs_info->zone_size. in btrfs_check_zoned_mode()
759 ret = btrfs_check_mountopts_zoned(fs_info, &fs_info->mount_opt); in btrfs_check_zoned_mode()
763 btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); in btrfs_check_zoned_mode()
774 * Space cache writing is not COWed. Disable that to avoid write errors in btrfs_check_mountopts_zoned()
778 btrfs_err(info, "zoned: space cache v1 is not supported"); in btrfs_check_mountopts_zoned()
779 return -EINVAL; in btrfs_check_mountopts_zoned()
784 return -EINVAL; in btrfs_check_mountopts_zoned()
808 if (ret != -ENOENT && ret < 0) in sb_log_location()
819 if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { in sb_log_location()
826 reset->start, reset->len); in sb_log_location()
831 reset->cond = BLK_ZONE_COND_EMPTY; in sb_log_location()
832 reset->wp = reset->start; in sb_log_location()
834 } else if (ret != -ENOENT) { in sb_log_location()
849 wp -= BTRFS_SUPER_INFO_SIZE; in sb_log_location()
877 return -EINVAL; in btrfs_sb_log_location_bdev()
884 return -ENOENT; in btrfs_sb_log_location_bdev()
892 return -EIO; in btrfs_sb_log_location_bdev()
900 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_sb_log_location()
904 * For a zoned filesystem on a non-zoned block device, use the same in btrfs_sb_log_location()
905 * super block locations as regular filesystem. Doing so, the super in btrfs_sb_log_location()
906 * block can always be retrieved and the zoned flag of the volume in btrfs_sb_log_location()
907 * detected from the super block information. in btrfs_sb_log_location()
909 if (!bdev_is_zoned(device->bdev)) { in btrfs_sb_log_location()
914 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in btrfs_sb_log_location()
915 if (zone_num + 1 >= zinfo->nr_zones) in btrfs_sb_log_location()
916 return -ENOENT; in btrfs_sb_log_location()
918 return sb_log_location(device->bdev, in btrfs_sb_log_location()
919 &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], in btrfs_sb_log_location()
931 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in is_sb_log_zone()
932 if (zone_num + 1 >= zinfo->nr_zones) in is_sb_log_zone()
935 if (!test_bit(zone_num, zinfo->seq_zones)) in is_sb_log_zone()
943 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_advance_sb_log()
945 int i; in btrfs_advance_sb_log() local
950 zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; in btrfs_advance_sb_log()
951 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { in btrfs_advance_sb_log()
953 if (zone->cond == BLK_ZONE_COND_FULL) { in btrfs_advance_sb_log()
958 if (zone->cond == BLK_ZONE_COND_EMPTY) in btrfs_advance_sb_log()
959 zone->cond = BLK_ZONE_COND_IMP_OPEN; in btrfs_advance_sb_log()
961 zone->wp += SUPER_INFO_SECTORS; in btrfs_advance_sb_log()
972 if (zone->wp != zone->start + zone->capacity) { in btrfs_advance_sb_log()
977 ret = blkdev_zone_mgmt(device->bdev, in btrfs_advance_sb_log()
978 REQ_OP_ZONE_FINISH, zone->start, in btrfs_advance_sb_log()
979 zone->len); in btrfs_advance_sb_log()
985 zone->wp = zone->start + zone->len; in btrfs_advance_sb_log()
986 zone->cond = BLK_ZONE_COND_FULL; in btrfs_advance_sb_log()
993 return -EIO; in btrfs_advance_sb_log()
1013 return -ENOENT; in btrfs_reset_sb_log_zones()
1028 * @num_bytes: size of wanted region
1037 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_find_allocatable_zones()
1038 const u8 shift = zinfo->zone_size_shift; in btrfs_find_allocatable_zones()
1043 int i; in btrfs_find_allocatable_zones() local
1045 ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1046 ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1052 if (end > zinfo->nr_zones) in btrfs_find_allocatable_zones()
1057 !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) { in btrfs_find_allocatable_zones()
1058 pos += zinfo->zone_size; in btrfs_find_allocatable_zones()
1063 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { in btrfs_find_allocatable_zones()
1067 sb_zone = sb_zone_number(shift, i); in btrfs_find_allocatable_zones()
1077 sb_pos = btrfs_sb_offset(i); in btrfs_find_allocatable_zones()
1082 zinfo->zone_size); in btrfs_find_allocatable_zones()
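The loop above advances through the hole one zone at a time, requiring every zone of the candidate range to be empty and skipping ranges that would overlap a superblock log zone. A reduced userspace sketch of that scan follows, using plain bool arrays instead of the kernel bitmaps; the zone layout and reserved set are made up, and the real function re-aligns past the superblock zones rather than treating them as a bitmap.

#include <stdbool.h>
#include <stdio.h>

/* First zone index >= @start where @nzones consecutive zones are all empty
 * and none is reserved (e.g. a superblock log zone), or -1 if none fits. */
static long find_allocatable_run(const bool *empty, const bool *reserved,
				 unsigned int nr_zones,
				 unsigned int start, unsigned int nzones)
{
	unsigned int pos = start;

	while (pos + nzones <= nr_zones) {
		bool ok = true;

		for (unsigned int i = 0; i < nzones; i++) {
			if (!empty[pos + i] || reserved[pos + i]) {
				ok = false;
				break;
			}
		}
		if (ok)
			return pos;
		pos++;	/* the kernel likewise advances one zone at a time */
	}
	return -1;
}

int main(void)
{
	bool empty[16]    = { [2] = 1, [3] = 1, [4] = 1, [7] = 1, [8] = 1 };
	bool reserved[16] = { [3] = 1 };	/* pretend zone 3 holds a sb log */

	printf("run of 2 empty zones at %ld\n",
	       find_allocatable_run(empty, reserved, 16, 0, 2));	/* -> 7 */
	return 0;
}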
1095 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_set_active_zone()
1096 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_set_active_zone()
1099 if (zone_info->max_active_zones == 0) in btrfs_dev_set_active_zone()
1102 if (!test_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1104 if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) in btrfs_dev_set_active_zone()
1106 if (test_and_set_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1108 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_set_active_zone()
1117 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_clear_active_zone()
1118 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_clear_active_zone()
1121 if (zone_info->max_active_zones == 0) in btrfs_dev_clear_active_zone()
1124 if (test_and_clear_bit(zno, zone_info->active_zones)) in btrfs_dev_clear_active_zone()
1125 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_clear_active_zone()
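btrfs_dev_set_active_zone() above pairs a per-zone bit with a countdown of the device's remaining active zones: a slot is reserved before setting the bit and handed back if another thread set the bit first. Here is a self-contained C11 atomics sketch of that pattern; names and counts are illustrative, and the kernel's atomic_dec_if_positive() additionally avoids the transient negative value this sketch corrects after the fact.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 8

static atomic_int active_zones_left = 3;	/* device's max active zones */
static atomic_bool zone_active[NR_ZONES];

/* Try to account zone @zno as active; true if it is (or already was) active. */
static bool set_active_zone(unsigned int zno)
{
	if (atomic_load(&zone_active[zno]))
		return true;			/* already active */

	/* Reserve a slot first; give up if none are left. */
	if (atomic_fetch_sub(&active_zones_left, 1) <= 0) {
		atomic_fetch_add(&active_zones_left, 1);
		return false;
	}

	/* Lost the race: someone else activated it, return our slot. */
	if (atomic_exchange(&zone_active[zno], true))
		atomic_fetch_add(&active_zones_left, 1);

	return true;
}

static void clear_active_zone(unsigned int zno)
{
	if (atomic_exchange(&zone_active[zno], false))
		atomic_fetch_add(&active_zones_left, 1);
}

int main(void)
{
	for (unsigned int z = 0; z < 5; z++)
		printf("zone %u -> %s\n", z,
		       set_active_zone(z) ? "active" : "no slot");
	clear_active_zone(1);
	printf("zone 5 after clearing 1 -> %s\n",
	       set_active_zone(5) ? "active" : "no slot");
	return 0;
}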
1136 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_device_zone()
1146 physical += device->zone_info->zone_size; in btrfs_reset_device_zone()
1147 length -= device->zone_info->zone_size; in btrfs_reset_device_zone()
1153 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) in btrfs_ensure_empty_zones() argument
1155 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_ensure_empty_zones()
1156 const u8 shift = zinfo->zone_size_shift; in btrfs_ensure_empty_zones()
1158 unsigned long nbits = size >> shift; in btrfs_ensure_empty_zones()
1162 ASSERT(IS_ALIGNED(start, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1163 ASSERT(IS_ALIGNED(size, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1165 if (begin + nbits > zinfo->nr_zones) in btrfs_ensure_empty_zones()
1166 return -ERANGE; in btrfs_ensure_empty_zones()
1169 if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1173 if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) && in btrfs_ensure_empty_zones()
1174 bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1177 for (pos = start; pos < start + size; pos += zinfo->zone_size) { in btrfs_ensure_empty_zones()
1186 device->fs_info, in btrfs_ensure_empty_zones()
1188 rcu_str_deref(device->name), device->devid, pos >> shift); in btrfs_ensure_empty_zones()
1191 ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, in btrfs_ensure_empty_zones()
1202 * for a block group consisting of conventional zones. It points to the
1203 * end of the highest-addressed extent in the block group as an allocation
1206 static int calculate_alloc_pointer(struct btrfs_block_group *cache, in calculate_alloc_pointer() argument
1209 struct btrfs_fs_info *fs_info = cache->fs_info; in calculate_alloc_pointer()
1218 * Avoid tree lookups for a new block group, there's no use for it. in calculate_alloc_pointer()
1221 * Also, we have a lock chain of extent buffer lock -> chunk mutex. in calculate_alloc_pointer()
1222 * For a new block group, this function is called from in calculate_alloc_pointer()
1234 return -ENOMEM; in calculate_alloc_pointer()
1236 key.objectid = cache->start + cache->length; in calculate_alloc_pointer()
1244 ret = -EUCLEAN; in calculate_alloc_pointer()
1248 ret = btrfs_previous_extent_item(root, path, cache->start); in calculate_alloc_pointer()
1257 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); in calculate_alloc_pointer()
1262 length = fs_info->nodesize; in calculate_alloc_pointer()
1264 if (!(found_key.objectid >= cache->start && in calculate_alloc_pointer()
1265 found_key.objectid + length <= cache->start + cache->length)) { in calculate_alloc_pointer()
1266 return -EUCLEAN; in calculate_alloc_pointer()
1268 *offset_ret = found_key.objectid + length - cache->start; in calculate_alloc_pointer()
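So for a conventional-zone block group the allocation pointer ends up as the end of the highest-addressed extent, expressed as an offset from the block group start. A quick arithmetic check with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical block group and its highest-addressed extent. */
	uint64_t bg_start = 10ULL << 30;			/* block group at 10GiB */
	uint64_t extent_objectid = (10ULL << 30) + (700ULL << 20);
	uint64_t extent_length = 128ULL << 20;			/* 128MiB extent */

	/* Mirrors: *offset_ret = found_key.objectid + length - cache->start */
	uint64_t alloc_offset = extent_objectid + extent_length - bg_start;

	printf("alloc pointer offset = %llu MiB into the block group\n",
	       (unsigned long long)(alloc_offset >> 20));	/* 828 MiB */
	return 0;
}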
1282 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in btrfs_load_zone_info()
1289 info->physical = map->stripes[zone_idx].physical; in btrfs_load_zone_info()
1291 down_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1292 device = map->stripes[zone_idx].dev; in btrfs_load_zone_info()
1294 if (!device->bdev) { in btrfs_load_zone_info()
1295 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1296 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1301 if (!device->zone_info->max_active_zones) in btrfs_load_zone_info()
1304 if (!btrfs_dev_is_sequential(device, info->physical)) { in btrfs_load_zone_info()
1305 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1306 info->alloc_offset = WP_CONVENTIONAL; in btrfs_load_zone_info()
1310 ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical)); in btrfs_load_zone_info()
1312 /* This zone will be used for allocation, so mark this zone non-empty. */ in btrfs_load_zone_info()
1313 btrfs_dev_clear_zone_empty(device, info->physical); in btrfs_load_zone_info()
1316 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) in btrfs_load_zone_info()
1317 btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); in btrfs_load_zone_info()
1323 WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); in btrfs_load_zone_info()
1328 capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT); in btrfs_load_zone_info()
1329 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1330 info->alloc_offset = 0; in btrfs_load_zone_info()
1331 info->capacity = capacity << SECTOR_SHIFT; in btrfs_load_zone_info()
1337 ret = btrfs_get_dev_zone(device, info->physical, &zone); in btrfs_load_zone_info()
1340 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1341 if (ret != -EIO && ret != -EOPNOTSUPP) in btrfs_load_zone_info()
1343 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1350 zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), in btrfs_load_zone_info()
1351 device->devid); in btrfs_load_zone_info()
1352 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1353 return -EIO; in btrfs_load_zone_info()
1356 info->capacity = (zone.capacity << SECTOR_SHIFT); in btrfs_load_zone_info()
1363 (info->physical >> device->zone_info->zone_size_shift), in btrfs_load_zone_info()
1364 rcu_str_deref(device->name), device->devid); in btrfs_load_zone_info()
1365 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1368 info->alloc_offset = 0; in btrfs_load_zone_info()
1371 info->alloc_offset = info->capacity; in btrfs_load_zone_info()
1375 info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_load_zone_info()
1380 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1389 if (info->alloc_offset == WP_MISSING_DEV) { in btrfs_load_block_group_single()
1390 btrfs_err(bg->fs_info, in btrfs_load_block_group_single()
1392 info->physical); in btrfs_load_block_group_single()
1393 return -EIO; in btrfs_load_block_group_single()
1396 bg->alloc_offset = info->alloc_offset; in btrfs_load_block_group_single()
1397 bg->zone_capacity = info->capacity; in btrfs_load_block_group_single()
1399 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_single()
1408 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_dup()
1410 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_dup()
1411 btrfs_err(fs_info, "zoned: data DUP profile needs raid-stripe-tree"); in btrfs_load_block_group_dup()
1412 return -EINVAL; in btrfs_load_block_group_dup()
1415 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_dup()
1418 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1421 return -EIO; in btrfs_load_block_group_dup()
1424 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1427 return -EIO; in btrfs_load_block_group_dup()
1430 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1432 return -EIO; in btrfs_load_block_group_dup()
1437 return -EIO; in btrfs_load_block_group_dup()
1439 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_dup()
1442 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_dup()
1451 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid1()
1452 int i; in btrfs_load_block_group_raid1() local
1454 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid1()
1455 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid1()
1456 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1457 return -EINVAL; in btrfs_load_block_group_raid1()
1461 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_raid1()
1463 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid1()
1464 if (zone_info[i].alloc_offset == WP_MISSING_DEV || in btrfs_load_block_group_raid1()
1465 zone_info[i].alloc_offset == WP_CONVENTIONAL) in btrfs_load_block_group_raid1()
1468 if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && in btrfs_load_block_group_raid1()
1472 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1473 return -EIO; in btrfs_load_block_group_raid1()
1475 if (test_bit(0, active) != test_bit(i, active)) { in btrfs_load_block_group_raid1()
1478 return -EIO; in btrfs_load_block_group_raid1()
1482 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid1()
1487 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_raid1()
1489 bg->alloc_offset = zone_info[i - 1].alloc_offset; in btrfs_load_block_group_raid1()
1499 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid0()
1501 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid0()
1502 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid0()
1503 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid0()
1504 return -EINVAL; in btrfs_load_block_group_raid0()
1507 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid0() local
1508 if (zone_info[i].alloc_offset == WP_MISSING_DEV || in btrfs_load_block_group_raid0()
1509 zone_info[i].alloc_offset == WP_CONVENTIONAL) in btrfs_load_block_group_raid0()
1512 if (test_bit(0, active) != test_bit(i, active)) { in btrfs_load_block_group_raid0()
1514 return -EIO; in btrfs_load_block_group_raid0()
1517 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid0()
1519 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid0()
1520 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid0()
1531 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid10()
1533 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid10()
1534 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid10()
1535 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid10()
1536 return -EINVAL; in btrfs_load_block_group_raid10()
1539 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid10() local
1540 if (zone_info[i].alloc_offset == WP_MISSING_DEV || in btrfs_load_block_group_raid10()
1541 zone_info[i].alloc_offset == WP_CONVENTIONAL) in btrfs_load_block_group_raid10()
1544 if (test_bit(0, active) != test_bit(i, active)) { in btrfs_load_block_group_raid10()
1546 return -EIO; in btrfs_load_block_group_raid10()
1549 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid10()
1552 if ((i % map->sub_stripes) == 0) { in btrfs_load_block_group_raid10()
1553 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid10()
1554 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid10()
1561 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) in btrfs_load_block_group_zone_info() argument
1563 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_load_block_group_zone_info()
1565 u64 logical = cache->start; in btrfs_load_block_group_zone_info()
1566 u64 length = cache->length; in btrfs_load_block_group_zone_info()
1569 int i; in btrfs_load_block_group_zone_info() local
1579 if (!IS_ALIGNED(length, fs_info->zone_size)) { in btrfs_load_block_group_zone_info()
1581 "zoned: block group %llu len %llu unaligned to zone size %llu", in btrfs_load_block_group_zone_info()
1582 logical, length, fs_info->zone_size); in btrfs_load_block_group_zone_info()
1583 return -EIO; in btrfs_load_block_group_zone_info()
1588 return -EINVAL; in btrfs_load_block_group_zone_info()
1590 cache->physical_map = map; in btrfs_load_block_group_zone_info()
1592 zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); in btrfs_load_block_group_zone_info()
1594 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1598 active = bitmap_zalloc(map->num_stripes, GFP_NOFS); in btrfs_load_block_group_zone_info()
1600 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1604 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_zone_info()
1605 ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map, new); in btrfs_load_block_group_zone_info()
1609 if (zone_info[i].alloc_offset == WP_CONVENTIONAL) in btrfs_load_block_group_zone_info()
1616 set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1619 /* Zone capacity is always zone size in emulation */ in btrfs_load_block_group_zone_info()
1620 cache->zone_capacity = cache->length; in btrfs_load_block_group_zone_info()
1621 ret = calculate_alloc_pointer(cache, &last_alloc, new); in btrfs_load_block_group_zone_info()
1625 cache->start); in btrfs_load_block_group_zone_info()
1627 } else if (map->num_stripes == num_conventional) { in btrfs_load_block_group_zone_info()
1628 cache->alloc_offset = last_alloc; in btrfs_load_block_group_zone_info()
1629 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1634 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; in btrfs_load_block_group_zone_info()
1637 ret = btrfs_load_block_group_single(cache, &zone_info[0], active); in btrfs_load_block_group_zone_info()
1640 ret = btrfs_load_block_group_dup(cache, map, zone_info, active); in btrfs_load_block_group_zone_info()
1645 ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); in btrfs_load_block_group_zone_info()
1648 ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); in btrfs_load_block_group_zone_info()
1651 ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); in btrfs_load_block_group_zone_info()
1657 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1658 ret = -EINVAL; in btrfs_load_block_group_zone_info()
1662 if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 && in btrfs_load_block_group_zone_info()
1665 * Detected broken write pointer. Make this block group in btrfs_load_block_group_zone_info()
1667 * allocatable region. Relocating this block group will fix the in btrfs_load_block_group_zone_info()
1672 * reading from this block group won't work anyway by a missing in btrfs_load_block_group_zone_info()
1675 cache->alloc_offset = cache->zone_capacity; in btrfs_load_block_group_zone_info()
1680 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && in btrfs_load_block_group_zone_info()
1681 (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && in btrfs_load_block_group_zone_info()
1682 !fs_info->stripe_root) { in btrfs_load_block_group_zone_info()
1683 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_zone_info()
1684 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1685 return -EINVAL; in btrfs_load_block_group_zone_info()
1688 if (cache->alloc_offset > cache->zone_capacity) { in btrfs_load_block_group_zone_info()
1690 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", in btrfs_load_block_group_zone_info()
1691 cache->alloc_offset, cache->zone_capacity, in btrfs_load_block_group_zone_info()
1692 cache->start); in btrfs_load_block_group_zone_info()
1693 ret = -EIO; in btrfs_load_block_group_zone_info()
1697 if (!ret && num_conventional && last_alloc > cache->alloc_offset) { in btrfs_load_block_group_zone_info()
1700 logical, last_alloc, cache->alloc_offset); in btrfs_load_block_group_zone_info()
1701 ret = -EIO; in btrfs_load_block_group_zone_info()
1705 cache->meta_write_pointer = cache->alloc_offset + cache->start; in btrfs_load_block_group_zone_info()
1706 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) { in btrfs_load_block_group_zone_info()
1707 btrfs_get_block_group(cache); in btrfs_load_block_group_zone_info()
1708 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1709 list_add_tail(&cache->active_bg_list, in btrfs_load_block_group_zone_info()
1710 &fs_info->zone_active_bgs); in btrfs_load_block_group_zone_info()
1711 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1714 btrfs_free_chunk_map(cache->physical_map); in btrfs_load_block_group_zone_info()
1715 cache->physical_map = NULL; in btrfs_load_block_group_zone_info()
1723 void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) in btrfs_calc_zone_unusable() argument
1727 if (!btrfs_is_zoned(cache->fs_info)) in btrfs_calc_zone_unusable()
1730 WARN_ON(cache->bytes_super != 0); in btrfs_calc_zone_unusable()
1731 unusable = (cache->alloc_offset - cache->used) + in btrfs_calc_zone_unusable()
1732 (cache->length - cache->zone_capacity); in btrfs_calc_zone_unusable()
1733 free = cache->zone_capacity - cache->alloc_offset; in btrfs_calc_zone_unusable()
1735 /* We only need ->free_space in ALLOC_SEQ block groups */ in btrfs_calc_zone_unusable()
1736 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_calc_zone_unusable()
1737 cache->free_space_ctl->free_space = free; in btrfs_calc_zone_unusable()
1738 cache->zone_unusable = unusable; in btrfs_calc_zone_unusable()
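The two formulas above split a zoned block group into free space still writable below the zone capacity and zone_unusable space that can only be recovered by resetting the zone. A quick standalone check with made-up sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical zoned block group (all values in bytes). */
	uint64_t length        = 1ULL << 30;	/* 1GiB zone size         */
	uint64_t zone_capacity = 896ULL << 20;	/* usable capacity        */
	uint64_t alloc_offset  = 512ULL << 20;	/* write pointer offset   */
	uint64_t used          = 300ULL << 20;	/* live (referenced) data */

	uint64_t unusable = (alloc_offset - used) + (length - zone_capacity);
	uint64_t freed    = zone_capacity - alloc_offset;

	printf("free = %llu MiB, zone_unusable = %llu MiB\n",
	       (unsigned long long)(freed >> 20),
	       (unsigned long long)(unusable >> 20));	/* 384 MiB, 340 MiB */
	return 0;
}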
1743 u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT); in btrfs_use_zone_append()
1744 struct btrfs_inode *inode = bbio->inode; in btrfs_use_zone_append()
1745 struct btrfs_fs_info *fs_info = bbio->fs_info; in btrfs_use_zone_append()
1746 struct btrfs_block_group *cache; in btrfs_use_zone_append() local
1755 if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) in btrfs_use_zone_append()
1761 * Furthermore we have set aside own block-group from which only the in btrfs_use_zone_append()
1766 if (btrfs_is_data_reloc_root(inode->root)) in btrfs_use_zone_append()
1769 cache = btrfs_lookup_block_group(fs_info, start); in btrfs_use_zone_append()
1770 ASSERT(cache); in btrfs_use_zone_append()
1771 if (!cache) in btrfs_use_zone_append()
1774 ret = !!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_use_zone_append()
1775 btrfs_put_block_group(cache); in btrfs_use_zone_append()
1782 const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; in btrfs_record_physical_zoned()
1783 struct btrfs_ordered_sum *sum = bbio->sums; in btrfs_record_physical_zoned()
1785 if (physical < bbio->orig_physical) in btrfs_record_physical_zoned()
1786 sum->logical -= bbio->orig_physical - physical; in btrfs_record_physical_zoned()
1788 sum->logical += physical - bbio->orig_physical; in btrfs_record_physical_zoned()
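With zone append the device picks the final write location, so the checksum record's logical address must be shifted by the distance between where the write actually landed and where it was originally mapped; the two branches above only exist to keep the u64 arithmetic non-negative. A tiny check of that adjustment, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* Shift @logical by the distance between where the zone-append write really
 * landed (@physical) and where it was originally mapped (@orig_physical). */
static uint64_t rewrite_logical(uint64_t logical, uint64_t physical,
				uint64_t orig_physical)
{
	if (physical < orig_physical)
		return logical - (orig_physical - physical);
	return logical + (physical - orig_physical);
}

int main(void)
{
	/* The append landed 64KiB past the originally mapped spot. */
	uint64_t logical = 0x40000000, orig_physical = 0x10000000;
	uint64_t physical = orig_physical + 0x10000;

	printf("adjusted logical = 0x%llx\n",		/* 0x40010000 */
	       (unsigned long long)rewrite_logical(logical, physical,
						   orig_physical));
	return 0;
}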
1794 struct extent_map_tree *em_tree = &ordered->inode->extent_tree; in btrfs_rewrite_logical_zoned()
1797 ordered->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1799 write_lock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1800 em = search_extent_mapping(em_tree, ordered->file_offset, in btrfs_rewrite_logical_zoned()
1801 ordered->num_bytes); in btrfs_rewrite_logical_zoned()
1803 ASSERT(em->offset == 0); in btrfs_rewrite_logical_zoned()
1804 em->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1806 write_unlock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1814 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && in btrfs_zoned_split_ordered()
1815 split_extent_map(ordered->inode, ordered->file_offset, in btrfs_zoned_split_ordered()
1816 ordered->num_bytes, len, logical)) in btrfs_zoned_split_ordered()
1822 new->disk_bytenr = logical; in btrfs_zoned_split_ordered()
1829 struct btrfs_inode *inode = ordered->inode; in btrfs_finish_ordered_zoned()
1830 struct btrfs_fs_info *fs_info = inode->root->fs_info; in btrfs_finish_ordered_zoned()
1835 * Write to pre-allocated region is for the data relocation, and so in btrfs_finish_ordered_zoned()
1838 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) in btrfs_finish_ordered_zoned()
1841 ASSERT(!list_empty(&ordered->list)); in btrfs_finish_ordered_zoned()
1842 /* The ordered->list can be empty in the above pre-alloc case. */ in btrfs_finish_ordered_zoned()
1843 sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list); in btrfs_finish_ordered_zoned()
1844 logical = sum->logical; in btrfs_finish_ordered_zoned()
1845 len = sum->len; in btrfs_finish_ordered_zoned()
1847 while (len < ordered->disk_num_bytes) { in btrfs_finish_ordered_zoned()
1849 if (sum->logical == logical + len) { in btrfs_finish_ordered_zoned()
1850 len += sum->len; in btrfs_finish_ordered_zoned()
1854 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); in btrfs_finish_ordered_zoned()
1858 logical = sum->logical; in btrfs_finish_ordered_zoned()
1859 len = sum->len; in btrfs_finish_ordered_zoned()
1862 if (ordered->disk_bytenr != logical) in btrfs_finish_ordered_zoned()
1867 * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures in btrfs_finish_ordered_zoned()
1872 if ((inode->flags & BTRFS_INODE_NODATASUM) || in btrfs_finish_ordered_zoned()
1873 test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) { in btrfs_finish_ordered_zoned()
1874 while ((sum = list_first_entry_or_null(&ordered->list, in btrfs_finish_ordered_zoned()
1876 list_del(&sum->list); in btrfs_finish_ordered_zoned()
1885 const struct writeback_control *wbc = ctx->wbc; in check_bg_is_active()
1886 struct btrfs_block_group *block_group = ctx->zoned_bg; in check_bg_is_active()
1887 struct btrfs_fs_info *fs_info = block_group->fs_info; in check_bg_is_active()
1889 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) in check_bg_is_active()
1892 if (fs_info->treelog_bg == block_group->start) { in check_bg_is_active()
1902 /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */ in check_bg_is_active()
1903 lockdep_assert_held(&fs_info->zoned_meta_io_lock); in check_bg_is_active()
1910 if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) { in check_bg_is_active()
1911 if (wbc->sync_mode == WB_SYNC_NONE || in check_bg_is_active()
1912 (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)) in check_bg_is_active()
1916 /* Pivot active metadata/system block group. */ in check_bg_is_active()
1939 * Check if @ctx->eb is aligned to the write pointer.
1942 * 0: @ctx->eb is at the write pointer. You can write it.
1943 * -EAGAIN: There is a hole. The caller should handle the case.
1944 * -EBUSY: There is a hole, but the caller can just bail out.
1949 const struct writeback_control *wbc = ctx->wbc; in btrfs_check_meta_write_pointer()
1950 const struct extent_buffer *eb = ctx->eb; in btrfs_check_meta_write_pointer()
1951 struct btrfs_block_group *block_group = ctx->zoned_bg; in btrfs_check_meta_write_pointer()
1957 if (block_group->start > eb->start || in btrfs_check_meta_write_pointer()
1958 block_group->start + block_group->length <= eb->start) { in btrfs_check_meta_write_pointer()
1961 ctx->zoned_bg = NULL; in btrfs_check_meta_write_pointer()
1966 block_group = btrfs_lookup_block_group(fs_info, eb->start); in btrfs_check_meta_write_pointer()
1969 ctx->zoned_bg = block_group; in btrfs_check_meta_write_pointer()
1972 if (block_group->meta_write_pointer == eb->start) { in btrfs_check_meta_write_pointer()
1975 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_meta_write_pointer()
1978 if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) in btrfs_check_meta_write_pointer()
1979 tgt = &fs_info->active_system_bg; in btrfs_check_meta_write_pointer()
1981 tgt = &fs_info->active_meta_bg; in btrfs_check_meta_write_pointer()
1987 * Since we may release fs_info->zoned_meta_io_lock, someone can already in btrfs_check_meta_write_pointer()
1990 if (block_group->meta_write_pointer > eb->start) in btrfs_check_meta_write_pointer()
1991 return -EBUSY; in btrfs_check_meta_write_pointer()
1994 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) in btrfs_check_meta_write_pointer()
1995 return -EAGAIN; in btrfs_check_meta_write_pointer()
1996 return -EBUSY; in btrfs_check_meta_write_pointer()
2002 return -EOPNOTSUPP; in btrfs_zoned_issue_zeroout()
2004 return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, in btrfs_zoned_issue_zeroout()
2015 int i, ret; in read_zone_info() local
2020 ret = -EIO; in read_zone_info()
2024 if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { in read_zone_info()
2025 ret = -EINVAL; in read_zone_info()
2030 nmirrors = (int)bioc->num_stripes; in read_zone_info()
2031 for (i = 0; i < nmirrors; i++) { in read_zone_info()
2032 u64 physical = bioc->stripes[i].physical; in read_zone_info()
2033 struct btrfs_device *dev = bioc->stripes[i].dev; in read_zone_info()
2036 if (!dev->bdev) in read_zone_info()
2041 if (ret == -EIO || ret == -EOPNOTSUPP) in read_zone_info()
2053 * filling zeros between @physical_pos and the write pointer of the dev-replace
2059 struct btrfs_fs_info *fs_info = tgt_dev->fs_info; in btrfs_sync_zone_write_pointer()
2072 wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_sync_zone_write_pointer()
2078 return -EUCLEAN; in btrfs_sync_zone_write_pointer()
2080 length = wp - physical_pos; in btrfs_sync_zone_write_pointer()
2085 * Activate block group and underlying device zones
2087 * @block_group: the block group to activate
2093 struct btrfs_fs_info *fs_info = block_group->fs_info; in btrfs_zone_activate()
2097 const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA); in btrfs_zone_activate()
2099 int i; in btrfs_zone_activate() local
2101 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_activate()
2104 map = block_group->physical_map; in btrfs_zone_activate()
2106 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2107 spin_lock(&block_group->lock); in btrfs_zone_activate()
2108 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in btrfs_zone_activate()
2119 for (i = 0; i < map->num_stripes; i++) { in btrfs_zone_activate()
2123 device = map->stripes[i].dev; in btrfs_zone_activate()
2124 physical = map->stripes[i].physical; in btrfs_zone_activate()
2125 zinfo = device->zone_info; in btrfs_zone_activate()
2127 if (!device->bdev) in btrfs_zone_activate()
2130 if (zinfo->max_active_zones == 0) in btrfs_zone_activate()
2134 reserved = zinfo->reserved_active_zones; in btrfs_zone_activate()
2136 * For the data block group, leave active zones for one in btrfs_zone_activate()
2137 * metadata block group and one system block group. in btrfs_zone_activate()
2139 if (atomic_read(&zinfo->active_zones_left) <= reserved) { in btrfs_zone_activate()
2150 zinfo->reserved_active_zones--; in btrfs_zone_activate()
2154 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in btrfs_zone_activate()
2155 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2157 /* For the active block group list */ in btrfs_zone_activate()
2159 list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); in btrfs_zone_activate()
2160 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2165 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2166 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2172 struct btrfs_fs_info *fs_info = block_group->fs_info; in wait_eb_writebacks()
2173 const u64 end = block_group->start + block_group->length; in wait_eb_writebacks()
2179 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, in wait_eb_writebacks()
2180 block_group->start >> fs_info->sectorsize_bits) { in wait_eb_writebacks()
2189 if (eb->start < block_group->start) in wait_eb_writebacks()
2191 if (eb->start >= end) in wait_eb_writebacks()
2204 struct btrfs_fs_info *fs_info = block_group->fs_info; in do_zone_finish()
2206 const bool is_metadata = (block_group->flags & in do_zone_finish()
2208 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in do_zone_finish()
2210 int i; in do_zone_finish() local
2212 spin_lock(&block_group->lock); in do_zone_finish()
2213 if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in do_zone_finish()
2214 spin_unlock(&block_group->lock); in do_zone_finish()
2220 block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { in do_zone_finish()
2221 spin_unlock(&block_group->lock); in do_zone_finish()
2222 return -EAGAIN; in do_zone_finish()
2226 * If we are sure that the block group is full (= no more room left for in do_zone_finish()
2227 * new allocation) and the IO for the last usable block is completed, we in do_zone_finish()
2230 * and block_group->meta_write_pointer for metadata. in do_zone_finish()
2233 if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in do_zone_finish()
2234 spin_unlock(&block_group->lock); in do_zone_finish()
2235 return -EAGAIN; in do_zone_finish()
2237 spin_unlock(&block_group->lock); in do_zone_finish()
2243 /* Ensure all writes in this block group finish */ in do_zone_finish()
2251 spin_lock(&block_group->lock); in do_zone_finish()
2254 * Bail out if someone already deactivated the block group, or in do_zone_finish()
2255 * allocated space is left in the block group. in do_zone_finish()
2258 &block_group->runtime_flags)) { in do_zone_finish()
2259 spin_unlock(&block_group->lock); in do_zone_finish()
2264 if (block_group->reserved || in do_zone_finish()
2266 &block_group->runtime_flags)) { in do_zone_finish()
2267 spin_unlock(&block_group->lock); in do_zone_finish()
2269 return -EAGAIN; in do_zone_finish()
2273 clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in do_zone_finish()
2274 block_group->alloc_offset = block_group->zone_capacity; in do_zone_finish()
2275 if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) in do_zone_finish()
2276 block_group->meta_write_pointer = block_group->start + in do_zone_finish()
2277 block_group->zone_capacity; in do_zone_finish()
2278 block_group->free_space_ctl->free_space = 0; in do_zone_finish()
2281 spin_unlock(&block_group->lock); in do_zone_finish()
2283 down_read(&dev_replace->rwsem); in do_zone_finish()
2284 map = block_group->physical_map; in do_zone_finish()
2285 for (i = 0; i < map->num_stripes; i++) { in do_zone_finish()
2286 struct btrfs_device *device = map->stripes[i].dev; in do_zone_finish()
2287 const u64 physical = map->stripes[i].physical; in do_zone_finish()
2288 struct btrfs_zoned_device_info *zinfo = device->zone_info; in do_zone_finish()
2291 if (!device->bdev) in do_zone_finish()
2294 if (zinfo->max_active_zones == 0) in do_zone_finish()
2298 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, in do_zone_finish()
2300 zinfo->zone_size >> SECTOR_SHIFT); in do_zone_finish()
2304 up_read(&dev_replace->rwsem); in do_zone_finish()
2308 if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) in do_zone_finish()
2309 zinfo->reserved_active_zones++; in do_zone_finish()
2312 up_read(&dev_replace->rwsem); in do_zone_finish()
2317 spin_lock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2318 ASSERT(!list_empty(&block_group->active_bg_list)); in do_zone_finish()
2319 list_del_init(&block_group->active_bg_list); in do_zone_finish()
2320 spin_unlock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2325 clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in do_zone_finish()
2332 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_finish()
2340 struct btrfs_fs_info *fs_info = fs_devices->fs_info; in btrfs_can_activate_zone()
2347 if (test_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags)) in btrfs_can_activate_zone()
2351 mutex_lock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2352 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2353 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { in btrfs_can_activate_zone()
2354 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_can_activate_zone()
2357 if (!device->bdev) in btrfs_can_activate_zone()
2360 if (!zinfo->max_active_zones) { in btrfs_can_activate_zone()
2366 reserved = zinfo->reserved_active_zones; in btrfs_can_activate_zone()
2370 ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved)); in btrfs_can_activate_zone()
2373 ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved)); in btrfs_can_activate_zone()
2379 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2380 mutex_unlock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2383 set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in btrfs_can_activate_zone()
2400 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) in btrfs_zone_finish_endio()
2401 min_alloc_bytes = fs_info->sectorsize; in btrfs_zone_finish_endio()
2403 min_alloc_bytes = fs_info->nodesize; in btrfs_zone_finish_endio()
2405 /* Bail out if we can allocate more data from this block group. */ in btrfs_zone_finish_endio()
2407 block_group->start + block_group->zone_capacity) in btrfs_zone_finish_endio()
2421 wait_on_extent_buffer_writeback(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2422 free_extent_buffer(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2423 btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length); in btrfs_zone_finish_endio_workfn()
2430 if (!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &bg->runtime_flags) || in btrfs_schedule_zone_finish_bg()
2431 eb->start + eb->len * 2 <= bg->start + bg->zone_capacity) in btrfs_schedule_zone_finish_bg()
2434 if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) { in btrfs_schedule_zone_finish_bg()
2435 btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing", in btrfs_schedule_zone_finish_bg()
2436 bg->start); in btrfs_schedule_zone_finish_bg()
2442 atomic_inc(&eb->refs); in btrfs_schedule_zone_finish_bg()
2443 bg->last_eb = eb; in btrfs_schedule_zone_finish_bg()
2444 INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); in btrfs_schedule_zone_finish_bg()
2445 queue_work(system_unbound_wq, &bg->zone_finish_work); in btrfs_schedule_zone_finish_bg()
2450 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_clear_data_reloc_bg()
2452 spin_lock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2453 if (fs_info->data_reloc_bg == bg->start) in btrfs_clear_data_reloc_bg()
2454 fs_info->data_reloc_bg = 0; in btrfs_clear_data_reloc_bg()
2455 spin_unlock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2460 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_free_zone_cache()
2466 mutex_lock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2467 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_free_zone_cache()
2468 if (device->zone_info) { in btrfs_free_zone_cache()
2469 vfree(device->zone_info->zone_cache); in btrfs_free_zone_cache()
2470 device->zone_info->zone_cache = NULL; in btrfs_free_zone_cache()
2473 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
	/* btrfs_zoned_should_reclaim(): reclaim once overall disk usage crosses the threshold. */
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 used = 0, total = 0, factor;

	if (fs_info->bg_reclaim_threshold == 0)
		return false;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		if (!device->bdev)
			continue;
		total += device->disk_total_bytes;
		used += device->bytes_used;
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	/* Usage as a percentage of all devices, compared against bg_reclaim_threshold. */
	factor = div64_u64(used * 100, total);
	return factor >= fs_info->bg_reclaim_threshold;

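/*
 * Illustrative user-space sketch (not part of zoned.c): the reclaim decision
 * above compares overall disk usage, as a percentage, against the
 * bg_reclaim_threshold setting. Names below are invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool example_should_reclaim(const uint64_t *total, const uint64_t *used,
				   int nr_devices, uint64_t threshold_percent)
{
	uint64_t total_bytes = 0, used_bytes = 0;

	if (threshold_percent == 0)	/* reclaim disabled */
		return false;
	for (int i = 0; i < nr_devices; i++) {
		total_bytes += total[i];
		used_bytes += used[i];
	}
	/* Same arithmetic as factor = div64_u64(used * 100, total) in the kernel. */
	return (used_bytes * 100) / total_bytes >= threshold_percent;
}

int main(void)
{
	const uint64_t gib = 1024ULL * 1024 * 1024;
	uint64_t total[2] = { 100 * gib, 100 * gib };
	uint64_t used[2]  = { 80 * gib, 72 * gib };

	/* 152 GiB used of 200 GiB is 76%: above a 75% threshold, below 80%. */
	printf("%d %d\n", example_should_reclaim(total, used, 2, 75),
			  example_should_reclaim(total, used, 2, 80));
	return 0;
}
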
	/* btrfs_zoned_release_data_reloc_bg(): it should be called on a previous data relocation block group. */
	ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));

	spin_lock(&block_group->lock);
	if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))
		goto out;

	/* All relocation extents up to @logical + @length are written. */
	if (block_group->start + block_group->alloc_offset == logical + length) {
		/*
		 * Now, release this block group for further allocations and
		 * zone finish.
		 */
		clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
			  &block_group->runtime_flags);
	}
out:
	spin_unlock(&block_group->lock);

	/* btrfs_zone_finish_one_bg(): finish the active block group with the least remaining space. */
	spin_lock(&fs_info->zone_active_bgs_lock);
	list_for_each_entry(block_group, &fs_info->zone_active_bgs,
			    active_bg_list) {
		u64 avail;

		spin_lock(&block_group->lock);
		/* Skip reserved, empty, SYSTEM, and data-relocation block groups. */
		if (block_group->reserved || block_group->alloc_offset == 0 ||
		    (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
		    test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
			spin_unlock(&block_group->lock);
			continue;
		}

		/* The candidate with the smallest unallocated zone capacity gets finished. */
		avail = block_group->zone_capacity - block_group->alloc_offset;
		spin_unlock(&block_group->lock);
	}
	spin_unlock(&fs_info->zone_active_bgs_lock);

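/*
 * Illustrative user-space sketch (not part of zoned.c): among the eligible
 * active block groups, the one with the least unallocated zone capacity
 * (zone_capacity - alloc_offset) is the cheapest to finish, since it frees an
 * active zone while wasting the least writable space. Invented names below.
 */
#include <stdint.h>
#include <stdio.h>

struct example_bg {
	uint64_t zone_capacity;
	uint64_t alloc_offset;
};

static int example_pick_bg_to_finish(const struct example_bg *bgs, int nr)
{
	uint64_t min_avail = UINT64_MAX;
	int pick = -1;

	for (int i = 0; i < nr; i++) {
		uint64_t avail = bgs[i].zone_capacity - bgs[i].alloc_offset;

		if (avail < min_avail) {
			min_avail = avail;
			pick = i;
		}
	}
	return pick;
}

int main(void)
{
	struct example_bg bgs[3] = {
		{ .zone_capacity = 256 << 20, .alloc_offset = 64 << 20 },
		{ .zone_capacity = 256 << 20, .alloc_offset = 250 << 20 },
		{ .zone_capacity = 256 << 20, .alloc_offset = 128 << 20 },
	};

	/* The nearly full block group (index 1) wastes only 6 MiB when finished. */
	printf("finish bg index %d\n", example_pick_bg_to_finish(bgs, 3));
	return 0;
}
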
	/* btrfs_zoned_activate_one_bg(): only metadata/system space infos need this. */
	if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
		return 0;

	down_read(&space_info->groups_sem);
	list_for_each_entry(bg, &space_info->block_groups[index],
			    list) {
		if (!spin_trylock(&bg->lock))
			continue;
		/* Skip block groups that are full or already have an active zone. */
		if (btrfs_zoned_bg_is_full(bg) ||
		    test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
			     &bg->runtime_flags)) {
			spin_unlock(&bg->lock);
			continue;
		}
		spin_unlock(&bg->lock);
	}
	up_read(&space_info->groups_sem);

/*
 * Reserve zones for one metadata block group, one tree-log block group, and one
 * system block group.
 */
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_block_group *block_group;
	struct btrfs_device *device;
	/* Reserve zones for normal SINGLE metadata and tree-log block group. */
	unsigned int metadata_reserve = 2;
	/* Reserve a zone for SINGLE system block group. */
	unsigned int system_reserve = 1;

	if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
		return;

	/* DUP block groups need twice as many active zones. */
	if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
		metadata_reserve = 4;
	if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
		system_reserve = 2;

	/* Apply the reservation on all the devices. */
	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		if (!device->bdev)
			continue;
		device->zone_info->reserved_active_zones =
			metadata_reserve + system_reserve;
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	/* Release reservation for currently active block groups. */
	spin_lock(&fs_info->zone_active_bgs_lock);
	list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
		struct btrfs_chunk_map *map = block_group->physical_map;

		if (!(block_group->flags &
		      (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
			continue;

		for (int i = 0; i < map->num_stripes; i++)
			map->stripes[i].dev->zone_info->reserved_active_zones--;
	}
	spin_unlock(&fs_info->zone_active_bgs_lock);
}

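/*
 * Illustrative user-space sketch (not part of zoned.c): the per-device
 * reservation computed above is two zones for SINGLE metadata (one regular
 * metadata block group plus one tree-log block group) and one zone for a
 * SINGLE system block group, doubled when the respective profile is DUP.
 * Names below are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int example_reserved_active_zones(bool meta_dup, bool sys_dup)
{
	unsigned int metadata_reserve = meta_dup ? 4 : 2;
	unsigned int system_reserve = sys_dup ? 2 : 1;

	return metadata_reserve + system_reserve;
}

int main(void)
{
	printf("single/single: %u\n", example_reserved_active_zones(false, false)); /* 3 */
	printf("dup/dup:       %u\n", example_reserved_active_zones(true, true));   /* 6 */
	return 0;
}
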
/*
 * Reset the zones of unused block groups from @space_info->bytes_zone_unusable.
 *
 * @space_info:	the space to work on
 * @num_bytes:	targeted number of bytes to reclaim
 *
 * This one resets the zones of a block group, so we can reuse the region
 * without removing the block group. On the other hand, btrfs_delete_unused_bgs()
 * just removes a block group and frees up the underlying zones. So, we still
 * need to allocate a new block group to reuse the zones.
 *
 * Resetting is faster than deleting/recreating a block group. It is similar to
 * reusing a dirty extent rather than allocating a new one, but we cannot change
 * the block group's profile with this operation.
 */
int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = space_info->fs_info;
	const sector_t zone_size_sectors = fs_info->zone_size >> SECTOR_SHIFT;

	while (num_bytes > 0) {
		struct btrfs_chunk_map *map;
		struct btrfs_block_group *bg = NULL;
		bool found = false;
		u64 reclaimed = 0;

		/*
		 * Here, we choose a fully zone_unusable block group. It's
		 * technically possible to reset a partly zone_unusable block
		 * group, but the current zone reset API does not support it.
		 */
		spin_lock(&fs_info->unused_bgs_lock);
		list_for_each_entry(bg, &fs_info->unused_bgs, bg_list) {
			if ((bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != space_info->flags)
				continue;
			/*
			 * Use trylock to avoid a lock order violation against
			 * &bg->lock -> &fs_info->unused_bgs_lock. We skip a
			 * block group if we cannot take its lock.
			 */
			if (!spin_trylock(&bg->lock))
				continue;
			if (btrfs_is_block_group_used(bg) || bg->zone_unusable < bg->length) {
				spin_unlock(&bg->lock);
				continue;
			}
			spin_unlock(&bg->lock);
			found = true;
			break;
		}
		if (!found) {
			spin_unlock(&fs_info->unused_bgs_lock);
			return 0;
		}

		list_del_init(&bg->bg_list);
		spin_unlock(&fs_info->unused_bgs_lock);
		/*
		 * Since the block group is fully zone_unusable and we cannot
		 * allocate from this block group anymore, we don't need to set
		 * this block group read-only.
		 */
		down_read(&fs_info->dev_replace.rwsem);
		map = bg->physical_map;
		for (int i = 0; i < map->num_stripes; i++) {
			struct btrfs_io_stripe *stripe = &map->stripes[i];
			unsigned int nofs_flags;
			int ret;

			nofs_flags = memalloc_nofs_save();
			ret = blkdev_zone_mgmt(stripe->dev->bdev, REQ_OP_ZONE_RESET,
					       stripe->physical >> SECTOR_SHIFT,
					       zone_size_sectors);
			memalloc_nofs_restore(nofs_flags);
			if (ret) {
				up_read(&fs_info->dev_replace.rwsem);
				return ret;
			}
		}
		up_read(&fs_info->dev_replace.rwsem);
		spin_lock(&space_info->lock);
		spin_lock(&bg->lock);
		if (bg->ro) {
			spin_unlock(&bg->lock);
			spin_unlock(&space_info->lock);
			continue;
		}

		reclaimed = bg->alloc_offset;
		bg->zone_unusable = bg->length - bg->zone_capacity;
		bg->alloc_offset = 0;
		/*
		 * This holds because we currently reset a fully used and then
		 * freed block group.
		 */
		ASSERT(reclaimed == bg->zone_capacity);
		bg->free_space_ctl->free_space += reclaimed;
		space_info->bytes_zone_unusable -= reclaimed;
		spin_unlock(&bg->lock);
		spin_unlock(&space_info->lock);

		if (num_bytes <= reclaimed)
			break;
		num_bytes -= reclaimed;
	}

	return 0;
}
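/*
 * Illustrative user-space sketch (not part of zoned.c): the reclaim loop above
 * keeps resetting fully zone_unusable block groups, subtracting each group's
 * reclaimed bytes from the target, until the target is met or no candidate is
 * left. Names below are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t example_reset_until(uint64_t num_bytes, const uint64_t *reclaimable, int nr)
{
	uint64_t total_reclaimed = 0;

	for (int i = 0; i < nr && num_bytes > 0; i++) {
		uint64_t reclaimed = reclaimable[i];	/* one block group reset */

		total_reclaimed += reclaimed;
		if (num_bytes <= reclaimed)
			break;
		num_bytes -= reclaimed;
	}
	return total_reclaimed;
}

int main(void)
{
	const uint64_t mib = 1024 * 1024;
	uint64_t bgs[3] = { 256 * mib, 256 * mib, 256 * mib };

	/* A 300 MiB target stops after the second block group: 512 MiB reclaimed. */
	printf("%llu MiB\n", (unsigned long long)(example_reset_until(300 * mib, bgs, 3) / mib));
	return 0;
}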