Lines matching "d-cache-block-size"
1 // SPDX-License-Identifier: GPL-2.0
3 * bcache setup/teardown code, and some metadata io - read a superblock and
66 unsigned int bucket_size = le16_to_cpu(s->bucket_size); in get_bucket_size()
68 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { in get_bucket_size()
72 max = sizeof(unsigned int) * BITS_PER_BYTE - 1; in get_bucket_size()
73 order = le16_to_cpu(s->bucket_size); in get_bucket_size()
79 pr_err("Bucket size (1 << %u) overflows\n", in get_bucket_size()
84 le16_to_cpu(s->obso_bucket_size_hi) << 16; in get_bucket_size()
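
The fragment above shows two encodings of the bucket size: legacy superblocks store the sector count directly in a 16-bit field, while superblocks at BCACHE_SB_VERSION_CDEV_WITH_FEATURES or later may store it as a power-of-two exponent (hence the "1 << %u overflows" check), or, in the obsolete large-bucket layout, split it across bucket_size and obso_bucket_size_hi. A minimal userspace sketch of that decoding follows; which feature flag selects which branch is my assumption, since those lines are not in this extract.

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Userspace model of get_bucket_size(); field names come from the
	 * listing above, the feature split is an assumption. Inputs are
	 * already converted from little-endian.
	 */
	static unsigned int decode_bucket_size(int new_version, int large_bucket,
					       int obso_large_bucket,
					       uint16_t bucket_size_field,
					       uint16_t obso_hi_field)
	{
		unsigned int bucket_size = bucket_size_field;	/* legacy: raw sector count */

		if (new_version && large_bucket) {
			unsigned int order = bucket_size_field;	/* stored as log2(sectors) */
			unsigned int max = sizeof(unsigned int) * 8 - 1;

			if (order > max)
				fprintf(stderr, "Bucket size (1 << %u) overflows\n", order);
			else
				bucket_size = 1U << order;
		} else if (new_version && obso_large_bucket) {
			/* obsolete layout keeps the extra high 16 bits in a separate field */
			bucket_size += (unsigned int)obso_hi_field << 16;
		}

		return bucket_size;
	}
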
97 sb->first_bucket = le16_to_cpu(s->first_bucket); in read_super_common()
98 sb->nbuckets = le64_to_cpu(s->nbuckets); in read_super_common()
99 sb->bucket_size = get_bucket_size(sb, s); in read_super_common()
101 sb->nr_in_set = le16_to_cpu(s->nr_in_set); in read_super_common()
102 sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); in read_super_common()
105 if (sb->keys > SB_JOURNAL_BUCKETS) in read_super_common()
109 if (sb->nbuckets > LONG_MAX) in read_super_common()
113 if (sb->nbuckets < 1 << 7) in read_super_common()
116 err = "Bad block size (not power of 2)"; in read_super_common()
117 if (!is_power_of_2(sb->block_size)) in read_super_common()
120 err = "Bad block size (larger than page size)"; in read_super_common()
121 if (sb->block_size > PAGE_SECTORS) in read_super_common()
124 err = "Bad bucket size (not power of 2)"; in read_super_common()
125 if (!is_power_of_2(sb->bucket_size)) in read_super_common()
128 err = "Bad bucket size (smaller than page size)"; in read_super_common()
129 if (sb->bucket_size < PAGE_SECTORS) in read_super_common()
133 if (get_capacity(bdev->bd_disk) < in read_super_common()
134 sb->bucket_size * sb->nbuckets) in read_super_common()
138 if (bch_is_zero(sb->set_uuid, 16)) in read_super_common()
141 err = "Bad cache device number in set"; in read_super_common()
142 if (!sb->nr_in_set || in read_super_common()
143 sb->nr_in_set <= sb->nr_this_dev || in read_super_common()
144 sb->nr_in_set > MAX_CACHES_PER_SET) in read_super_common()
148 for (i = 0; i < sb->keys; i++) in read_super_common()
149 if (sb->d[i] != sb->first_bucket + i) in read_super_common()
153 if (sb->first_bucket + sb->keys > sb->nbuckets) in read_super_common()
157 if (sb->first_bucket * sb->bucket_size < 16) in read_super_common()
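
A worked example of the geometry checks above, assuming 4 KiB pages so that PAGE_SECTORS is 8: block_size must be a power of two of at most 8 sectors, bucket_size a power of two of at least 8 sectors, nbuckets must lie between 128 (1 << 7) and LONG_MAX, and bucket_size * nbuckets must fit on the device; for instance, bucket_size = 1024 sectors (512 KiB) with nbuckets = 262,144 needs at least 128 GiB of capacity. The journal keys in d[] must be the contiguous run first_bucket, first_bucket + 1, ..., first_bucket + keys - 1, which must stay below nbuckets, and first_bucket * bucket_size must be at least 16 sectors, presumably so the first bucket begins past the on-disk superblock that read_super() reads from SB_SECTOR.
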
174 page = read_cache_page_gfp(bdev->bd_mapping, in read_super()
180 sb->offset = le64_to_cpu(s->offset); in read_super()
181 sb->version = le64_to_cpu(s->version); in read_super()
183 memcpy(sb->magic, s->magic, 16); in read_super()
184 memcpy(sb->uuid, s->uuid, 16); in read_super()
185 memcpy(sb->set_uuid, s->set_uuid, 16); in read_super()
186 memcpy(sb->label, s->label, SB_LABEL_SIZE); in read_super()
188 sb->flags = le64_to_cpu(s->flags); in read_super()
189 sb->seq = le64_to_cpu(s->seq); in read_super()
190 sb->last_mount = le32_to_cpu(s->last_mount); in read_super()
191 sb->keys = le16_to_cpu(s->keys); in read_super()
194 sb->d[i] = le64_to_cpu(s->d[i]); in read_super()
196 pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n", in read_super()
197 sb->version, sb->flags, sb->seq, sb->keys); in read_super()
200 if (sb->offset != SB_SECTOR) in read_super()
204 if (memcmp(sb->magic, bcache_magic, 16)) in read_super()
208 if (s->csum != csum_set(s)) in read_super()
212 if (bch_is_zero(sb->uuid, 16)) in read_super()
215 sb->block_size = le16_to_cpu(s->block_size); in read_super()
217 err = "Superblock block size smaller than device block size"; in read_super()
218 if (sb->block_size << 9 < bdev_logical_block_size(bdev)) in read_super()
221 switch (sb->version) { in read_super()
223 sb->data_offset = BDEV_DATA_START_DEFAULT; in read_super()
227 sb->data_offset = le64_to_cpu(s->data_offset); in read_super()
230 if (sb->data_offset < BDEV_DATA_START_DEFAULT) in read_super()
245 sb->feature_compat = le64_to_cpu(s->feature_compat); in read_super()
246 sb->feature_incompat = le64_to_cpu(s->feature_incompat); in read_super()
247 sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); in read_super()
254 err = "Unsupported read-only compatible feature found"; in read_super()
271 sb->last_mount = (u32)ktime_get_real_seconds(); in read_super()
281 struct cached_dev *dc = bio->bi_private; in write_bdev_super_endio()
283 if (bio->bi_status) in write_bdev_super_endio()
286 closure_put(&dc->sb_write); in write_bdev_super_endio()
294 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META; in __write_super()
295 bio->bi_iter.bi_sector = SB_SECTOR; in __write_super()
299 out->offset = cpu_to_le64(sb->offset); in __write_super()
301 memcpy(out->uuid, sb->uuid, 16); in __write_super()
302 memcpy(out->set_uuid, sb->set_uuid, 16); in __write_super()
303 memcpy(out->label, sb->label, SB_LABEL_SIZE); in __write_super()
305 out->flags = cpu_to_le64(sb->flags); in __write_super()
306 out->seq = cpu_to_le64(sb->seq); in __write_super()
308 out->last_mount = cpu_to_le32(sb->last_mount); in __write_super()
309 out->first_bucket = cpu_to_le16(sb->first_bucket); in __write_super()
310 out->keys = cpu_to_le16(sb->keys); in __write_super()
312 for (i = 0; i < sb->keys; i++) in __write_super()
313 out->d[i] = cpu_to_le64(sb->d[i]); in __write_super()
315 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { in __write_super()
316 out->feature_compat = cpu_to_le64(sb->feature_compat); in __write_super()
317 out->feature_incompat = cpu_to_le64(sb->feature_incompat); in __write_super()
318 out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat); in __write_super()
321 out->version = cpu_to_le64(sb->version); in __write_super()
322 out->csum = csum_set(out); in __write_super()
325 sb->version, sb->flags, sb->seq); in __write_super()
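
__write_super() above copies the in-memory superblock back into its little-endian on-disk form and fills out->csum last, after every other field including version, so the checksum covers the final values; the page is then submitted as a synchronous metadata write (REQ_OP_WRITE | REQ_SYNC | REQ_META) to SB_SECTOR. A small userspace sketch of the same fill-then-checksum-last pattern; the struct and the toy checksum are illustrative stand-ins, not the real cache_sb_disk layout or bcache's CRC-64, and skipping the leading csum field mirrors what csum_set() appears to do.

	#include <stddef.h>
	#include <stdint.h>

	struct sb_disk_sketch {	/* stand-in, not struct cache_sb_disk */
		uint64_t csum;	/* kept first: the checksum covers everything after it */
		uint64_t version;
		uint64_t seq;
		uint64_t flags;
	};

	/* Toy checksum just for the sketch; bcache uses a CRC-64 via csum_set(). */
	static uint64_t toy_csum(const void *p, size_t len)
	{
		const uint8_t *b = p;
		uint64_t sum = 0;

		while (len--)
			sum = sum * 31 + *b++;
		return sum;
	}

	static void fill_then_checksum(struct sb_disk_sketch *out,
				       uint64_t version, uint64_t seq, uint64_t flags)
	{
		out->seq = seq;
		out->flags = flags;
		out->version = version;
		/* Compute the checksum last, over everything after the csum field itself. */
		out->csum = toy_csum((const uint8_t *)out + sizeof(out->csum),
				     sizeof(*out) - sizeof(out->csum));
	}
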
334 up(&dc->sb_write_mutex); in CLOSURE_CALLBACK()
339 struct closure *cl = &dc->sb_write; in bch_write_bdev_super()
340 struct bio *bio = &dc->sb_bio; in bch_write_bdev_super()
342 down(&dc->sb_write_mutex); in bch_write_bdev_super()
345 bio_init(bio, dc->bdev, dc->sb_bv, 1, 0); in bch_write_bdev_super()
346 bio->bi_end_io = write_bdev_super_endio; in bch_write_bdev_super()
347 bio->bi_private = dc; in bch_write_bdev_super()
351 __write_super(&dc->sb, dc->sb_disk, bio); in bch_write_bdev_super()
358 struct cache *ca = bio->bi_private; in write_super_endio()
361 bch_count_io_errors(ca, bio->bi_status, 0, in write_super_endio()
363 closure_put(&ca->set->sb_write); in write_super_endio()
370 up(&c->sb_write_mutex); in CLOSURE_CALLBACK()
375 struct closure *cl = &c->sb_write; in bcache_write_super()
376 struct cache *ca = c->cache; in bcache_write_super()
377 struct bio *bio = &ca->sb_bio; in bcache_write_super()
380 down(&c->sb_write_mutex); in bcache_write_super()
381 closure_init(cl, &c->cl); in bcache_write_super()
383 ca->sb.seq++; in bcache_write_super()
385 if (ca->sb.version < version) in bcache_write_super()
386 ca->sb.version = version; in bcache_write_super()
388 bio_init(bio, ca->bdev, ca->sb_bv, 1, 0); in bcache_write_super()
389 bio->bi_end_io = write_super_endio; in bcache_write_super()
390 bio->bi_private = ca; in bcache_write_super()
393 __write_super(&ca->sb, ca->sb_disk, bio); in bcache_write_super()
402 struct closure *cl = bio->bi_private; in uuid_endio()
405 cache_set_err_on(bio->bi_status, c, "accessing uuids"); in uuid_endio()
414 up(&c->uuid_write_mutex); in CLOSURE_CALLBACK()
420 struct closure *cl = &c->uuid_write; in uuid_io()
426 down(&c->uuid_write_mutex); in uuid_io()
432 bio->bi_opf = opf | REQ_SYNC | REQ_META; in uuid_io()
433 bio->bi_iter.bi_size = KEY_SIZE(k) << 9; in uuid_io()
435 bio->bi_end_io = uuid_endio; in uuid_io()
436 bio->bi_private = cl; in uuid_io()
437 bch_bio_map(bio, c->uuids); in uuid_io()
449 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) in uuid_io()
450 if (!bch_is_zero(u->uuid, 16)) in uuid_io()
452 u - c->uuids, u->uuid, u->label, in uuid_io()
453 u->first_reg, u->last_reg, u->invalidated); in uuid_io()
460 struct bkey *k = &j->uuid_bucket; in uuid_read()
465 bkey_copy(&c->uuid_bucket, k); in uuid_read()
468 if (j->version < BCACHE_JSET_VERSION_UUIDv1) { in uuid_read()
469 struct uuid_entry_v0 *u0 = (void *) c->uuids; in uuid_read()
470 struct uuid_entry *u1 = (void *) c->uuids; in uuid_read()
481 for (i = c->nr_uuids - 1; in uuid_read()
483 --i) { in uuid_read()
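
uuid_read() above upgrades an old-format UUID bucket in place: u0 and u1 alias the same c->uuids buffer, and because the new entries are larger the loop runs from the last index down to 0, so every old entry is consumed before the widened array grows over it. A userspace model of the same idea, with the field layout simplified and the names mine:

	#include <stdint.h>
	#include <string.h>

	struct entry_v0 { uint8_t uuid[16]; char label[32]; };			/* old, smaller */
	struct entry_v1 { uint8_t uuid[16]; char label[32]; uint64_t sectors; };/* new, larger */

	/* buf must hold at least nr * sizeof(struct entry_v1) bytes. */
	static void widen_in_place(void *buf, int nr)
	{
		struct entry_v0 *u0 = buf;
		struct entry_v1 *u1 = buf;
		int i;

		/*
		 * Walk from the highest index down: entry i is read before the
		 * larger new entries expand over the old ones that follow it.
		 */
		for (i = nr - 1; i >= 0; --i) {
			struct entry_v0 tmp = u0[i];	/* copy out before overwriting */

			memcpy(u1[i].uuid, tmp.uuid, sizeof(tmp.uuid));
			memcpy(u1[i].label, tmp.label, sizeof(tmp.label));
			u1[i].sectors = 0;		/* new field, nothing to carry over */
		}
	}
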
503 struct cache *ca = c->cache; in __uuid_write()
504 unsigned int size; in __uuid_write() local
512 size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; in __uuid_write()
513 SET_KEY_SIZE(&k.key, size); in __uuid_write()
518 atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); in __uuid_write()
520 bkey_copy(&c->uuid_bucket, &k.key); in __uuid_write()
539 for (u = c->uuids; in uuid_find()
540 u < c->uuids + c->nr_uuids; u++) in uuid_find()
541 if (!memcmp(u->uuid, uuid, 16)) in uuid_find()
562 * lru (and in the future other) cache replacement policies; for most purposes
566 * it's actually the gens that must be written out at specific times - it's no
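
The comment fragment above introduces per-bucket priorities and generations; bch_prio_write() and prio_read() further down show how they reach disk. Each prio bucket carries a small checksummed header followed by a packed array of {16-bit prio, 8-bit gen} pairs, and buckets are chained through next_bucket so prio_read() can walk the whole set starting from the bucket recorded in the journal. A rough sketch of that on-disk shape, inferred only from the fields this listing touches (the real struct prio_set may have more fields and different padding):

	#include <stdint.h>

	struct bucket_disk_sketch {
		uint16_t prio;		/* little-endian on disk, see cpu_to_le16() below */
		uint8_t  gen;
	} __attribute__((packed));

	struct prio_set_sketch {
		uint64_t csum;		/* CRC-64 over magic .. end of the meta bucket */
		uint64_t magic;		/* pset_magic(&ca->sb) */
		uint64_t seq;
		uint64_t next_bucket;	/* bucket number of the next prio page in the chain */
		struct bucket_disk_sketch data[];	/* prios_per_bucket(ca) entries */
	};
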
583 struct cache *ca = bio->bi_private; in prio_endio()
585 cache_set_err_on(bio->bi_status, ca->set, "accessing priorities"); in prio_endio()
586 bch_bbio_free(bio, ca->set); in prio_endio()
587 closure_put(&ca->prio); in prio_endio()
590 static void prio_io(struct cache *ca, uint64_t bucket, blk_opf_t opf) in prio_io()
592 struct closure *cl = &ca->prio; in prio_io()
593 struct bio *bio = bch_bbio_alloc(ca->set); in prio_io()
597 bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; in prio_io()
598 bio_set_dev(bio, ca->bdev); in prio_io()
599 bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb); in prio_io()
601 bio->bi_end_io = prio_endio; in prio_io()
602 bio->bi_private = ca; in prio_io()
603 bio->bi_opf = opf | REQ_SYNC | REQ_META; in prio_io()
604 bch_bio_map(bio, ca->disk_buckets); in prio_io()
606 closure_bio_submit(ca->set, bio, &ca->prio); in prio_io()
610 int bch_prio_write(struct cache *ca, bool wait) in bch_prio_write()
617 fifo_used(&ca->free[RESERVE_PRIO]), in bch_prio_write()
618 fifo_used(&ca->free[RESERVE_NONE]), in bch_prio_write()
619 fifo_used(&ca->free_inc)); in bch_prio_write()
622 * Pre-check if there are enough free buckets. In the non-blocking in bch_prio_write()
627 size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) + in bch_prio_write()
628 fifo_used(&ca->free[RESERVE_NONE]); in bch_prio_write()
630 return -ENOMEM; in bch_prio_write()
635 lockdep_assert_held(&ca->set->bucket_lock); in bch_prio_write()
637 ca->disk_buckets->seq++; in bch_prio_write()
639 atomic_long_add(ca->sb.bucket_size * prio_buckets(ca), in bch_prio_write()
640 &ca->meta_sectors_written); in bch_prio_write()
642 for (i = prio_buckets(ca) - 1; i >= 0; --i) { in bch_prio_write()
644 struct prio_set *p = ca->disk_buckets; in bch_prio_write()
645 struct bucket_disk *d = p->data; in bch_prio_write() local
646 struct bucket_disk *end = d + prios_per_bucket(ca); in bch_prio_write()
648 for (b = ca->buckets + i * prios_per_bucket(ca); in bch_prio_write()
649 b < ca->buckets + ca->sb.nbuckets && d < end; in bch_prio_write()
650 b++, d++) { in bch_prio_write()
651 d->prio = cpu_to_le16(b->prio); in bch_prio_write()
652 d->gen = b->gen; in bch_prio_write()
655 p->next_bucket = ca->prio_buckets[i + 1]; in bch_prio_write()
656 p->magic = pset_magic(&ca->sb); in bch_prio_write()
657 p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8); in bch_prio_write()
660 BUG_ON(bucket == -1); in bch_prio_write()
662 mutex_unlock(&ca->set->bucket_lock); in bch_prio_write()
664 mutex_lock(&ca->set->bucket_lock); in bch_prio_write()
666 ca->prio_buckets[i] = bucket; in bch_prio_write()
667 atomic_dec_bug(&ca->buckets[bucket].pin); in bch_prio_write()
670 mutex_unlock(&ca->set->bucket_lock); in bch_prio_write()
672 bch_journal_meta(ca->set, &cl); in bch_prio_write()
675 mutex_lock(&ca->set->bucket_lock); in bch_prio_write()
682 if (ca->prio_last_buckets[i]) in bch_prio_write()
684 &ca->buckets[ca->prio_last_buckets[i]]); in bch_prio_write()
686 ca->prio_last_buckets[i] = ca->prio_buckets[i]; in bch_prio_write()
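
Two details of bch_prio_write() above are easy to miss. First, the "Pre-check if there are enough free buckets" comment: in the non-blocking case the function sums what is left in the PRIO and NONE reserves and returns -ENOMEM if that is smaller than the prio_buckets(ca) it is about to allocate, instead of sleeping in the allocator. Second, the write loop runs from the last prio page down to the first so each page can record, in next_bucket, the bucket already chosen for the page after it. A tiny model of the pre-check, with the parameters standing in for the fifo_used() calls:

	#include <errno.h>
	#include <stdbool.h>
	#include <stddef.h>

	static int precheck_free_buckets(bool wait, size_t prio_reserve_avail,
					 size_t none_reserve_avail, size_t buckets_needed)
	{
		if (!wait &&
		    buckets_needed > prio_reserve_avail + none_reserve_avail)
			return -ENOMEM;	/* fail fast instead of blocking in the allocator */

		return 0;
	}
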
691 static int prio_read(struct cache *ca, uint64_t bucket) in prio_read()
693 struct prio_set *p = ca->disk_buckets; in prio_read()
694 struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d; in prio_read() local
697 int ret = -EIO; in prio_read()
699 for (b = ca->buckets; in prio_read()
700 b < ca->buckets + ca->sb.nbuckets; in prio_read()
701 b++, d++) { in prio_read()
702 if (d == end) { in prio_read()
703 ca->prio_buckets[bucket_nr] = bucket; in prio_read()
704 ca->prio_last_buckets[bucket_nr] = bucket; in prio_read()
709 if (p->csum != in prio_read()
710 bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) { in prio_read()
715 if (p->magic != pset_magic(&ca->sb)) { in prio_read()
720 bucket = p->next_bucket; in prio_read()
721 d = p->data; in prio_read()
724 b->prio = le16_to_cpu(d->prio); in prio_read()
725 b->gen = b->last_gc = d->gen; in prio_read()
737 struct bcache_device *d = disk->private_data; in open_dev() local
739 if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) in open_dev()
740 return -ENXIO; in open_dev()
742 closure_get(&d->cl); in open_dev()
748 struct bcache_device *d = b->private_data; in release_dev() local
750 closure_put(&d->cl); in release_dev()
756 struct bcache_device *d = b->bd_disk->private_data; in ioctl_dev() local
758 return d->ioctl(d, mode, cmd, arg); in ioctl_dev()
777 void bcache_device_stop(struct bcache_device *d) in bcache_device_stop() argument
779 if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags)) in bcache_device_stop()
782 * - cached device: cached_dev_flush() in bcache_device_stop()
783 * - flash dev: flash_dev_flush() in bcache_device_stop()
785 closure_queue(&d->cl); in bcache_device_stop()
788 static void bcache_device_unlink(struct bcache_device *d) in bcache_device_unlink() argument
792 if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) { in bcache_device_unlink()
793 struct cache *ca = d->c->cache; in bcache_device_unlink()
795 sysfs_remove_link(&d->c->kobj, d->name); in bcache_device_unlink()
796 sysfs_remove_link(&d->kobj, "cache"); in bcache_device_unlink()
798 bd_unlink_disk_holder(ca->bdev, d->disk); in bcache_device_unlink()
802 static void bcache_device_link(struct bcache_device *d, struct cache_set *c, in bcache_device_link() argument
805 struct cache *ca = c->cache; in bcache_device_link()
808 bd_link_disk_holder(ca->bdev, d->disk); in bcache_device_link()
810 snprintf(d->name, BCACHEDEVNAME_SIZE, in bcache_device_link()
811 "%s%u", name, d->id); in bcache_device_link()
813 ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); in bcache_device_link()
815 pr_err("Couldn't create device -> cache set symlink\n"); in bcache_device_link()
817 ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); in bcache_device_link()
819 pr_err("Couldn't create cache set -> device symlink\n"); in bcache_device_link()
821 clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); in bcache_device_link()
824 static void bcache_device_detach(struct bcache_device *d) in bcache_device_detach() argument
828 atomic_dec(&d->c->attached_dev_nr); in bcache_device_detach()
830 if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) { in bcache_device_detach()
831 struct uuid_entry *u = d->c->uuids + d->id; in bcache_device_detach()
834 memcpy(u->uuid, invalid_uuid, 16); in bcache_device_detach()
835 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); in bcache_device_detach()
836 bch_uuid_write(d->c); in bcache_device_detach()
839 bcache_device_unlink(d); in bcache_device_detach()
841 d->c->devices[d->id] = NULL; in bcache_device_detach()
842 closure_put(&d->c->caching); in bcache_device_detach()
843 d->c = NULL; in bcache_device_detach()
846 static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, in bcache_device_attach() argument
849 d->id = id; in bcache_device_attach()
850 d->c = c; in bcache_device_attach()
851 c->devices[id] = d; in bcache_device_attach()
853 if (id >= c->devices_max_used) in bcache_device_attach()
854 c->devices_max_used = id + 1; in bcache_device_attach()
856 closure_get(&c->caching); in bcache_device_attach()
869 static void bcache_device_free(struct bcache_device *d) in bcache_device_free() argument
871 struct gendisk *disk = d->disk; in bcache_device_free()
876 pr_info("%s stopped\n", disk->disk_name); in bcache_device_free()
880 if (d->c) in bcache_device_free()
881 bcache_device_detach(d); in bcache_device_free()
885 first_minor_to_idx(disk->first_minor)); in bcache_device_free()
889 bioset_exit(&d->bio_split); in bcache_device_free()
890 kvfree(d->full_dirty_stripes); in bcache_device_free()
891 kvfree(d->stripe_sectors_dirty); in bcache_device_free()
893 closure_debug_destroy(&d->cl); in bcache_device_free()
896 static int bcache_device_init(struct bcache_device *d, unsigned int block_size, in bcache_device_init() argument
917 d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT; in bcache_device_init()
920 if (!d->stripe_size) in bcache_device_init()
921 d->stripe_size = 1 << 31; in bcache_device_init()
922 else if (d->stripe_size < BCH_MIN_STRIPE_SZ) in bcache_device_init()
923 d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); in bcache_device_init()
925 n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); in bcache_device_init()
929 return -ENOMEM; in bcache_device_init()
931 d->nr_stripes = n; in bcache_device_init()
933 n = d->nr_stripes * sizeof(atomic_t); in bcache_device_init()
934 d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL); in bcache_device_init()
935 if (!d->stripe_sectors_dirty) in bcache_device_init()
936 return -ENOMEM; in bcache_device_init()
938 n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); in bcache_device_init()
939 d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL); in bcache_device_init()
940 if (!d->full_dirty_stripes) in bcache_device_init()
943 idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1, in bcache_device_init()
948 if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio), in bcache_device_init()
955 * Block/page size is checked for BCACHE_SB_VERSION_CDEV. in bcache_device_init()
957 …pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device … in bcache_device_init()
961 /* This also adjusts physical block size/min io size if needed */ in bcache_device_init()
965 d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); in bcache_device_init()
966 if (IS_ERR(d->disk)) in bcache_device_init()
969 set_capacity(d->disk, sectors); in bcache_device_init()
970 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); in bcache_device_init()
972 d->disk->major = bcache_major; in bcache_device_init()
973 d->disk->first_minor = idx_to_first_minor(idx); in bcache_device_init()
974 d->disk->minors = BCACHE_MINORS; in bcache_device_init()
975 d->disk->fops = ops; in bcache_device_init()
976 d->disk->private_data = d; in bcache_device_init()
980 bioset_exit(&d->bio_split); in bcache_device_init()
984 kvfree(d->full_dirty_stripes); in bcache_device_init()
986 kvfree(d->stripe_sectors_dirty); in bcache_device_init()
987 return -ENOMEM; in bcache_device_init()
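
Two sizing decisions in bcache_device_init() above are worth spelling out. The stripe size used for dirty tracking is bdev_io_opt() converted to sectors, falling back to 1 << 31 sectors when the backing device reports no optimal I/O size and rounded up when it is below BCH_MIN_STRIPE_SZ, with nr_stripes = DIV_ROUND_UP(sectors, stripe_size). As a rough worked example (my numbers, assuming a 4-byte atomic_t and that the minimum stripe size does not kick in): a 1 TiB backing device is 2^31 sectors; with io_opt = 512 KiB the stripe size is 1024 sectors, nr_stripes = 2^21, the stripe_sectors_dirty array costs about 8 MiB and the full_dirty_stripes bitmap about 256 KiB. Separately, per the block/page size comments above, if the superblock's logical block size exceeds the page size (possible only for old backing-device superblocks, since cache-device superblocks are checked), the queue limit falls back to the backing device's own logical block size, which also fixes up the physical block size and minimum I/O size.
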
998 list_for_each_entry(dc, &c->cached_devs, list) in calc_cached_dev_sectors()
999 sectors += bdev_nr_sectors(dc->bdev); in calc_cached_dev_sectors()
1001 c->cached_dev_sectors = sectors; in calc_cached_dev_sectors()
1012 * dc->io_disable might be set via sysfs interface, so check it in cached_dev_status_update()
1015 while (!kthread_should_stop() && !dc->io_disable) { in cached_dev_status_update()
1016 q = bdev_get_queue(dc->bdev); in cached_dev_status_update()
1018 dc->offline_seconds++; in cached_dev_status_update()
1020 dc->offline_seconds = 0; in cached_dev_status_update()
1022 if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { in cached_dev_status_update()
1023 pr_err("%pg: device offline for %d seconds\n", in cached_dev_status_update()
1024 dc->bdev, in cached_dev_status_update()
1027 dc->disk.name); in cached_dev_status_update()
1028 dc->io_disable = true; in cached_dev_status_update()
1031 bcache_device_stop(&dc->disk); in cached_dev_status_update()
1045 struct bcache_device *d = &dc->disk; in bch_cached_dev_run() local
1046 char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); in bch_cached_dev_run()
1049 kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), in bch_cached_dev_run()
1054 if (dc->io_disable) { in bch_cached_dev_run()
1055 pr_err("I/O disabled on cached dev %pg\n", dc->bdev); in bch_cached_dev_run()
1056 ret = -EIO; in bch_cached_dev_run()
1060 if (atomic_xchg(&dc->running, 1)) { in bch_cached_dev_run()
1061 pr_info("cached dev %pg is running already\n", dc->bdev); in bch_cached_dev_run()
1062 ret = -EBUSY; in bch_cached_dev_run()
1066 if (!d->c && in bch_cached_dev_run()
1067 BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { in bch_cached_dev_run()
1072 SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE); in bch_cached_dev_run()
1077 ret = add_disk(d->disk); in bch_cached_dev_run()
1080 bd_link_disk_holder(dc->bdev, dc->disk.disk); in bch_cached_dev_run()
1082 * won't show up in the uevent file, use udevadm monitor -e instead in bch_cached_dev_run()
1085 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); in bch_cached_dev_run()
1087 if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || in bch_cached_dev_run()
1088 sysfs_create_link(&disk_to_dev(d->disk)->kobj, in bch_cached_dev_run()
1089 &d->kobj, "bcache")) { in bch_cached_dev_run()
1090 pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); in bch_cached_dev_run()
1091 ret = -ENOMEM; in bch_cached_dev_run()
1095 dc->status_update_thread = kthread_run(cached_dev_status_update, in bch_cached_dev_run()
1097 if (IS_ERR(dc->status_update_thread)) { in bch_cached_dev_run()
1110 * work dc->writeback_rate_update is running. Wait until the routine
1121 &dc->disk.flags)) in cancel_writeback_rate_update_dwork()
1123 time_out--; in cancel_writeback_rate_update_dwork()
1128 pr_warn("give up waiting for dc->writeback_write_update to quit\n"); in cancel_writeback_rate_update_dwork()
1130 cancel_delayed_work_sync(&dc->writeback_rate_update); in cancel_writeback_rate_update_dwork()
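
cancel_writeback_rate_update_dwork() above cannot simply cancel the delayed work while the rate-update routine is mid-flight, so it polls the running flag for a bounded number of iterations, warns if it gives up, and only then calls cancel_delayed_work_sync(). A hedged sketch of the same bounded-wait pattern in plain C; the flag, sleep granularity, and limit here are placeholders, not the kernel's values:

	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Poll a "work is running" flag for at most max_tries before giving up. */
	static void wait_then_cancel(volatile bool *running, int max_tries)
	{
		int time_out = max_tries;

		while (*running && time_out > 0) {
			usleep(100 * 1000);	/* placeholder for the kernel's timed sleep */
			time_out--;
		}
		if (*running)
			fprintf(stderr, "give up waiting for the update routine to quit\n");

		/*
		 * ...then do the actual synchronous cancel, as the kernel does with
		 * cancel_delayed_work_sync(&dc->writeback_rate_update).
		 */
	}
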
1136 struct cache_set *c = dc->disk.c; in cached_dev_detach_finish()
1138 BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); in cached_dev_detach_finish()
1139 BUG_ON(refcount_read(&dc->count)); in cached_dev_detach_finish()
1142 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) in cached_dev_detach_finish()
1145 if (!IS_ERR_OR_NULL(dc->writeback_thread)) { in cached_dev_detach_finish()
1146 kthread_stop(dc->writeback_thread); in cached_dev_detach_finish()
1147 dc->writeback_thread = NULL; in cached_dev_detach_finish()
1152 bcache_device_detach(&dc->disk); in cached_dev_detach_finish()
1153 list_move(&dc->list, &uncached_devices); in cached_dev_detach_finish()
1156 clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); in cached_dev_detach_finish()
1157 clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); in cached_dev_detach_finish()
1161 pr_info("Caching disabled for %pg\n", dc->bdev); in cached_dev_detach_finish()
1164 closure_put(&dc->disk.cl); in cached_dev_detach_finish()
1171 if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) in bch_cached_dev_detach()
1174 if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) in bch_cached_dev_detach()
1178 * Block the device from being closed and freed until we're finished in bch_cached_dev_detach()
1181 closure_get(&dc->disk.cl); in bch_cached_dev_detach()
1196 if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) || in bch_cached_dev_attach()
1197 (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16))) in bch_cached_dev_attach()
1198 return -ENOENT; in bch_cached_dev_attach()
1200 if (dc->disk.c) { in bch_cached_dev_attach()
1201 pr_err("Can't attach %pg: already attached\n", dc->bdev); in bch_cached_dev_attach()
1202 return -EINVAL; in bch_cached_dev_attach()
1205 if (test_bit(CACHE_SET_STOPPING, &c->flags)) { in bch_cached_dev_attach()
1206 pr_err("Can't attach %pg: shutting down\n", dc->bdev); in bch_cached_dev_attach()
1207 return -EINVAL; in bch_cached_dev_attach()
1210 if (dc->sb.block_size < c->cache->sb.block_size) { in bch_cached_dev_attach()
1212 pr_err("Couldn't attach %pg: block size less than set's block size\n", in bch_cached_dev_attach()
1213 dc->bdev); in bch_cached_dev_attach()
1214 return -EINVAL; in bch_cached_dev_attach()
1218 list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { in bch_cached_dev_attach()
1219 if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { in bch_cached_dev_attach()
1221 dc->bdev); in bch_cached_dev_attach()
1223 return -EINVAL; in bch_cached_dev_attach()
1227 u = uuid_find(c, dc->sb.uuid); in bch_cached_dev_attach()
1230 (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE || in bch_cached_dev_attach()
1231 BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) { in bch_cached_dev_attach()
1232 memcpy(u->uuid, invalid_uuid, 16); in bch_cached_dev_attach()
1233 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds()); in bch_cached_dev_attach()
1238 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { in bch_cached_dev_attach()
1239 pr_err("Couldn't find uuid for %pg in set\n", dc->bdev); in bch_cached_dev_attach()
1240 return -ENOENT; in bch_cached_dev_attach()
1245 pr_err("Not caching %pg, no room for UUID\n", dc->bdev); in bch_cached_dev_attach()
1246 return -EINVAL; in bch_cached_dev_attach()
1252 * sysfs_remove_file(&dc->kobj, &sysfs_attach); in bch_cached_dev_attach()
1255 if (bch_is_zero(u->uuid, 16)) { in bch_cached_dev_attach()
1260 memcpy(u->uuid, dc->sb.uuid, 16); in bch_cached_dev_attach()
1261 memcpy(u->label, dc->sb.label, SB_LABEL_SIZE); in bch_cached_dev_attach()
1262 u->first_reg = u->last_reg = rtime; in bch_cached_dev_attach()
1265 memcpy(dc->sb.set_uuid, c->set_uuid, 16); in bch_cached_dev_attach()
1266 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); in bch_cached_dev_attach()
1271 u->last_reg = rtime; in bch_cached_dev_attach()
1275 bcache_device_attach(&dc->disk, c, u - c->uuids); in bch_cached_dev_attach()
1276 list_move(&dc->list, &c->cached_devs); in bch_cached_dev_attach()
1280 * dc->c must be set before dc->count != 0 - paired with the mb in in bch_cached_dev_attach()
1284 refcount_set(&dc->count, 1); in bch_cached_dev_attach()
1286 /* Block writeback thread, but spawn it */ in bch_cached_dev_attach()
1287 down_write(&dc->writeback_lock); in bch_cached_dev_attach()
1289 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1291 dc->disk.disk->disk_name); in bch_cached_dev_attach()
1292 return -ENOMEM; in bch_cached_dev_attach()
1295 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { in bch_cached_dev_attach()
1296 atomic_set(&dc->has_dirty, 1); in bch_cached_dev_attach()
1300 bch_sectors_dirty_init(&dc->disk); in bch_cached_dev_attach()
1303 if (ret && (ret != -EBUSY)) { in bch_cached_dev_attach()
1304 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1311 kthread_stop(dc->writeback_thread); in bch_cached_dev_attach()
1313 pr_err("Couldn't run cached device %pg\n", dc->bdev); in bch_cached_dev_attach()
1317 bcache_device_link(&dc->disk, c, "bdev"); in bch_cached_dev_attach()
1318 atomic_inc(&c->attached_dev_nr); in bch_cached_dev_attach()
1320 if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { in bch_cached_dev_attach()
1321 …pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); in bch_cached_dev_attach()
1322 pr_err("Please update to the latest bcache-tools to create the cache device\n"); in bch_cached_dev_attach()
1323 set_disk_ro(dc->disk.disk, 1); in bch_cached_dev_attach()
1327 up_write(&dc->writeback_lock); in bch_cached_dev_attach()
1330 dc->bdev, in bch_cached_dev_attach()
1331 dc->disk.disk->disk_name, in bch_cached_dev_attach()
1332 dc->disk.c->set_uuid); in bch_cached_dev_attach()
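
The ordering comment above ("dc->c must be set before dc->count != 0") is the classic publish pattern: the attach path fills in dc->disk.c and only then makes the refcount nonzero, and the reader side pairs that with a barrier before dereferencing. A hedged userspace model of the same ordering using C11 acquire/release atomics; the kernel uses refcount_set() plus explicit barriers, not these primitives:

	#include <stdatomic.h>
	#include <stddef.h>

	struct cached_dev_model {
		void *c;		/* pointer being published */
		atomic_int count;	/* nonzero only once c is safe to use */
	};

	/* Writer: publish c before count becomes nonzero (release ordering). */
	static void attach_model(struct cached_dev_model *dc, void *c)
	{
		dc->c = c;
		atomic_store_explicit(&dc->count, 1, memory_order_release);
	}

	/* Reader: only dereference c after observing a nonzero count (acquire ordering). */
	static void *get_model(struct cached_dev_model *dc)
	{
		if (atomic_load_explicit(&dc->count, memory_order_acquire) == 0)
			return NULL;
		return dc->c;
	}
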
1336 /* when dc->disk.kobj released */
1349 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) in CLOSURE_CALLBACK()
1352 if (!IS_ERR_OR_NULL(dc->writeback_thread)) in CLOSURE_CALLBACK()
1353 kthread_stop(dc->writeback_thread); in CLOSURE_CALLBACK()
1354 if (!IS_ERR_OR_NULL(dc->status_update_thread)) in CLOSURE_CALLBACK()
1355 kthread_stop(dc->status_update_thread); in CLOSURE_CALLBACK()
1359 if (atomic_read(&dc->running)) { in CLOSURE_CALLBACK()
1360 bd_unlink_disk_holder(dc->bdev, dc->disk.disk); in CLOSURE_CALLBACK()
1361 del_gendisk(dc->disk.disk); in CLOSURE_CALLBACK()
1363 bcache_device_free(&dc->disk); in CLOSURE_CALLBACK()
1364 list_del(&dc->list); in CLOSURE_CALLBACK()
1368 if (dc->sb_disk) in CLOSURE_CALLBACK()
1369 put_page(virt_to_page(dc->sb_disk)); in CLOSURE_CALLBACK()
1371 if (dc->bdev_file) in CLOSURE_CALLBACK()
1372 fput(dc->bdev_file); in CLOSURE_CALLBACK()
1376 kobject_put(&dc->disk.kobj); in CLOSURE_CALLBACK()
1382 struct bcache_device *d = &dc->disk; in CLOSURE_CALLBACK() local
1385 bcache_device_unlink(d); in CLOSURE_CALLBACK()
1388 bch_cache_accounting_destroy(&dc->accounting); in CLOSURE_CALLBACK()
1389 kobject_del(&d->kobj); in CLOSURE_CALLBACK()
1398 struct request_queue *q = bdev_get_queue(dc->bdev); in cached_dev_init()
1401 INIT_LIST_HEAD(&dc->list); in cached_dev_init()
1402 closure_init(&dc->disk.cl, NULL); in cached_dev_init()
1403 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); in cached_dev_init()
1404 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); in cached_dev_init()
1405 INIT_WORK(&dc->detach, cached_dev_detach_finish); in cached_dev_init()
1406 sema_init(&dc->sb_write_mutex, 1); in cached_dev_init()
1407 INIT_LIST_HEAD(&dc->io_lru); in cached_dev_init()
1408 spin_lock_init(&dc->io_lock); in cached_dev_init()
1409 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl); in cached_dev_init()
1411 dc->sequential_cutoff = 4 << 20; in cached_dev_init()
1413 for (io = dc->io; io < dc->io + RECENT_IO; io++) { in cached_dev_init()
1414 list_add(&io->lru, &dc->io_lru); in cached_dev_init()
1415 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); in cached_dev_init()
1418 if (bdev_io_opt(dc->bdev)) in cached_dev_init()
1419 dc->partial_stripes_expensive = !!(q->limits.features & in cached_dev_init()
1422 ret = bcache_device_init(&dc->disk, block_size, in cached_dev_init()
1423 bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, in cached_dev_init()
1424 dc->bdev, &bcache_cached_ops); in cached_dev_init()
1428 atomic_set(&dc->io_errors, 0); in cached_dev_init()
1429 dc->io_disable = false; in cached_dev_init()
1430 dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; in cached_dev_init()
1432 dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO; in cached_dev_init()
1439 /* Cached device - bcache superblock */
1447 int ret = -ENOMEM; in register_bdev()
1449 memcpy(&dc->sb, sb, sizeof(struct cache_sb)); in register_bdev()
1450 dc->bdev_file = bdev_file; in register_bdev()
1451 dc->bdev = file_bdev(bdev_file); in register_bdev()
1452 dc->sb_disk = sb_disk; in register_bdev()
1454 if (cached_dev_init(dc, sb->block_size << 9)) in register_bdev()
1458 if (kobject_add(&dc->disk.kobj, bdev_kobj(dc->bdev), "bcache")) in register_bdev()
1460 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) in register_bdev()
1463 pr_info("registered backing device %pg\n", dc->bdev); in register_bdev()
1465 list_add(&dc->list, &uncached_devices); in register_bdev()
1466 /* attach to a matched cache set if it exists */ in register_bdev()
1470 if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || in register_bdev()
1471 BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { in register_bdev()
1480 pr_notice("error %pg: %s\n", dc->bdev, err); in register_bdev()
1481 bcache_device_stop(&dc->disk); in register_bdev()
1487 /* When d->kobj released */
1490 struct bcache_device *d = container_of(kobj, struct bcache_device, in bch_flash_dev_release() local
1492 kfree(d); in bch_flash_dev_release()
1497 closure_type(d, struct bcache_device, cl); in CLOSURE_CALLBACK()
1500 atomic_long_sub(bcache_dev_sectors_dirty(d), in CLOSURE_CALLBACK()
1501 &d->c->flash_dev_dirty_sectors); in CLOSURE_CALLBACK()
1502 del_gendisk(d->disk); in CLOSURE_CALLBACK()
1503 bcache_device_free(d); in CLOSURE_CALLBACK()
1505 kobject_put(&d->kobj); in CLOSURE_CALLBACK()
1510 closure_type(d, struct bcache_device, cl); in CLOSURE_CALLBACK()
1513 bcache_device_unlink(d); in CLOSURE_CALLBACK()
1515 kobject_del(&d->kobj); in CLOSURE_CALLBACK()
1521 int err = -ENOMEM; in flash_dev_run()
1522 struct bcache_device *d = kzalloc(sizeof(struct bcache_device), in flash_dev_run() local
1524 if (!d) in flash_dev_run()
1527 closure_init(&d->cl, NULL); in flash_dev_run()
1528 set_closure_fn(&d->cl, flash_dev_flush, system_wq); in flash_dev_run()
1530 kobject_init(&d->kobj, &bch_flash_dev_ktype); in flash_dev_run()
1532 if (bcache_device_init(d, block_bytes(c->cache), u->sectors, in flash_dev_run()
1536 bcache_device_attach(d, c, u - c->uuids); in flash_dev_run()
1537 bch_sectors_dirty_init(d); in flash_dev_run()
1538 bch_flash_dev_request_init(d); in flash_dev_run()
1539 err = add_disk(d->disk); in flash_dev_run()
1543 err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"); in flash_dev_run()
1547 bcache_device_link(d, c, "volume"); in flash_dev_run()
1549 if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { in flash_dev_run()
1550 …pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); in flash_dev_run()
1551 pr_err("Please update to the latest bcache-tools to create the cache device\n"); in flash_dev_run()
1552 set_disk_ro(d->disk, 1); in flash_dev_run()
1557 kobject_put(&d->kobj); in flash_dev_run()
1567 for (u = c->uuids; in flash_devs_run()
1568 u < c->uuids + c->nr_uuids && !ret; in flash_devs_run()
1576 int bch_flash_dev_create(struct cache_set *c, uint64_t size) in bch_flash_dev_create() argument
1580 if (test_bit(CACHE_SET_STOPPING, &c->flags)) in bch_flash_dev_create()
1581 return -EINTR; in bch_flash_dev_create()
1583 if (!test_bit(CACHE_SET_RUNNING, &c->flags)) in bch_flash_dev_create()
1584 return -EPERM; in bch_flash_dev_create()
1589 return -EINVAL; in bch_flash_dev_create()
1592 get_random_bytes(u->uuid, 16); in bch_flash_dev_create()
1593 memset(u->label, 0, 32); in bch_flash_dev_create()
1594 u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds()); in bch_flash_dev_create()
1597 u->sectors = size >> 9; in bch_flash_dev_create()
1606 if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) in bch_cached_dev_error()
1609 dc->io_disable = true; in bch_cached_dev_error()
1614 dc->disk.disk->disk_name, dc->bdev); in bch_cached_dev_error()
1616 bcache_device_stop(&dc->disk); in bch_cached_dev_error()
1620 /* Cache set */
1628 if (c->on_error != ON_ERROR_PANIC && in bch_cache_set_error()
1629 test_bit(CACHE_SET_STOPPING, &c->flags)) in bch_cache_set_error()
1632 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) in bch_cache_set_error()
1646 c->set_uuid, &vaf); in bch_cache_set_error()
1650 if (c->on_error == ON_ERROR_PANIC) in bch_cache_set_error()
1657 /* When c->kobj released */
1669 struct cache *ca; in CLOSURE_CALLBACK()
1671 debugfs_remove(c->debug); in CLOSURE_CALLBACK()
1678 bch_bset_sort_state_free(&c->sort); in CLOSURE_CALLBACK()
1679 free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb))); in CLOSURE_CALLBACK()
1681 ca = c->cache; in CLOSURE_CALLBACK()
1683 ca->set = NULL; in CLOSURE_CALLBACK()
1684 c->cache = NULL; in CLOSURE_CALLBACK()
1685 kobject_put(&ca->kobj); in CLOSURE_CALLBACK()
1689 if (c->moving_gc_wq) in CLOSURE_CALLBACK()
1690 destroy_workqueue(c->moving_gc_wq); in CLOSURE_CALLBACK()
1691 bioset_exit(&c->bio_split); in CLOSURE_CALLBACK()
1692 mempool_exit(&c->fill_iter); in CLOSURE_CALLBACK()
1693 mempool_exit(&c->bio_meta); in CLOSURE_CALLBACK()
1694 mempool_exit(&c->search); in CLOSURE_CALLBACK()
1695 kfree(c->devices); in CLOSURE_CALLBACK()
1697 list_del(&c->list); in CLOSURE_CALLBACK()
1700 pr_info("Cache set %pU unregistered\n", c->set_uuid); in CLOSURE_CALLBACK()
1703 closure_debug_destroy(&c->cl); in CLOSURE_CALLBACK()
1704 kobject_put(&c->kobj); in CLOSURE_CALLBACK()
1710 struct cache *ca = c->cache; in CLOSURE_CALLBACK()
1713 bch_cache_accounting_destroy(&c->accounting); in CLOSURE_CALLBACK()
1715 kobject_put(&c->internal); in CLOSURE_CALLBACK()
1716 kobject_del(&c->kobj); in CLOSURE_CALLBACK()
1718 if (!IS_ERR_OR_NULL(c->gc_thread)) in CLOSURE_CALLBACK()
1719 kthread_stop(c->gc_thread); in CLOSURE_CALLBACK()
1721 if (!IS_ERR_OR_NULL(c->root)) in CLOSURE_CALLBACK()
1722 list_add(&c->root->list, &c->btree_cache); in CLOSURE_CALLBACK()
1725 * Avoid flushing cached nodes if cache set is retiring in CLOSURE_CALLBACK()
1728 if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) in CLOSURE_CALLBACK()
1729 list_for_each_entry(b, &c->btree_cache, list) { in CLOSURE_CALLBACK()
1730 mutex_lock(&b->write_lock); in CLOSURE_CALLBACK()
1733 mutex_unlock(&b->write_lock); in CLOSURE_CALLBACK()
1736 if (ca->alloc_thread) in CLOSURE_CALLBACK()
1737 kthread_stop(ca->alloc_thread); in CLOSURE_CALLBACK()
1739 if (c->journal.cur) { in CLOSURE_CALLBACK()
1740 cancel_delayed_work_sync(&c->journal.work); in CLOSURE_CALLBACK()
1742 c->journal.work.work.func(&c->journal.work.work); in CLOSURE_CALLBACK()
1750 * cache set is unregistering due to too many I/O errors. In this condition,
1752 * value and whether the broken cache has dirty data:
1754 * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
1762 * backing device is clean on the broken cache device.
1765 struct bcache_device *d, in conditional_stop_bcache_device() argument
1768 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { in conditional_stop_bcache_device()
1769 pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", in conditional_stop_bcache_device()
1770 d->disk->disk_name, c->set_uuid); in conditional_stop_bcache_device()
1771 bcache_device_stop(d); in conditional_stop_bcache_device()
1772 } else if (atomic_read(&dc->has_dirty)) { in conditional_stop_bcache_device()
1774 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO in conditional_stop_bcache_device()
1775 * and dc->has_dirty == 1 in conditional_stop_bcache_device()
1777 …pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potenti… in conditional_stop_bcache_device()
1778 d->disk->disk_name); in conditional_stop_bcache_device()
1780 * There might be a small time gap that cache set is in conditional_stop_bcache_device()
1783 * backing device as no cache set attached to. This in conditional_stop_bcache_device()
1785 * data in writeback mode while cache is dirty. in conditional_stop_bcache_device()
1787 * to a broken cache device, dc->io_disable should be in conditional_stop_bcache_device()
1790 dc->io_disable = true; in conditional_stop_bcache_device()
1793 bcache_device_stop(d); in conditional_stop_bcache_device()
1796 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO in conditional_stop_bcache_device()
1797 * and dc->has_dirty == 0 in conditional_stop_bcache_device()
1799 pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n", in conditional_stop_bcache_device()
1800 d->disk->disk_name); in conditional_stop_bcache_device()
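
Only the header row of the decision table in the comment above survived the extract, but the branches in the function body make the behaviour clear; reconstructed in the same style (my summary of the code above):

 * dc->stop_when_cache_set_failed	dc->has_dirty	stop bcache device
 *  BCH_CACHED_DEV_STOP_ALWAYS		  0 or 1	yes
 *  BCH_CACHED_DEV_STOP_AUTO		    1		yes (dc->io_disable is set first)
 *  BCH_CACHED_DEV_STOP_AUTO		    0		no, keep it alive
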
1808 struct bcache_device *d; in CLOSURE_CALLBACK() local
1813 for (i = 0; i < c->devices_max_used; i++) { in CLOSURE_CALLBACK()
1814 d = c->devices[i]; in CLOSURE_CALLBACK()
1815 if (!d) in CLOSURE_CALLBACK()
1818 if (!UUID_FLASH_ONLY(&c->uuids[i]) && in CLOSURE_CALLBACK()
1819 test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { in CLOSURE_CALLBACK()
1820 dc = container_of(d, struct cached_dev, disk); in CLOSURE_CALLBACK()
1822 if (test_bit(CACHE_SET_IO_DISABLE, &c->flags)) in CLOSURE_CALLBACK()
1823 conditional_stop_bcache_device(c, d, dc); in CLOSURE_CALLBACK()
1825 bcache_device_stop(d); in CLOSURE_CALLBACK()
1836 if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags)) in bch_cache_set_stop()
1838 closure_queue(&c->caching); in bch_cache_set_stop()
1843 set_bit(CACHE_SET_UNREGISTERING, &c->flags); in bch_cache_set_unregister()
1853 struct cache *ca = container_of(sb, struct cache, sb); in bch_cache_set_alloc()
1860 closure_init(&c->cl, NULL); in bch_cache_set_alloc()
1861 set_closure_fn(&c->cl, cache_set_free, system_wq); in bch_cache_set_alloc()
1863 closure_init(&c->caching, &c->cl); in bch_cache_set_alloc()
1864 set_closure_fn(&c->caching, __cache_set_unregister, system_wq); in bch_cache_set_alloc()
1867 closure_set_stopped(&c->cl); in bch_cache_set_alloc()
1868 closure_put(&c->cl); in bch_cache_set_alloc()
1870 kobject_init(&c->kobj, &bch_cache_set_ktype); in bch_cache_set_alloc()
1871 kobject_init(&c->internal, &bch_cache_set_internal_ktype); in bch_cache_set_alloc()
1873 bch_cache_accounting_init(&c->accounting, &c->cl); in bch_cache_set_alloc()
1875 memcpy(c->set_uuid, sb->set_uuid, 16); in bch_cache_set_alloc()
1877 c->cache = ca; in bch_cache_set_alloc()
1878 c->cache->set = c; in bch_cache_set_alloc()
1879 c->bucket_bits = ilog2(sb->bucket_size); in bch_cache_set_alloc()
1880 c->block_bits = ilog2(sb->block_size); in bch_cache_set_alloc()
1881 c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry); in bch_cache_set_alloc()
1882 c->devices_max_used = 0; in bch_cache_set_alloc()
1883 atomic_set(&c->attached_dev_nr, 0); in bch_cache_set_alloc()
1884 c->btree_pages = meta_bucket_pages(sb); in bch_cache_set_alloc()
1885 if (c->btree_pages > BTREE_MAX_PAGES) in bch_cache_set_alloc()
1886 c->btree_pages = max_t(int, c->btree_pages / 4, in bch_cache_set_alloc()
1889 sema_init(&c->sb_write_mutex, 1); in bch_cache_set_alloc()
1890 mutex_init(&c->bucket_lock); in bch_cache_set_alloc()
1891 init_waitqueue_head(&c->btree_cache_wait); in bch_cache_set_alloc()
1892 spin_lock_init(&c->btree_cannibalize_lock); in bch_cache_set_alloc()
1893 init_waitqueue_head(&c->bucket_wait); in bch_cache_set_alloc()
1894 init_waitqueue_head(&c->gc_wait); in bch_cache_set_alloc()
1895 sema_init(&c->uuid_write_mutex, 1); in bch_cache_set_alloc()
1897 spin_lock_init(&c->btree_gc_time.lock); in bch_cache_set_alloc()
1898 spin_lock_init(&c->btree_split_time.lock); in bch_cache_set_alloc()
1899 spin_lock_init(&c->btree_read_time.lock); in bch_cache_set_alloc()
1903 INIT_LIST_HEAD(&c->list); in bch_cache_set_alloc()
1904 INIT_LIST_HEAD(&c->cached_devs); in bch_cache_set_alloc()
1905 INIT_LIST_HEAD(&c->btree_cache); in bch_cache_set_alloc()
1906 INIT_LIST_HEAD(&c->btree_cache_freeable); in bch_cache_set_alloc()
1907 INIT_LIST_HEAD(&c->btree_cache_freed); in bch_cache_set_alloc()
1908 INIT_LIST_HEAD(&c->data_buckets); in bch_cache_set_alloc()
1910 iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * in bch_cache_set_alloc()
1913 c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); in bch_cache_set_alloc()
1914 if (!c->devices) in bch_cache_set_alloc()
1917 if (mempool_init_slab_pool(&c->search, 32, bch_search_cache)) in bch_cache_set_alloc()
1920 if (mempool_init_kmalloc_pool(&c->bio_meta, 2, in bch_cache_set_alloc()
1925 if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size)) in bch_cache_set_alloc()
1928 if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio), in bch_cache_set_alloc()
1932 c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb); in bch_cache_set_alloc()
1933 if (!c->uuids) in bch_cache_set_alloc()
1936 c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0); in bch_cache_set_alloc()
1937 if (!c->moving_gc_wq) in bch_cache_set_alloc()
1949 if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) in bch_cache_set_alloc()
1952 c->congested_read_threshold_us = 2000; in bch_cache_set_alloc()
1953 c->congested_write_threshold_us = 20000; in bch_cache_set_alloc()
1954 c->error_limit = DEFAULT_IO_ERROR_LIMIT; in bch_cache_set_alloc()
1955 c->idle_max_writeback_rate_enabled = 1; in bch_cache_set_alloc()
1956 WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)); in bch_cache_set_alloc()
1968 struct cache *ca = c->cache; in run_cache_set()
1975 c->nbuckets = ca->sb.nbuckets; in run_cache_set()
1978 if (CACHE_SYNC(&c->cache->sb)) { in run_cache_set()
1992 j = &list_entry(journal.prev, struct journal_replay, list)->j; in run_cache_set()
1995 if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev])) in run_cache_set()
2004 k = &j->btree_root; in run_cache_set()
2011 c->root = bch_btree_node_get(c, NULL, k, in run_cache_set()
2012 j->btree_level, in run_cache_set()
2014 if (IS_ERR(c->root)) in run_cache_set()
2017 list_del_init(&c->root->list); in run_cache_set()
2018 rw_unlock(true, c->root); in run_cache_set()
2035 * gc_gen - this is a hack but oh well. in run_cache_set()
2037 bch_journal_next(&c->journal); in run_cache_set()
2053 if (j->version < BCACHE_JSET_VERSION_UUID) in run_cache_set()
2063 ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7, in run_cache_set()
2066 for (j = 0; j < ca->sb.keys; j++) in run_cache_set()
2067 ca->sb.d[j] = ca->sb.first_bucket + j; in run_cache_set()
2075 mutex_lock(&c->bucket_lock); in run_cache_set()
2077 mutex_unlock(&c->bucket_lock); in run_cache_set()
2084 c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); in run_cache_set()
2085 if (IS_ERR(c->root)) in run_cache_set()
2088 mutex_lock(&c->root->write_lock); in run_cache_set()
2089 bkey_copy_key(&c->root->key, &MAX_KEY); in run_cache_set()
2090 bch_btree_node_write(c->root, &cl); in run_cache_set()
2091 mutex_unlock(&c->root->write_lock); in run_cache_set()
2093 bch_btree_set_root(c->root); in run_cache_set()
2094 rw_unlock(true, c->root); in run_cache_set()
2098 * everything is set up - fortunately journal entries won't be in run_cache_set()
2101 SET_CACHE_SYNC(&c->cache->sb, true); in run_cache_set()
2103 bch_journal_next(&c->journal); in run_cache_set()
2112 c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); in run_cache_set()
2115 if (bch_has_feature_obso_large_bucket(&c->cache->sb)) in run_cache_set()
2116 pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); in run_cache_set()
2123 bch_journal_space_reserve(&c->journal); in run_cache_set()
2124 set_bit(CACHE_SET_RUNNING, &c->flags); in run_cache_set()
2129 list_del(&l->list); in run_cache_set()
2137 return -EIO; in run_cache_set()
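
In the fresh-cache branch above (when CACHE_SYNC is not yet set), run_cache_set() sizes the journal at nbuckets >> 7 buckets, clamped to fixed bounds (the clamp's limits are on a line the extract does not show, presumably the same SB_JOURNAL_BUCKETS cap that read_super_common() checks), and records them in ca->sb.d[] as the contiguous run starting at first_bucket. That is exactly the layout the earlier superblock checks enforce: d[i] == first_bucket + i, and first_bucket + keys <= nbuckets. As a worked example (my arithmetic), a cache with 131,072 buckets would ask for 131,072 >> 7 = 1,024 journal buckets before clamping.
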
2140 static const char *register_cache_set(struct cache *ca) in register_cache_set()
2147 if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) { in register_cache_set()
2148 if (c->cache) in register_cache_set()
2149 return "duplicate cache set member"; in register_cache_set()
2154 c = bch_cache_set_alloc(&ca->sb); in register_cache_set()
2159 if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) || in register_cache_set()
2160 kobject_add(&c->internal, &c->kobj, "internal")) in register_cache_set()
2163 if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj)) in register_cache_set()
2168 list_add(&c->list, &bch_cache_sets); in register_cache_set()
2170 sprintf(buf, "cache%i", ca->sb.nr_this_dev); in register_cache_set()
2171 if (sysfs_create_link(&ca->kobj, &c->kobj, "set") || in register_cache_set()
2172 sysfs_create_link(&c->kobj, &ca->kobj, buf)) in register_cache_set()
2175 kobject_get(&ca->kobj); in register_cache_set()
2176 ca->set = c; in register_cache_set()
2177 ca->set->cache = ca; in register_cache_set()
2179 err = "failed to run cache set"; in register_cache_set()
2189 /* Cache device */
2191 /* When ca->kobj released */
2194 struct cache *ca = container_of(kobj, struct cache, kobj); in bch_cache_release()
2197 if (ca->set) { in bch_cache_release()
2198 BUG_ON(ca->set->cache != ca); in bch_cache_release()
2199 ca->set->cache = NULL; in bch_cache_release()
2202 free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb))); in bch_cache_release()
2203 kfree(ca->prio_buckets); in bch_cache_release()
2204 vfree(ca->buckets); in bch_cache_release()
2206 free_heap(&ca->heap); in bch_cache_release()
2207 free_fifo(&ca->free_inc); in bch_cache_release()
2210 free_fifo(&ca->free[i]); in bch_cache_release()
2212 if (ca->sb_disk) in bch_cache_release()
2213 put_page(virt_to_page(ca->sb_disk)); in bch_cache_release()
2215 if (ca->bdev_file) in bch_cache_release()
2216 fput(ca->bdev_file); in bch_cache_release()
2222 static int cache_alloc(struct cache *ca) in cache_alloc()
2227 int ret = -ENOMEM; in cache_alloc()
2231 kobject_init(&ca->kobj, &bch_cache_ktype); in cache_alloc()
2233 bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0); in cache_alloc()
2236 * when ca->sb.njournal_buckets is not zero, journal exists, in cache_alloc()
2244 btree_buckets = ca->sb.njournal_buckets ?: 8; in cache_alloc()
2245 free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; in cache_alloc()
2247 ret = -EPERM; in cache_alloc()
2248 err = "ca->sb.nbuckets is too small"; in cache_alloc()
2252 if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, in cache_alloc()
2254 err = "ca->free[RESERVE_BTREE] alloc failed"; in cache_alloc()
2258 if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), in cache_alloc()
2260 err = "ca->free[RESERVE_PRIO] alloc failed"; in cache_alloc()
2264 if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { in cache_alloc()
2265 err = "ca->free[RESERVE_MOVINGGC] alloc failed"; in cache_alloc()
2269 if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { in cache_alloc()
2270 err = "ca->free[RESERVE_NONE] alloc failed"; in cache_alloc()
2274 if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { in cache_alloc()
2275 err = "ca->free_inc alloc failed"; in cache_alloc()
2279 if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { in cache_alloc()
2280 err = "ca->heap alloc failed"; in cache_alloc()
2284 ca->buckets = vzalloc(array_size(sizeof(struct bucket), in cache_alloc()
2285 ca->sb.nbuckets)); in cache_alloc()
2286 if (!ca->buckets) { in cache_alloc()
2287 err = "ca->buckets alloc failed"; in cache_alloc()
2291 ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), in cache_alloc()
2294 if (!ca->prio_buckets) { in cache_alloc()
2295 err = "ca->prio_buckets alloc failed"; in cache_alloc()
2299 ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb); in cache_alloc()
2300 if (!ca->disk_buckets) { in cache_alloc()
2301 err = "ca->disk_buckets alloc failed"; in cache_alloc()
2305 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); in cache_alloc()
2308 atomic_set(&b->pin, 0); in cache_alloc()
2312 kfree(ca->prio_buckets); in cache_alloc()
2314 vfree(ca->buckets); in cache_alloc()
2316 free_heap(&ca->heap); in cache_alloc()
2318 free_fifo(&ca->free_inc); in cache_alloc()
2320 free_fifo(&ca->free[RESERVE_NONE]); in cache_alloc()
2322 free_fifo(&ca->free[RESERVE_MOVINGGC]); in cache_alloc()
2324 free_fifo(&ca->free[RESERVE_PRIO]); in cache_alloc()
2326 free_fifo(&ca->free[RESERVE_BTREE]); in cache_alloc()
2331 pr_notice("error %pg: %s\n", ca->bdev, err); in cache_alloc()
2337 struct cache *ca) in register_cache()
2342 memcpy(&ca->sb, sb, sizeof(struct cache_sb)); in register_cache()
2343 ca->bdev_file = bdev_file; in register_cache()
2344 ca->bdev = file_bdev(bdev_file); in register_cache()
2345 ca->sb_disk = sb_disk; in register_cache()
2348 ca->discard = CACHE_DISCARD(&ca->sb); in register_cache()
2352 if (ret == -ENOMEM) in register_cache()
2353 err = "cache_alloc(): -ENOMEM"; in register_cache()
2354 else if (ret == -EPERM) in register_cache()
2355 err = "cache_alloc(): cache device is too small"; in register_cache()
2360 * If we failed here, it means ca->kobj is not initialized yet, in register_cache()
2363 * we explicitly call fput() on the block device here. in register_cache()
2369 if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) { in register_cache()
2372 ret = -ENOMEM; in register_cache()
2381 ret = -ENODEV; in register_cache()
2385 pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file)); in register_cache()
2388 kobject_put(&ca->kobj); in register_cache()
2395 const char *buffer, size_t size);
2398 const char *buffer, size_t size);
2410 list_for_each_entry_safe(dc, t, &c->cached_devs, list) in bch_is_open_backing()
2411 if (dc->bdev->bd_dev == dev) in bch_is_open_backing()
2414 if (dc->bdev->bd_dev == dev) in bch_is_open_backing()
2424 struct cache *ca = c->cache; in bch_is_open_cache()
2426 if (ca->bdev->bd_dev == dev) in bch_is_open_cache()
2454 if (register_bdev(args->sb, args->sb_disk, args->bdev_file, in register_bdev_worker()
2455 args->holder) < 0) in register_bdev_worker()
2461 args->path); in register_bdev_worker()
2462 kfree(args->sb); in register_bdev_worker()
2463 kfree(args->path); in register_bdev_worker()
2475 if (register_cache(args->sb, args->sb_disk, args->bdev_file, in register_cache_worker()
2476 args->holder)) in register_cache_worker()
2480 pr_info("error %s: fail to register cache device\n", in register_cache_worker()
2481 args->path); in register_cache_worker()
2482 kfree(args->sb); in register_cache_worker()
2483 kfree(args->path); in register_cache_worker()
2490 if (SB_IS_BDEV(args->sb)) in register_device_async()
2491 INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker); in register_device_async()
2493 INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); in register_device_async()
2496 queue_delayed_work(system_wq, &args->reg_work, 10); in register_device_async()
2503 return kzalloc(sizeof(struct cache), GFP_KERNEL); in alloc_holder_object()
2507 const char *buffer, size_t size) in register_bcache() argument
2523 ret = -EBUSY; in register_bcache()
2534 ret = -ENOMEM; in register_bcache()
2536 path = kstrndup(buffer, size, GFP_KERNEL); in register_bcache()
2544 ret = -EINVAL; in register_bcache()
2556 ret = -ENOMEM; in register_bcache()
2562 bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev, in register_bcache()
2569 if (ret == -EBUSY) { in register_bcache()
2581 ret = size; in register_bcache()
2595 ret = -ENOMEM; in register_bcache()
2600 args->path = path; in register_bcache()
2601 args->sb = sb; in register_bcache()
2602 args->sb_disk = sb_disk; in register_bcache()
2603 args->bdev_file = bdev_file; in register_bcache()
2604 args->holder = holder; in register_bcache()
2628 return size; in register_bcache()
2659 size_t size) in bch_pending_bdevs_cleanup() argument
2662 ssize_t ret = size; in bch_pending_bdevs_cleanup()
2672 pdev->dc = dc; in bch_pending_bdevs_cleanup()
2673 list_add(&pdev->list, &pending_devs); in bch_pending_bdevs_cleanup()
2677 char *pdev_set_uuid = pdev->dc->sb.set_uuid; in bch_pending_bdevs_cleanup()
2679 char *set_uuid = c->set_uuid; in bch_pending_bdevs_cleanup()
2682 list_del(&pdev->list); in bch_pending_bdevs_cleanup()
2692 list_del(&pdev->list); in bch_pending_bdevs_cleanup()
2693 bcache_device_stop(&pdev->dc->disk); in bch_pending_bdevs_cleanup()
2739 * avoid potential deadlock during reboot, because cache in bcache_reboot()
2746 * bcache_reboot() won't be re-entered on by other thread, in bcache_reboot()
2754 bcache_device_stop(&dc->disk); in bcache_reboot()
2765 long timeout = start + 10 * HZ - jiffies; in bcache_reboot()
2906 return -ENOMEM; in bcache_init()
2921 MODULE_DESCRIPTION("Bcache: a Linux block layer cache");