Lines Matching +full:suspend-to-disk

1 // SPDX-License-Identifier: GPL-2.0-only
4 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
9 #include "dm-core.h"
10 #include "dm-rq.h"
11 #include "dm-uevent.h"
12 #include "dm-ima.h"
14 #include <linux/bio-integrity.h>
33 #include <linux/blk-crypto.h>
34 #include <linux/blk-crypto-profile.h>
47 * dm_io into one list, and reuse bio->bi_private as the list head. Before
48 * ending this fs bio, we will recover its ->bi_private.
81 * One of these is allocated (on-stack) per original bio.
101 return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size; in dm_per_bio_data()
102 return (char *)bio - DM_IO_BIO_OFFSET - data_size; in dm_per_bio_data()
110 if (io->magic == DM_IO_MAGIC) in dm_bio_from_per_bio_data()
112 BUG_ON(io->magic != DM_TIO_MAGIC); in dm_bio_from_per_bio_data()
119 return container_of(bio, struct dm_target_io, clone)->target_bio_nr; in dm_bio_get_target_bio_nr()
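The three helpers above back the public per-bio-data API used by bio-based targets: a target declares ti->per_io_data_size in its constructor and DM core reserves that many bytes alongside every clone, retrievable later with dm_per_bio_data(). A minimal sketch of that usage, assuming a kernel-module context; all my_* names and both struct layouts are hypothetical, only the dm_* and bio_* calls are the real API:

#include <linux/device-mapper.h>
#include <linux/jiffies.h>
#include <linux/slab.h>

struct my_ctx {                                 /* hypothetical per-target state */
        struct dm_dev *dev;
};

struct my_io {                                  /* hypothetical per-bio payload */
        unsigned long start_jiffies;
};

static int my_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        struct my_ctx *mc = kzalloc(sizeof(*mc), GFP_KERNEL);

        if (!mc)
                return -ENOMEM;
        if (argc != 1 ||
            dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &mc->dev)) {
                kfree(mc);
                ti->error = "Device lookup failed";
                return -EINVAL;
        }
        /* Reserve space that dm_per_bio_data() will hand back per clone. */
        ti->per_io_data_size = sizeof(struct my_io);
        ti->private = mc;
        return 0;
}

static void my_dtr(struct dm_target *ti)
{
        struct my_ctx *mc = ti->private;

        dm_put_device(ti, mc->dev);
        kfree(mc);
}

static int my_map(struct dm_target *ti, struct bio *bio)
{
        struct my_ctx *mc = ti->private;
        struct my_io *mio = dm_per_bio_data(bio, sizeof(struct my_io));

        mio->start_jiffies = jiffies;           /* read back in .end_io */
        bio_set_dev(bio, mc->dev->bdev);
        return DM_MAPIO_REMAPPED;
}

In .end_io the same dm_per_bio_data(bio, sizeof(struct my_io)) call returns the slot again, and dm_bio_from_per_bio_data() converts in the opposite direction.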
123 #define MINOR_ALLOCED ((void *)-1)
146 * Bio-based DM's mempools' reserved IOs set by the user.
200 DM_NUMA_NODE, num_online_nodes() - 1); in dm_get_numa_node()
213 r = -ENOMEM; in local_init()
287 while (i--) in dm_init()
297 while (i--) in dm_exit()
311 return test_bit(DMF_DELETING, &md->flags); in dm_deleting_md()
314 static int dm_blk_open(struct gendisk *disk, blk_mode_t mode) in dm_blk_open() argument
320 md = disk->private_data; in dm_blk_open()
324 if (test_bit(DMF_FREEING, &md->flags) || in dm_blk_open()
331 atomic_inc(&md->open_count); in dm_blk_open()
335 return md ? 0 : -ENXIO; in dm_blk_open()
338 static void dm_blk_close(struct gendisk *disk) in dm_blk_close() argument
344 md = disk->private_data; in dm_blk_close()
348 if (atomic_dec_and_test(&md->open_count) && in dm_blk_close()
349 (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) in dm_blk_close()
359 return atomic_read(&md->open_count); in dm_open_count()
372 r = -EBUSY; in dm_lock_for_deletion()
374 set_bit(DMF_DEFERRED_REMOVE, &md->flags); in dm_lock_for_deletion()
375 } else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags)) in dm_lock_for_deletion()
376 r = -EEXIST; in dm_lock_for_deletion()
378 set_bit(DMF_DELETING, &md->flags); in dm_lock_for_deletion()
391 if (test_bit(DMF_DELETING, &md->flags)) in dm_cancel_deferred_remove()
392 r = -EBUSY; in dm_cancel_deferred_remove()
394 clear_bit(DMF_DEFERRED_REMOVE, &md->flags); in dm_cancel_deferred_remove()
408 struct mapped_device *md = bdev->bd_disk->private_data; in dm_blk_getgeo()
422 r = -ENOTTY; in dm_prepare_ioctl()
428 if (map->num_targets != 1) in dm_prepare_ioctl()
432 if (!ti->type->prepare_ioctl) in dm_prepare_ioctl()
436 return -EAGAIN; in dm_prepare_ioctl()
438 r = ti->type->prepare_ioctl(ti, bdev, cmd, arg, forward); in dm_prepare_ioctl()
439 if (r == -ENOTCONN && *forward && !fatal_signal_pending(current)) { in dm_prepare_ioctl()
456 struct mapped_device *md = bdev->bd_disk->private_data; in dm_blk_ioctl()
471 "%s: sending ioctl %x to DM device without required privilege.", in dm_blk_ioctl()
472 current->comm, cmd); in dm_blk_ioctl()
473 r = -ENOIOCTLCMD; in dm_blk_ioctl()
478 if (!bdev->bd_disk->fops->ioctl) in dm_blk_ioctl()
479 r = -ENOTTY; in dm_blk_ioctl()
481 r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); in dm_blk_ioctl()
489 return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time); in dm_start_time_ns_from_clone()
495 return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); in bio_is_flush_with_data()
507 return io->sectors; in dm_io_sectors()
513 struct bio *bio = io->orig_bio; in dm_io_acct()
517 bdev_start_io_acct(bio->bi_bdev, bio_op(bio), in dm_io_acct()
518 io->start_time); in dm_io_acct()
520 bdev_end_io_acct(bio->bi_bdev, bio_op(bio), in dm_io_acct()
522 io->start_time); in dm_io_acct()
526 unlikely(dm_stats_used(&io->md->stats))) { in dm_io_acct()
530 sector = bio_end_sector(bio) - io->sector_offset; in dm_io_acct()
532 sector = bio->bi_iter.bi_sector; in dm_io_acct()
534 dm_stats_account_io(&io->md->stats, bio_data_dir(bio), in dm_io_acct()
536 end, io->start_time, &io->stats_aux); in dm_io_acct()
559 spin_lock_irqsave(&io->lock, flags); in dm_start_io_acct()
561 spin_unlock_irqrestore(&io->lock, flags); in dm_start_io_acct()
565 spin_unlock_irqrestore(&io->lock, flags); in dm_start_io_acct()
582 clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs); in alloc_io()
586 tio->flags = 0; in alloc_io()
588 tio->io = NULL; in alloc_io()
591 io->magic = DM_IO_MAGIC; in alloc_io()
592 io->status = BLK_STS_OK; in alloc_io()
595 atomic_set(&io->io_count, 2); in alloc_io()
596 this_cpu_inc(*md->pending_io); in alloc_io()
597 io->orig_bio = bio; in alloc_io()
598 io->md = md; in alloc_io()
599 spin_lock_init(&io->lock); in alloc_io()
600 io->start_time = jiffies; in alloc_io()
601 io->flags = 0; in alloc_io()
602 if (blk_queue_io_stat(md->queue)) in alloc_io()
606 unlikely(dm_stats_used(&md->stats))) in alloc_io()
607 dm_stats_record_start(&md->stats, &io->stats_aux); in alloc_io()
614 bio_put(&io->tio.clone); in free_io()
620 struct mapped_device *md = ci->io->md; in alloc_tio()
624 if (!ci->io->tio.io) { in alloc_tio()
625 /* the dm_target_io embedded in ci->io is available */ in alloc_tio()
626 tio = &ci->io->tio; in alloc_tio()
628 clone = &tio->clone; in alloc_tio()
630 clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, in alloc_tio()
631 &md->mempools->bs); in alloc_tio()
636 clone->bi_opf &= ~REQ_DM_POLL_LIST; in alloc_tio()
639 tio->flags = 0; /* also clears DM_TIO_INSIDE_DM_IO */ in alloc_tio()
642 tio->magic = DM_TIO_MAGIC; in alloc_tio()
643 tio->io = ci->io; in alloc_tio()
644 tio->ti = ti; in alloc_tio()
645 tio->target_bio_nr = target_bio_nr; in alloc_tio()
646 tio->len_ptr = len; in alloc_tio()
647 tio->old_sector = 0; in alloc_tio()
650 clone->bi_bdev = md->disk->part0; in alloc_tio()
651 if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev)) in alloc_tio()
652 bio_set_dev(clone, md->disk->part0); in alloc_tio()
655 clone->bi_iter.bi_size = to_bytes(*len); in alloc_tio()
671 * Add the bio to the list of deferred io.
677 spin_lock_irqsave(&md->deferred_lock, flags); in queue_io()
678 bio_list_add(&md->deferred, bio); in queue_io()
679 spin_unlock_irqrestore(&md->deferred_lock, flags); in queue_io()
680 queue_work(md->wq, &md->work); in queue_io()
685 * function to access the md->map field, and make sure they call
689 int *srcu_idx) __acquires(md->io_barrier) in dm_get_live_table()
691 *srcu_idx = srcu_read_lock(&md->io_barrier); in dm_get_live_table()
693 return srcu_dereference(md->map, &md->io_barrier); in dm_get_live_table()
697 int srcu_idx) __releases(md->io_barrier) in dm_put_live_table()
699 srcu_read_unlock(&md->io_barrier, srcu_idx); in dm_put_live_table()
704 synchronize_srcu(&md->io_barrier); in dm_sync_table()
709 * A fast alternative to dm_get_live_table/dm_put_live_table.
715 return rcu_dereference(md->map); in dm_get_live_table_fast()
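These SRCU accessors are how DM-core-side callers pin the live table: the pointer returned by dm_get_live_table() is valid only until dm_put_live_table() is called with the same srcu_idx, and table swaps use dm_sync_table() to wait out readers. A sketch of the read-side pattern; show_live_table_size() is hypothetical, the dm_* calls are the real interface:

static void show_live_table_size(struct mapped_device *md)
{
        int srcu_idx;
        struct dm_table *map = dm_get_live_table(md, &srcu_idx);

        /*
         * 'map' may be NULL (no table bound) and must not be dereferenced
         * or cached after the matching dm_put_live_table().
         */
        if (map)
                pr_debug("%s: live table covers %llu sectors\n",
                         dm_device_name(md),
                         (unsigned long long)dm_table_get_size(map));

        dm_put_live_table(md, srcu_idx);
}

dm_get_live_table_fast() above is the RCU-only fast path for short, non-sleeping sections.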
723 static char *_dm_claim_ptr = "I belong to device-mapper";
737 td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id); in open_table_device()
739 return ERR_PTR(-ENOMEM); in open_table_device()
740 refcount_set(&td->count, 1); in open_table_device()
751 * We can be called before the dm disk is added. In that case we can't in open_table_device()
755 if (md->disk->slave_dir) { in open_table_device()
756 r = bd_link_disk_holder(bdev, md->disk); in open_table_device()
761 td->dm_dev.mode = mode; in open_table_device()
762 td->dm_dev.bdev = bdev; in open_table_device()
763 td->dm_dev.bdev_file = bdev_file; in open_table_device()
764 td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, in open_table_device()
766 format_dev_t(td->dm_dev.name, dev); in open_table_device()
767 list_add(&td->list, &md->table_devices); in open_table_device()
782 if (md->disk->slave_dir) in close_table_device()
783 bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); in close_table_device()
786 if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags))) in close_table_device()
787 fput(td->dm_dev.bdev_file); in close_table_device()
789 __fput_sync(td->dm_dev.bdev_file); in close_table_device()
791 put_dax(td->dm_dev.dax_dev); in close_table_device()
792 list_del(&td->list); in close_table_device()
802 if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode) in find_table_device()
813 mutex_lock(&md->table_devices_lock); in dm_get_table_device()
814 td = find_table_device(&md->table_devices, dev, mode); in dm_get_table_device()
818 mutex_unlock(&md->table_devices_lock); in dm_get_table_device()
822 refcount_inc(&td->count); in dm_get_table_device()
824 mutex_unlock(&md->table_devices_lock); in dm_get_table_device()
826 *result = &td->dm_dev; in dm_get_table_device()
834 mutex_lock(&md->table_devices_lock); in dm_put_table_device()
835 if (refcount_dec_and_test(&td->count)) in dm_put_table_device()
837 mutex_unlock(&md->table_devices_lock); in dm_put_table_device()
845 *geo = md->geometry; in dm_get_geometry()
855 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; in dm_set_geometry()
857 if (geo->start > sz) { in dm_set_geometry()
859 return -EINVAL; in dm_set_geometry()
862 md->geometry = *geo; in dm_set_geometry()
869 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); in __noflush_suspending()
874 struct mapped_device *md = io->md; in dm_requeue_add_io()
877 struct dm_io *next = md->requeue_list; in dm_requeue_add_io()
879 md->requeue_list = io; in dm_requeue_add_io()
880 io->next = next; in dm_requeue_add_io()
882 bio_list_add_head(&md->deferred, io->orig_bio); in dm_requeue_add_io()
889 queue_work(md->wq, &md->requeue_work); in dm_kick_requeue()
891 queue_work(md->wq, &md->work); in dm_kick_requeue()
896 * io->status is updated with error if requeue disallowed.
900 struct bio *bio = io->orig_bio; in dm_handle_requeue()
901 bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE); in dm_handle_requeue()
902 bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) && in dm_handle_requeue()
903 (bio->bi_opf & REQ_POLLED)); in dm_handle_requeue()
904 struct mapped_device *md = io->md; in dm_handle_requeue()
910 if (bio->bi_opf & REQ_POLLED) { in dm_handle_requeue()
913 * (io->orig_bio may only reflect a subset of the in dm_handle_requeue()
914 * pre-split original) so clear REQ_POLLED. in dm_handle_requeue()
923 spin_lock_irqsave(&md->deferred_lock, flags); in dm_handle_requeue()
931 * noflush suspend was interrupted or this is in dm_handle_requeue()
932 * a write to a zoned target. in dm_handle_requeue()
934 io->status = BLK_STS_IOERR; in dm_handle_requeue()
936 spin_unlock_irqrestore(&md->deferred_lock, flags); in dm_handle_requeue()
947 struct bio *bio = io->orig_bio; in __dm_io_complete()
948 struct mapped_device *md = io->md; in __dm_io_complete()
956 io_error = io->status; in __dm_io_complete()
961 * Must handle target that DM_MAPIO_SUBMITTED only to in __dm_io_complete()
969 this_cpu_dec(*md->pending_io); in __dm_io_complete()
971 /* nudge anyone waiting on suspend queue */ in __dm_io_complete()
972 if (unlikely(wq_has_sleeper(&md->wait))) in __dm_io_complete()
973 wake_up(&md->wait); in __dm_io_complete()
984 bio->bi_opf &= ~REQ_PREFLUSH; in __dm_io_complete()
989 bio->bi_status = io_error; in __dm_io_complete()
1001 /* reuse deferred lock to simplify dm_handle_requeue */ in dm_wq_requeue_work()
1002 spin_lock_irqsave(&md->deferred_lock, flags); in dm_wq_requeue_work()
1003 io = md->requeue_list; in dm_wq_requeue_work()
1004 md->requeue_list = NULL; in dm_wq_requeue_work()
1005 spin_unlock_irqrestore(&md->deferred_lock, flags); in dm_wq_requeue_work()
1008 struct dm_io *next = io->next; in dm_wq_requeue_work()
1010 dm_io_rewind(io, &md->disk->bio_split); in dm_wq_requeue_work()
1012 io->next = NULL; in dm_wq_requeue_work()
1022 * 1) io->orig_bio points to the real original bio, and the part mapped to
1025 * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
1031 * we may run into long bio clone chain during suspend and OOM could in dm_io_complete()
1046 if (atomic_dec_and_test(&io->io_count)) in __dm_io_dec_pending()
1054 /* Push-back supersedes any I/O errors */ in dm_io_set_error()
1055 spin_lock_irqsave(&io->lock, flags); in dm_io_set_error()
1056 if (!(io->status == BLK_STS_DM_REQUEUE && in dm_io_set_error()
1057 __noflush_suspending(io->md))) { in dm_io_set_error()
1058 io->status = error; in dm_io_set_error()
1060 spin_unlock_irqrestore(&io->lock, flags); in dm_io_set_error()
1073 * count on 'md'. But _not_ imposing verification to avoid atomic_read(),
1077 return &md->queue->limits; in dm_get_queue_limits()
1082 return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios); in swap_bios_limit()
1087 blk_status_t error = bio->bi_status; in clone_endio()
1089 struct dm_target *ti = tio->ti; in clone_endio()
1090 dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL; in clone_endio()
1091 struct dm_io *io = tio->io; in clone_endio()
1092 struct mapped_device *md = io->md; in clone_endio()
1096 !bdev_max_discard_sectors(bio->bi_bdev)) in clone_endio()
1097 blk_queue_disable_discard(md->queue); in clone_endio()
1099 !bdev_write_zeroes_sectors(bio->bi_bdev)) in clone_endio()
1100 blk_queue_disable_write_zeroes(md->queue); in clone_endio()
1104 unlikely(bdev_is_zoned(bio->bi_bdev))) in clone_endio()
1114 * Requeuing writes to a sequential zone of a zoned in clone_endio()
1138 up(&md->swap_bios_semaphore); in clone_endio()
1145 * Return maximum size of I/O possible at the supplied sector up to the current
1151 return ti->len - target_offset; in max_io_len_target_boundary()
1162 * Does the target need to split IO even further? in __max_io_len()
1163 * - varied (per target) IO splitting is a tenet of DM; this in __max_io_len()
1170 min(max_sectors ? : queue_max_sectors(ti->table->md->queue), in __max_io_len()
1176 return __max_io_len(ti, sector, ti->max_io_len, 0); in max_io_len()
1184 ti->error = "Maximum size of target IO is too large"; in dm_set_target_max_io_len()
1185 return -EINVAL; in dm_set_target_max_io_len()
1188 ti->max_io_len = (uint32_t) len; in dm_set_target_max_io_len()
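dm_set_target_max_io_len() is the exported way for a target constructor to set ti->max_io_len, which __max_io_len() above folds into the split size for every bio sent to that target. A minimal hypothetical .ctr fragment; MY_CHUNK_SECTORS is illustrative:

#include <linux/device-mapper.h>

#define MY_CHUNK_SECTORS 2048                   /* illustrative 1 MiB boundary */

static int my_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        int r;

        /* Have DM core split incoming I/O at this target-specific boundary. */
        r = dm_set_target_max_io_len(ti, MY_CHUNK_SECTORS);
        if (r)
                return r;                       /* len did not fit in 32 bits */

        /* ... remaining constructor work (device lookup etc.) ... */
        return 0;
}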
1196 __acquires(md->io_barrier) in dm_dax_get_live_target()
1219 long len, ret = -EIO; in dm_dax_direct_access()
1226 if (!ti->type->direct_access) in dm_dax_direct_access()
1232 ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn); in dm_dax_direct_access()
1246 int ret = -EIO; in dm_dax_zero_page_range()
1253 if (WARN_ON(!ti->type->dax_zero_page_range)) { in dm_dax_zero_page_range()
1255 * ->zero_page_range() is mandatory dax operation. If we are in dm_dax_zero_page_range()
1260 ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages); in dm_dax_zero_page_range()
1277 if (!ti || !ti->type->dax_recovery_write) in dm_dax_recovery_write()
1280 ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i); in dm_dax_recovery_write()
1293 * dm_accept_partial_bio informs the dm that the target only wants to process
1298 * +--------------------+---------------+-------+
1300 * +--------------------+---------------+-------+
1302 * <-------------- *tio->len_ptr --------------->
1303 * <----- bio_sectors ----->
1304 * <-- n_sectors -->
1308 * Region 2 is the remaining bio size that the target wants to process.
1309 * (it may be empty if region 1 is non-empty, although there is no reason
1310 * to make it empty)
1311 * The target requires that region 3 is to be sent in the next bio.
1313 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
1320 struct dm_io *io = tio->io; in dm_accept_partial_bio()
1324 BUG_ON(bio_sectors > *tio->len_ptr); in dm_accept_partial_bio()
1328 unlikely(bdev_is_zoned(bio->bi_bdev))) { in dm_accept_partial_bio()
1337 *tio->len_ptr -= bio_sectors - n_sectors; in dm_accept_partial_bio()
1338 bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; in dm_accept_partial_bio()
1345 io->sectors = n_sectors; in dm_accept_partial_bio()
1346 io->sector_offset = bio_sectors(io->orig_bio); in dm_accept_partial_bio()
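dm_accept_partial_bio() lets a .map method keep only region 1 of the split described above and have DM core resubmit region 3 as a fresh bio. A hypothetical sketch of a target that refuses to cross a fixed chunk boundary; struct my_ctx and its ->dev/->start fields are illustrative:

#define MY_CHUNK_SECTORS 128                    /* illustrative power-of-two chunk */

static int my_map(struct dm_target *ti, struct bio *bio)
{
        struct my_ctx *mc = ti->private;        /* hypothetical target state */
        sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
        unsigned int to_boundary =
                MY_CHUNK_SECTORS - (offset & (MY_CHUNK_SECTORS - 1));

        /* Accept region 1 only; DM core re-submits the remainder. */
        if (bio_sectors(bio) > to_boundary)
                dm_accept_partial_bio(bio, to_boundary);

        bio_set_dev(bio, mc->dev->bdev);
        bio->bi_iter.bi_sector = mc->start + offset;
        return DM_MAPIO_REMAPPED;
}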
1351 * @clone: clone bio that DM core passed to target's .map function
1354 * Targets should use this interface to submit bios they take
1357 * Target should also enable ti->accounts_remapped_io
1362 struct dm_io *io = tio->io; in dm_submit_bio_remap()
1369 * Account io->origin_bio to DM dev on behalf of target in dm_submit_bio_remap()
1374 trace_block_bio_remap(tgt_clone, disk_devt(io->md->disk), in dm_submit_bio_remap()
1375 tio->old_sector); in dm_submit_bio_remap()
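dm_submit_bio_remap() is for targets that return DM_MAPIO_SUBMITTED from .map and submit the remapped bio later, typically from a worker; such a target must set ti->accounts_remapped_io in its constructor so the accounting skipped in __map_bio() happens here instead. A hypothetical deferred-submission sketch (the my_* names and the context struct are illustrative):

#include <linux/device-mapper.h>
#include <linux/workqueue.h>

struct my_ctx {                                 /* hypothetical per-target state */
        struct dm_dev *dev;
        struct work_struct work;
        struct bio_list deferred;
        spinlock_t lock;
};

/* .ctr must also set: ti->accounts_remapped_io = true; */

static int my_map(struct dm_target *ti, struct bio *bio)
{
        struct my_ctx *mc = ti->private;

        bio_set_dev(bio, mc->dev->bdev);
        spin_lock_irq(&mc->lock);
        bio_list_add(&mc->deferred, bio);
        spin_unlock_irq(&mc->lock);
        schedule_work(&mc->work);
        return DM_MAPIO_SUBMITTED;              /* the worker submits it below */
}

static void my_work_fn(struct work_struct *work)
{
        struct my_ctx *mc = container_of(work, struct my_ctx, work);
        struct bio *bio;

        spin_lock_irq(&mc->lock);
        while ((bio = bio_list_pop(&mc->deferred)) != NULL) {
                spin_unlock_irq(&mc->lock);
                /* DM core does start-of-I/O accounting and remap tracing. */
                dm_submit_bio_remap(bio, NULL);
                spin_lock_irq(&mc->lock);
        }
        spin_unlock_irq(&mc->lock);
}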
1382 mutex_lock(&md->swap_bios_lock); in __set_swap_bios_limit()
1383 while (latch < md->swap_bios) { in __set_swap_bios_limit()
1385 down(&md->swap_bios_semaphore); in __set_swap_bios_limit()
1386 md->swap_bios--; in __set_swap_bios_limit()
1388 while (latch > md->swap_bios) { in __set_swap_bios_limit()
1390 up(&md->swap_bios_semaphore); in __set_swap_bios_limit()
1391 md->swap_bios++; in __set_swap_bios_limit()
1393 mutex_unlock(&md->swap_bios_lock); in __set_swap_bios_limit()
1399 struct dm_target *ti = tio->ti; in __map_bio()
1400 struct dm_io *io = tio->io; in __map_bio()
1401 struct mapped_device *md = io->md; in __map_bio()
1404 clone->bi_end_io = clone_endio; in __map_bio()
1409 tio->old_sector = clone->bi_iter.bi_sector; in __map_bio()
1415 if (unlikely(latch != md->swap_bios)) in __map_bio()
1417 down(&md->swap_bios_semaphore); in __map_bio()
1420 if (likely(ti->type->map == linear_map)) in __map_bio()
1422 else if (ti->type->map == stripe_map) in __map_bio()
1425 r = ti->type->map(ti, clone); in __map_bio()
1430 if (!ti->accounts_remapped_io) in __map_bio()
1440 up(&md->swap_bios_semaphore); in __map_bio()
1455 struct dm_io *io = ci->io; in setup_split_accounting()
1457 if (ci->sector_count > len) { in setup_split_accounting()
1463 io->sectors = len; in setup_split_accounting()
1464 io->sector_offset = bio_sectors(ci->bio); in setup_split_accounting()
1479 mutex_lock(&ci->io->md->table_devices_lock); in alloc_multiple_bios()
1489 mutex_unlock(&ci->io->md->table_devices_lock); in alloc_multiple_bios()
1508 /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ in __send_duplicate_bios()
1513 * Using alloc_multiple_bios(), even if num_bios is 1, to consistently in __send_duplicate_bios()
1529 struct dm_table *t = ci->map; in __send_empty_flush()
1533 if ((ci->io->orig_bio->bi_opf & (REQ_IDLE | REQ_SYNC)) == in __send_empty_flush()
1538 * Use an on-stack bio for this, it's safe since we don't in __send_empty_flush()
1539 * need to reference it after submit. It's just used as in __send_empty_flush()
1542 bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0, opf); in __send_empty_flush()
1544 ci->bio = &flush_bio; in __send_empty_flush()
1545 ci->sector_count = 0; in __send_empty_flush()
1546 ci->io->tio.clone.bi_iter.bi_size = 0; in __send_empty_flush()
1548 if (!t->flush_bypasses_map) { in __send_empty_flush()
1549 for (unsigned int i = 0; i < t->num_targets; i++) { in __send_empty_flush()
1553 if (unlikely(ti->num_flush_bios == 0)) in __send_empty_flush()
1556 atomic_add(ti->num_flush_bios, &ci->io->io_count); in __send_empty_flush()
1557 bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, in __send_empty_flush()
1559 atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count); in __send_empty_flush()
1563 * Note that there's no need to grab t->devices_lock here in __send_empty_flush()
1575 * used by multiple targets), so we set tio->ti = NULL. in __send_empty_flush()
1576 * We must check for NULL in the I/O processing path, to in __send_empty_flush()
1580 atomic_add(1, &ci->io->io_count); in __send_empty_flush()
1581 bio_set_dev(clone, dd->dm_dev->bdev); in __send_empty_flush()
1582 clone->bi_end_io = clone_endio; in __send_empty_flush()
1591 atomic_sub(1, &ci->io->io_count); in __send_empty_flush()
1593 bio_uninit(ci->bio); in __send_empty_flush()
1602 len = min_t(sector_t, ci->sector_count, in __send_abnormal_io()
1603 __max_io_len(ti, ci->sector, max_granularity, max_sectors)); in __send_abnormal_io()
1605 atomic_add(num_bios, &ci->io->io_count); in __send_abnormal_io()
1611 atomic_sub(num_bios - bios + 1, &ci->io->io_count); in __send_abnormal_io()
1613 ci->sector += len; in __send_abnormal_io()
1614 ci->sector_count -= len; in __send_abnormal_io()
1640 struct queue_limits *limits = dm_get_queue_limits(ti->table->md); in __process_abnormal_io()
1642 switch (bio_op(ci->bio)) { in __process_abnormal_io()
1644 num_bios = ti->num_discard_bios; in __process_abnormal_io()
1645 max_sectors = limits->max_discard_sectors; in __process_abnormal_io()
1646 if (ti->max_discard_granularity) in __process_abnormal_io()
1650 num_bios = ti->num_secure_erase_bios; in __process_abnormal_io()
1651 max_sectors = limits->max_secure_erase_sectors; in __process_abnormal_io()
1654 num_bios = ti->num_write_zeroes_bios; in __process_abnormal_io()
1655 max_sectors = limits->max_write_zeroes_sectors; in __process_abnormal_io()
1676 * Reuse ->bi_private as dm_io list head for storing all dm_io instances
1677 * associated with this bio, and this bio's bi_private needs to be
1678 * stored in dm_io->data before the reuse.
1680 * bio->bi_private is owned by fs or upper layer, so block layer won't
1686 return (struct dm_io **)&bio->bi_private; in dm_poll_list_head()
1693 if (!(bio->bi_opf & REQ_DM_POLL_LIST)) { in dm_queue_poll_io()
1694 bio->bi_opf |= REQ_DM_POLL_LIST; in dm_queue_poll_io()
1699 io->data = bio->bi_private; in dm_queue_poll_io()
1701 /* tell block layer to poll for completion */ in dm_queue_poll_io()
1702 bio->bi_cookie = ~BLK_QC_T_NONE; in dm_queue_poll_io()
1704 io->next = NULL; in dm_queue_poll_io()
1707 * bio recursed due to split, reuse original poll list, in dm_queue_poll_io()
1708 * and save bio->bi_private too. in dm_queue_poll_io()
1710 io->data = (*head)->data; in dm_queue_poll_io()
1711 io->next = *head; in dm_queue_poll_io()
1718 * Select the correct strategy for processing a non-flush bio.
1726 ti = dm_table_find_target(ci->map, ci->sector); in __split_and_process_bio()
1730 if (unlikely(ci->is_abnormal_io)) in __split_and_process_bio()
1737 ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED); in __split_and_process_bio()
1739 len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); in __split_and_process_bio()
1740 if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count) in __split_and_process_bio()
1745 if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) { in __split_and_process_bio()
1746 if (unlikely(!dm_target_supports_nowait(ti->type))) in __split_and_process_bio()
1757 ci->sector += len; in __split_and_process_bio()
1758 ci->sector_count -= len; in __split_and_process_bio()
1766 ci->map = map; in init_clone_info()
1767 ci->io = io; in init_clone_info()
1768 ci->bio = bio; in init_clone_info()
1769 ci->is_abnormal_io = is_abnormal; in init_clone_info()
1770 ci->submit_as_polled = false; in init_clone_info()
1771 ci->sector = bio->bi_iter.bi_sector; in init_clone_info()
1772 ci->sector_count = bio_sectors(bio); in init_clone_info()
1774 /* Shouldn't happen but sector_count was being set to 0 so... */ in init_clone_info()
1776 WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count)) in init_clone_info()
1777 ci->sector_count = 0; in init_clone_info()
1798 * split any large BIO to the mapped device limits to not submit BIOs in dm_zone_bio_needs_split()
1799 * that span zone boundaries and to avoid potential deadlocks with in dm_zone_bio_needs_split()
1816 struct mapped_device *md = ci->io->md; in __send_zone_reset_all_emulated()
1817 unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors; in __send_zone_reset_all_emulated()
1822 sector_t sector = ti->begin; in __send_zone_reset_all_emulated()
1826 nr_zones = ti->len >> ilog2(zone_sectors); in __send_zone_reset_all_emulated()
1831 ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin, in __send_zone_reset_all_emulated()
1838 /* If we have no zone to reset, we are done. */ in __send_zone_reset_all_emulated()
1843 atomic_add(nr_zones, &ci->io->io_count); in __send_zone_reset_all_emulated()
1853 /* This may take a while, so be nice to others */ in __send_zone_reset_all_emulated()
1858 * We may need to reset thousands of zones, so let's in __send_zone_reset_all_emulated()
1865 /* Get a clone and change it to a regular reset operation. */ in __send_zone_reset_all_emulated()
1867 clone->bi_opf &= ~REQ_OP_MASK; in __send_zone_reset_all_emulated()
1868 clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC; in __send_zone_reset_all_emulated()
1869 clone->bi_iter.bi_sector = sector; in __send_zone_reset_all_emulated()
1870 clone->bi_iter.bi_size = 0; in __send_zone_reset_all_emulated()
1875 nr_reset--; in __send_zone_reset_all_emulated()
1879 atomic_sub(nr_zones - num_bios, &ci->io->io_count); in __send_zone_reset_all_emulated()
1880 ci->sector_count = 0; in __send_zone_reset_all_emulated()
1893 atomic_add(1, &ci->io->io_count); in __send_zone_reset_all_native()
1895 atomic_sub(1 - bios, &ci->io->io_count); in __send_zone_reset_all_native()
1897 ci->sector_count = 0; in __send_zone_reset_all_native()
1902 struct dm_table *t = ci->map; in __send_zone_reset_all()
1905 for (unsigned int i = 0; i < t->num_targets; i++) { in __send_zone_reset_all()
1908 if (ti->zone_reset_all_supported) { in __send_zone_reset_all()
1919 atomic_sub(1, &ci->io->io_count); in __send_zone_reset_all()
1940 * Entry point to split a bio into clones and submit them to the targets.
1962 * emulation to ensure that the BIO does not cross zone in dm_split_and_process_bio()
1972 * need zone append emulation (e.g. dm-crypt). in dm_split_and_process_bio()
1978 if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) { in dm_split_and_process_bio()
1981 * multiple bios and there's no easy way how to undo the in dm_split_and_process_bio()
1984 if (bio->bi_opf & REQ_PREFLUSH) { in dm_split_and_process_bio()
1990 /* Unable to do anything without dm_io. */ in dm_split_and_process_bio()
1999 if (bio->bi_opf & REQ_PREFLUSH) { in dm_split_and_process_bio()
2015 * Remainder must be passed to submit_bio_noacct() so it gets handled in dm_split_and_process_bio()
2018 bio_trim(bio, io->sectors, ci.sector_count); in dm_split_and_process_bio()
2019 trace_block_split(bio, bio->bi_iter.bi_sector); in dm_split_and_process_bio()
2024 * Drop the extra reference count for non-POLLED bio, and hold one in dm_split_and_process_bio()
2028 * in bio->bi_private, so that dm_poll_bio can poll them all. in dm_split_and_process_bio()
2036 atomic_dec(&io->io_count); in dm_split_and_process_bio()
2044 struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; in dm_submit_bio()
2057 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { in dm_submit_bio()
2058 if (bio->bi_opf & REQ_NOWAIT) in dm_submit_bio()
2060 else if (bio->bi_opf & REQ_RAHEAD) in dm_submit_bio()
2075 WARN_ON_ONCE(!dm_tio_is_normal(&io->tio)); in dm_poll_dm_io()
2078 if (atomic_read(&io->io_count) > 1) in dm_poll_dm_io()
2079 bio_poll(&io->tio.clone, iob, flags); in dm_poll_dm_io()
2082 return atomic_read(&io->io_count) == 1; in dm_poll_dm_io()
2094 if (!(bio->bi_opf & REQ_DM_POLL_LIST)) in dm_poll_bio()
2103 * submitted via submit_bio_noacct()'s depth-first submission. in dm_poll_bio()
2107 bio->bi_opf &= ~REQ_DM_POLL_LIST; in dm_poll_bio()
2108 bio->bi_private = list->data; in dm_poll_bio()
2110 for (curr = list, next = curr->next; curr; curr = next, next = in dm_poll_bio()
2111 curr ? curr->next : NULL) { in dm_poll_bio()
2119 curr->next = tmp; in dm_poll_bio()
2126 bio->bi_opf |= REQ_DM_POLL_LIST; in dm_poll_bio()
2127 /* Reset bio->bi_private to dm_io list head */ in dm_poll_bio()
2135 *---------------------------------------------------------------
2136 * An IDR is used to keep track of allocated minor numbers.
2137 *---------------------------------------------------------------
2154 return -EINVAL; in specific_minor()
2164 return r == -ENOSPC ? -EBUSY : r; in specific_minor()
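specific_minor() and next_free_minor() sit on a single IDR keyed by minor number. A generic sketch of the same allocation pattern (preload with GFP_KERNEL outside the lock, a GFP_NOWAIT idr_alloc under it, and -ENOSPC reported as -EBUSY); _my_minor_idr, _my_minor_lock and my_alloc_minor() are hypothetical stand-ins, not the dm.c symbols:

#include <linux/idr.h>
#include <linux/kdev_t.h>
#include <linux/spinlock.h>

static DEFINE_IDR(_my_minor_idr);
static DEFINE_SPINLOCK(_my_minor_lock);

/* requested < 0 means "any free minor"; returns the minor or a -errno. */
static int my_alloc_minor(void *md, int requested)
{
        int r;

        idr_preload(GFP_KERNEL);
        spin_lock(&_my_minor_lock);

        if (requested < 0)
                r = idr_alloc(&_my_minor_idr, md, 0, 1 << MINORBITS, GFP_NOWAIT);
        else
                r = idr_alloc(&_my_minor_idr, md, requested, requested + 1,
                              GFP_NOWAIT);

        spin_unlock(&_my_minor_lock);
        idr_preload_end();

        return r == -ENOSPC ? -EBUSY : r;
}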
2194 dm_destroy_crypto_profile(q->crypto_profile); in dm_queue_destroy_crypto_profile()
2206 if (md->wq) in cleanup_mapped_device()
2207 destroy_workqueue(md->wq); in cleanup_mapped_device()
2208 dm_free_md_mempools(md->mempools); in cleanup_mapped_device()
2210 if (md->dax_dev) { in cleanup_mapped_device()
2211 dax_remove_host(md->disk); in cleanup_mapped_device()
2212 kill_dax(md->dax_dev); in cleanup_mapped_device()
2213 put_dax(md->dax_dev); in cleanup_mapped_device()
2214 md->dax_dev = NULL; in cleanup_mapped_device()
2217 if (md->disk) { in cleanup_mapped_device()
2219 md->disk->private_data = NULL; in cleanup_mapped_device()
2225 list_for_each_entry(td, &md->table_devices, list) { in cleanup_mapped_device()
2226 bd_unlink_disk_holder(td->dm_dev.bdev, in cleanup_mapped_device()
2227 md->disk); in cleanup_mapped_device()
2231 * Hold lock to make sure del_gendisk() won't concurrent in cleanup_mapped_device()
2234 mutex_lock(&md->table_devices_lock); in cleanup_mapped_device()
2235 del_gendisk(md->disk); in cleanup_mapped_device()
2236 mutex_unlock(&md->table_devices_lock); in cleanup_mapped_device()
2238 dm_queue_destroy_crypto_profile(md->queue); in cleanup_mapped_device()
2239 put_disk(md->disk); in cleanup_mapped_device()
2242 if (md->pending_io) { in cleanup_mapped_device()
2243 free_percpu(md->pending_io); in cleanup_mapped_device()
2244 md->pending_io = NULL; in cleanup_mapped_device()
2247 cleanup_srcu_struct(&md->io_barrier); in cleanup_mapped_device()
2249 mutex_destroy(&md->suspend_lock); in cleanup_mapped_device()
2250 mutex_destroy(&md->type_lock); in cleanup_mapped_device()
2251 mutex_destroy(&md->table_devices_lock); in cleanup_mapped_device()
2252 mutex_destroy(&md->swap_bios_lock); in cleanup_mapped_device()
2269 DMERR("unable to allocate device, out of memory."); in alloc_dev()
2284 r = init_srcu_struct(&md->io_barrier); in alloc_dev()
2288 md->numa_node_id = numa_node_id; in alloc_dev()
2289 md->init_tio_pdu = false; in alloc_dev()
2290 md->type = DM_TYPE_NONE; in alloc_dev()
2291 mutex_init(&md->suspend_lock); in alloc_dev()
2292 mutex_init(&md->type_lock); in alloc_dev()
2293 mutex_init(&md->table_devices_lock); in alloc_dev()
2294 spin_lock_init(&md->deferred_lock); in alloc_dev()
2295 atomic_set(&md->holders, 1); in alloc_dev()
2296 atomic_set(&md->open_count, 0); in alloc_dev()
2297 atomic_set(&md->event_nr, 0); in alloc_dev()
2298 atomic_set(&md->uevent_seq, 0); in alloc_dev()
2299 INIT_LIST_HEAD(&md->uevent_list); in alloc_dev()
2300 INIT_LIST_HEAD(&md->table_devices); in alloc_dev()
2301 spin_lock_init(&md->uevent_lock); in alloc_dev()
2304 * default to bio-based until DM table is loaded and md->type in alloc_dev()
2305 * established. If request-based table is loaded: blk-mq will in alloc_dev()
2308 md->disk = blk_alloc_disk(NULL, md->numa_node_id); in alloc_dev()
2309 if (IS_ERR(md->disk)) { in alloc_dev()
2310 md->disk = NULL; in alloc_dev()
2313 md->queue = md->disk->queue; in alloc_dev()
2315 init_waitqueue_head(&md->wait); in alloc_dev()
2316 INIT_WORK(&md->work, dm_wq_work); in alloc_dev()
2317 INIT_WORK(&md->requeue_work, dm_wq_requeue_work); in alloc_dev()
2318 init_waitqueue_head(&md->eventq); in alloc_dev()
2319 init_completion(&md->kobj_holder.completion); in alloc_dev()
2321 md->requeue_list = NULL; in alloc_dev()
2322 md->swap_bios = get_swap_bios(); in alloc_dev()
2323 sema_init(&md->swap_bios_semaphore, md->swap_bios); in alloc_dev()
2324 mutex_init(&md->swap_bios_lock); in alloc_dev()
2326 md->disk->major = _major; in alloc_dev()
2327 md->disk->first_minor = minor; in alloc_dev()
2328 md->disk->minors = 1; in alloc_dev()
2329 md->disk->flags |= GENHD_FL_NO_PART; in alloc_dev()
2330 md->disk->fops = &dm_blk_dops; in alloc_dev()
2331 md->disk->private_data = md; in alloc_dev()
2332 sprintf(md->disk->disk_name, "dm-%d", minor); in alloc_dev()
2336 if (PTR_ERR(dax_dev) != -EOPNOTSUPP) in alloc_dev()
2341 md->dax_dev = dax_dev; in alloc_dev()
2342 if (dax_add_host(dax_dev, md->disk)) in alloc_dev()
2346 format_dev_t(md->name, MKDEV(_major, minor)); in alloc_dev()
2348 md->wq = alloc_workqueue("kdmflush/%s", WQ_MEM_RECLAIM, 0, md->name); in alloc_dev()
2349 if (!md->wq) in alloc_dev()
2352 md->pending_io = alloc_percpu(unsigned long); in alloc_dev()
2353 if (!md->pending_io) in alloc_dev()
2356 r = dm_stats_init(&md->stats); in alloc_dev()
2384 int minor = MINOR(disk_devt(md->disk)); in free_dev()
2390 WARN_ON_ONCE(!list_empty(&md->table_devices)); in free_dev()
2391 dm_stats_cleanup(&md->stats); in free_dev()
2399 * Bind a table to the device.
2407 spin_lock_irqsave(&md->uevent_lock, flags); in event_callback()
2408 list_splice_init(&md->uevent_list, &uevents); in event_callback()
2409 spin_unlock_irqrestore(&md->uevent_lock, flags); in event_callback()
2411 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); in event_callback()
2413 atomic_inc(&md->event_nr); in event_callback()
2414 wake_up(&md->eventq); in event_callback()
2428 lockdep_assert_held(&md->suspend_lock); in __bind()
2435 old_map = ERR_PTR(-EINVAL); in __bind()
2439 set_capacity(md->disk, size); in __bind()
2441 ret = dm_table_set_restrictions(t, md->queue, limits); in __bind()
2443 set_capacity(md->disk, old_size); in __bind()
2452 memset(&md->geometry, 0, sizeof(md->geometry)); in __bind()
2458 * Leverage the fact that request-based DM targets are in __bind()
2459 * immutable singletons - used to optimize dm_mq_queue_rq. in __bind()
2461 md->immutable_target = dm_table_get_immutable_target(t); in __bind()
2464 * There is no need to reload with request-based dm because the in __bind()
2467 * Note for future: If you are to reload bioset, prep-ed in __bind()
2468 * requests in the queue may refer to bio from the old bioset, in __bind()
2469 * so you must walk through the queue to unprep. in __bind()
2471 if (!md->mempools) in __bind()
2472 md->mempools = t->mempools; in __bind()
2474 dm_free_md_mempools(t->mempools); in __bind()
2481 dm_free_md_mempools(md->mempools); in __bind()
2482 md->mempools = t->mempools; in __bind()
2484 t->mempools = NULL; in __bind()
2486 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); in __bind()
2487 rcu_assign_pointer(md->map, (void *)t); in __bind()
2488 md->immutable_target_type = dm_table_get_immutable_target_type(t); in __bind()
2497 * Returns unbound table for the caller to free.
2501 struct dm_table *map = rcu_dereference_protected(md->map, 1); in __unbind()
2507 RCU_INIT_POINTER(md->map, NULL); in __unbind()
2522 return -ENXIO; in dm_create()
2531 * Functions to manage md->type.
2532 * All are required to hold md->type_lock.
2536 mutex_lock(&md->type_lock); in dm_lock_md_type()
2541 mutex_unlock(&md->type_lock); in dm_unlock_md_type()
2546 return md->type; in dm_get_md_type()
2551 return md->immutable_target_type; in dm_get_immutable_target_type()
2567 md->disk->fops = &dm_rq_blk_dops; in dm_setup_md_queue()
2570 DMERR("Cannot initialize queue for request-based dm mapped device"); in dm_setup_md_queue()
2580 r = dm_table_set_restrictions(t, md->queue, &limits); in dm_setup_md_queue()
2585 * Hold lock to make sure add_disk() and del_gendisk() won't concurrent in dm_setup_md_queue()
2588 mutex_lock(&md->table_devices_lock); in dm_setup_md_queue()
2589 r = add_disk(md->disk); in dm_setup_md_queue()
2590 mutex_unlock(&md->table_devices_lock); in dm_setup_md_queue()
2595 * Register the holder relationship for devices added before the disk in dm_setup_md_queue()
2598 list_for_each_entry(td, &md->table_devices, list) { in dm_setup_md_queue()
2599 r = bd_link_disk_holder(td->dm_dev.bdev, md->disk); in dm_setup_md_queue()
2608 md->type = type; in dm_setup_md_queue()
2612 list_for_each_entry_continue_reverse(td, &md->table_devices, list) in dm_setup_md_queue()
2613 bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); in dm_setup_md_queue()
2614 mutex_lock(&md->table_devices_lock); in dm_setup_md_queue()
2615 del_gendisk(md->disk); in dm_setup_md_queue()
2616 mutex_unlock(&md->table_devices_lock); in dm_setup_md_queue()
2632 test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { in dm_get_md()
2646 return md->interface_ptr; in dm_get_mdptr()
2651 md->interface_ptr = ptr; in dm_set_mdptr()
2656 atomic_inc(&md->holders); in dm_get()
2657 BUG_ON(test_bit(DMF_FREEING, &md->flags)); in dm_get()
2663 if (test_bit(DMF_FREEING, &md->flags)) { in dm_hold()
2665 return -EBUSY; in dm_hold()
2675 return md->name; in dm_device_name()
2688 set_bit(DMF_FREEING, &md->flags); in __dm_destroy()
2691 blk_mark_disk_dead(md->disk); in __dm_destroy()
2695 * do not race with internal suspend. in __dm_destroy()
2697 mutex_lock(&md->suspend_lock); in __dm_destroy()
2701 set_bit(DMF_SUSPENDED, &md->flags); in __dm_destroy()
2702 set_bit(DMF_POST_SUSPENDING, &md->flags); in __dm_destroy()
2707 mutex_unlock(&md->suspend_lock); in __dm_destroy()
2710 * Rare, but there may be I/O requests still going to complete, in __dm_destroy()
2711 * for example. Wait for all references to disappear. in __dm_destroy()
2716 while (atomic_read(&md->holders)) in __dm_destroy()
2718 else if (atomic_read(&md->holders)) in __dm_destroy()
2720 dm_device_name(md), atomic_read(&md->holders)); in __dm_destroy()
2738 atomic_dec(&md->holders); in dm_put()
2748 sum += *per_cpu_ptr(md->pending_io, cpu); in dm_in_flight_bios()
2759 prepare_to_wait(&md->wait, &wait, task_state); in dm_wait_for_bios_completion()
2765 r = -ERESTARTSYS; in dm_wait_for_bios_completion()
2771 finish_wait(&md->wait, &wait); in dm_wait_for_bios_completion()
2782 if (!queue_is_mq(md->queue)) in dm_wait_for_completion()
2786 if (!blk_mq_queue_inflight(md->queue)) in dm_wait_for_completion()
2790 r = -ERESTARTSYS; in dm_wait_for_completion()
2808 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { in dm_wq_work()
2809 spin_lock_irq(&md->deferred_lock); in dm_wq_work()
2810 bio = bio_list_pop(&md->deferred); in dm_wq_work()
2811 spin_unlock_irq(&md->deferred_lock); in dm_wq_work()
2823 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); in dm_queue_flush()
2825 queue_work(md->wq, &md->work); in dm_queue_flush()
2829 * Swap in a new table, returning the old one for the caller to destroy.
2833 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); in dm_swap_table()
2837 mutex_lock(&md->suspend_lock); in dm_swap_table()
2852 limits = md->queue->limits; in dm_swap_table()
2868 mutex_unlock(&md->suspend_lock); in dm_swap_table()
2873 * Functions to lock and unlock any filesystem running on the
2880 WARN_ON(test_bit(DMF_FROZEN, &md->flags)); in lock_fs()
2882 r = bdev_freeze(md->disk->part0); in lock_fs()
2884 set_bit(DMF_FROZEN, &md->flags); in lock_fs()
2890 if (!test_bit(DMF_FROZEN, &md->flags)) in unlock_fs()
2892 bdev_thaw(md->disk->part0); in unlock_fs()
2893 clear_bit(DMF_FROZEN, &md->flags); in unlock_fs()
2902 * now. There is no request-processing activity. All new requests
2903 * are being added to md->deferred list.
2913 lockdep_assert_held(&md->suspend_lock); in __dm_suspend()
2920 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); in __dm_suspend()
2931 * Flush I/O to the device. in __dm_suspend()
2934 * (lock_fs() flushes I/Os and waits for them to complete.) in __dm_suspend()
2946 * to target drivers i.e. no one may be executing in __dm_suspend()
2949 * To get all processes out of dm_split_and_process_bio in dm_submit_bio, in __dm_suspend()
2950 * we take the write lock. To prevent any process from reentering in __dm_suspend()
2953 * flush_workqueue(md->wq). in __dm_suspend()
2955 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); in __dm_suspend()
2957 synchronize_srcu(&md->io_barrier); in __dm_suspend()
2960 * Stop md->queue before flushing md->wq in case request-based in __dm_suspend()
2961 * dm defers requests to md->wq from md->queue. in __dm_suspend()
2964 dm_stop_queue(md->queue); in __dm_suspend()
2966 flush_workqueue(md->wq); in __dm_suspend()
2970 * We call dm_wait_for_completion to wait for all existing requests in __dm_suspend()
2971 * to finish. in __dm_suspend()
2975 set_bit(dmf_suspended_flag, &md->flags); in __dm_suspend()
2978 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); in __dm_suspend()
2980 synchronize_srcu(&md->io_barrier); in __dm_suspend()
2987 dm_start_queue(md->queue); in __dm_suspend()
2998 * We need to be able to change a mapping table under a mounted
2999 * filesystem. For example we might want to move some data in
3001 * dm_bind_table, dm_suspend must be called to flush any in
3005 * Suspend mechanism in request-based dm.
3009 * 3. Wait for all in-flight I/Os to be completed or requeued.
3011 * To abort suspend, start the request_queue.
3019 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); in dm_suspend()
3022 r = -EINVAL; in dm_suspend()
3028 mutex_unlock(&md->suspend_lock); in dm_suspend()
3029 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); in dm_suspend()
3035 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); in dm_suspend()
3045 set_bit(DMF_POST_SUSPENDING, &md->flags); in dm_suspend()
3047 clear_bit(DMF_POST_SUSPENDING, &md->flags); in dm_suspend()
3050 mutex_unlock(&md->suspend_lock); in dm_suspend()
3068 * Request-based dm is queueing the deferred I/Os in its request_queue. in __dm_resume()
3071 dm_start_queue(md->queue); in __dm_resume()
3084 r = -EINVAL; in dm_resume()
3085 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); in dm_resume()
3092 mutex_unlock(&md->suspend_lock); in dm_resume()
3093 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); in dm_resume()
3099 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); in dm_resume()
3107 clear_bit(DMF_SUSPENDED, &md->flags); in dm_resume()
3109 mutex_unlock(&md->suspend_lock); in dm_resume()
3115 * Internal suspend/resume works like userspace-driven suspend. It waits
3116 * until all bios finish and prevents issuing new bios to the target drivers.
3124 lockdep_assert_held(&md->suspend_lock); in __dm_internal_suspend()
3126 if (md->internal_suspend_count++) in __dm_internal_suspend()
3127 return; /* nested internal suspend */ in __dm_internal_suspend()
3130 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); in __dm_internal_suspend()
3131 return; /* nest suspend */ in __dm_internal_suspend()
3134 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); in __dm_internal_suspend()
3137 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is in __dm_internal_suspend()
3138 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend in __dm_internal_suspend()
3139 * would require changing .presuspend to return an error -- avoid this in __dm_internal_suspend()
3140 * until there is a need for more elaborate variants of internal suspend. in __dm_internal_suspend()
3145 set_bit(DMF_POST_SUSPENDING, &md->flags); in __dm_internal_suspend()
3147 clear_bit(DMF_POST_SUSPENDING, &md->flags); in __dm_internal_suspend()
3155 BUG_ON(!md->internal_suspend_count); in __dm_internal_resume()
3157 if (--md->internal_suspend_count) in __dm_internal_resume()
3158 return; /* resume from nested internal suspend */ in __dm_internal_resume()
3161 goto done; /* resume from nested suspend */ in __dm_internal_resume()
3163 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); in __dm_internal_resume()
3168 * tricky situation. We can't return an error to the caller. We in __dm_internal_resume()
3174 * So, we fake normal suspend here, to make sure that the in __dm_internal_resume()
3178 set_bit(DMF_SUSPENDED, &md->flags); in __dm_internal_resume()
3181 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); in __dm_internal_resume()
3183 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY); in __dm_internal_resume()
3188 mutex_lock(&md->suspend_lock); in dm_internal_suspend_noflush()
3190 mutex_unlock(&md->suspend_lock); in dm_internal_suspend_noflush()
3196 mutex_lock(&md->suspend_lock); in dm_internal_resume()
3198 mutex_unlock(&md->suspend_lock); in dm_internal_resume()
3203 * Fast variants of internal suspend/resume hold md->suspend_lock,
3204 * which prevents interaction with userspace-driven suspend.
3209 mutex_lock(&md->suspend_lock); in dm_internal_suspend_fast()
3213 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); in dm_internal_suspend_fast()
3214 synchronize_srcu(&md->io_barrier); in dm_internal_suspend_fast()
3215 flush_workqueue(md->wq); in dm_internal_suspend_fast()
3228 mutex_unlock(&md->suspend_lock); in dm_internal_resume_fast()
3233 *---------------------------------------------------------------
3235 *---------------------------------------------------------------
3256 r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); in dm_kobject_uevent()
3265 return atomic_add_return(1, &md->uevent_seq); in dm_next_uevent_seq()
3270 return atomic_read(&md->event_nr); in dm_get_event_nr()
3275 return wait_event_interruptible(md->eventq, in dm_wait_event()
3276 (event_nr != atomic_read(&md->event_nr))); in dm_wait_event()
3283 spin_lock_irqsave(&md->uevent_lock, flags); in dm_uevent_add()
3284 list_add(elist, &md->uevent_list); in dm_uevent_add()
3285 spin_unlock_irqrestore(&md->uevent_lock, flags); in dm_uevent_add()
3294 return md->disk; in dm_disk()
3300 return &md->kobj_holder.kobj; in dm_kobject()
3310 if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { in dm_get_from_kobject()
3323 return test_bit(DMF_SUSPENDED, &md->flags); in dm_suspended_md()
3328 return test_bit(DMF_POST_SUSPENDING, &md->flags); in dm_post_suspending_md()
3333 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); in dm_suspended_internally_md()
3338 return test_bit(DMF_DEFERRED_REMOVE, &md->flags); in dm_test_deferred_remove_flag()
3343 return dm_suspended_md(ti->table->md); in dm_suspended()
3349 return dm_post_suspending_md(ti->table->md); in dm_post_suspending()
3355 return __noflush_suspending(ti->table->md); in dm_noflush_suspending()
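dm_suspended(), dm_post_suspending() and dm_noflush_suspending() are the exported forms of these flag tests that targets may call from their own hooks; dm-mpath, for example, consults dm_noflush_suspending() when deciding whether to push I/O back. A hypothetical .end_io sketch in that spirit:

static int my_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
        /* While a noflush suspend is in flight, requeue errors for retry. */
        if (*error == BLK_STS_IOERR && dm_noflush_suspending(ti))
                return DM_ENDIO_REQUEUE;

        return DM_ENDIO_DONE;
}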
3364 bioset_exit(&pools->bs); in dm_free_md_mempools()
3365 bioset_exit(&pools->io_bs); in dm_free_md_mempools()
3379 const struct block_device_operations *fops = dev->bdev->bd_disk->fops; in __dm_get_unique_id()
3381 if (!fops->get_unique_id) in __dm_get_unique_id()
3384 return fops->get_unique_id(dev->bdev->bd_disk, dm_id->id, dm_id->type); in __dm_get_unique_id()
3388 * Allow access to get_unique_id() for the first device returning a
3389 * non-zero result. Reasonable use expects all devices to have the
3392 static int dm_blk_get_unique_id(struct gendisk *disk, u8 *id, in dm_blk_get_unique_id() argument
3395 struct mapped_device *md = disk->private_data; in dm_blk_get_unique_id()
3410 if (table->num_targets != 1) in dm_blk_get_unique_id()
3414 if (!ti->type->iterate_devices) in dm_blk_get_unique_id()
3417 ret = ti->type->iterate_devices(ti, __dm_get_unique_id, &dm_id); in dm_blk_get_unique_id()
3438 struct mapped_device *md = bdev->bd_disk->private_data; in dm_call_pr()
3441 int ret = -ENOTTY, srcu_idx; in dm_call_pr()
3448 if (table->num_targets != 1) in dm_call_pr()
3453 ret = -EAGAIN; in dm_call_pr()
3457 ret = -EINVAL; in dm_call_pr()
3458 if (!ti->type->iterate_devices) in dm_call_pr()
3461 ti->type->iterate_devices(ti, fn, pr); in dm_call_pr()
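dm_call_pr() reaches the underlying paths through the table's single target and its .iterate_devices hook, so a target that wants persistent-reservation passthrough must implement that hook. A minimal hypothetical implementation for a single-device target (struct my_ctx and its fields are illustrative):

static int my_iterate_devices(struct dm_target *ti,
                              iterate_devices_callout_fn fn, void *data)
{
        struct my_ctx *mc = ti->private;        /* hypothetical: ->dev, ->start */

        /* Report the one underlying device; fn() is e.g. __dm_pr_register(). */
        return fn(ti, mc->dev, mc->start, ti->len, data);
}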
3469 * For register / unregister we need to manually call out to every path.
3475 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_register()
3478 if (!ops || !ops->pr_register) { in __dm_pr_register()
3479 pr->ret = -EOPNOTSUPP; in __dm_pr_register()
3480 return -1; in __dm_pr_register()
3483 ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags); in __dm_pr_register()
3487 if (!pr->ret) in __dm_pr_register()
3488 pr->ret = ret; in __dm_pr_register()
3490 if (pr->fail_early) in __dm_pr_register()
3491 return -1; in __dm_pr_register()
3510 /* Didn't even get to register a path */ in dm_pr_register()
3521 /* unregister all paths if we failed to register any path */ in dm_pr_register()
3535 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_reserve()
3537 if (!ops || !ops->pr_reserve) { in __dm_pr_reserve()
3538 pr->ret = -EOPNOTSUPP; in __dm_pr_reserve()
3539 return -1; in __dm_pr_reserve()
3542 pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags); in __dm_pr_reserve()
3543 if (!pr->ret) in __dm_pr_reserve()
3544 return -1; in __dm_pr_reserve()
3569 * If there is a non-All Registrants type of reservation, the release must be
3572 * try each path to make sure we got the correct path.
3578 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_release()
3580 if (!ops || !ops->pr_release) { in __dm_pr_release()
3581 pr->ret = -EOPNOTSUPP; in __dm_pr_release()
3582 return -1; in __dm_pr_release()
3585 pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type); in __dm_pr_release()
3586 if (pr->ret) in __dm_pr_release()
3587 return -1; in __dm_pr_release()
3612 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_preempt()
3614 if (!ops || !ops->pr_preempt) { in __dm_pr_preempt()
3615 pr->ret = -EOPNOTSUPP; in __dm_pr_preempt()
3616 return -1; in __dm_pr_preempt()
3619 pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type, in __dm_pr_preempt()
3620 pr->abort); in __dm_pr_preempt()
3621 if (!pr->ret) in __dm_pr_preempt()
3622 return -1; in __dm_pr_preempt()
3647 struct mapped_device *md = bdev->bd_disk->private_data; in dm_pr_clear()
3652 /* Not a real ioctl, but targets must not interpret non-DM ioctls */ in dm_pr_clear()
3658 ops = bdev->bd_disk->fops->pr_ops; in dm_pr_clear()
3659 if (ops && ops->pr_clear) in dm_pr_clear()
3660 r = ops->pr_clear(bdev, key); in dm_pr_clear()
3662 r = -EOPNOTSUPP; in dm_pr_clear()
3672 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_read_keys()
3674 if (!ops || !ops->pr_read_keys) { in __dm_pr_read_keys()
3675 pr->ret = -EOPNOTSUPP; in __dm_pr_read_keys()
3676 return -1; in __dm_pr_read_keys()
3679 pr->ret = ops->pr_read_keys(dev->bdev, pr->read_keys); in __dm_pr_read_keys()
3680 if (!pr->ret) in __dm_pr_read_keys()
3681 return -1; in __dm_pr_read_keys()
3704 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; in __dm_pr_read_reservation()
3706 if (!ops || !ops->pr_read_reservation) { in __dm_pr_read_reservation()
3707 pr->ret = -EOPNOTSUPP; in __dm_pr_read_reservation()
3708 return -1; in __dm_pr_read_reservation()
3711 pr->ret = ops->pr_read_reservation(dev->bdev, pr->rsv); in __dm_pr_read_reservation()
3712 if (!pr->ret) in __dm_pr_read_reservation()
3713 return -1; in __dm_pr_read_reservation()
3782 MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
3791 MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");