11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * gendisk handling 31da177e4SLinus Torvalds */ 41da177e4SLinus Torvalds 51da177e4SLinus Torvalds #include <linux/module.h> 61da177e4SLinus Torvalds #include <linux/fs.h> 71da177e4SLinus Torvalds #include <linux/genhd.h> 8b446b60eSAndrew Morton #include <linux/kdev_t.h> 91da177e4SLinus Torvalds #include <linux/kernel.h> 101da177e4SLinus Torvalds #include <linux/blkdev.h> 1166114cadSTejun Heo #include <linux/backing-dev.h> 121da177e4SLinus Torvalds #include <linux/init.h> 131da177e4SLinus Torvalds #include <linux/spinlock.h> 14f500975aSAlexey Dobriyan #include <linux/proc_fs.h> 151da177e4SLinus Torvalds #include <linux/seq_file.h> 161da177e4SLinus Torvalds #include <linux/slab.h> 171da177e4SLinus Torvalds #include <linux/kmod.h> 181da177e4SLinus Torvalds #include <linux/kobj_map.h> 1958383af6SJes Sorensen #include <linux/mutex.h> 20bcce3de1STejun Heo #include <linux/idr.h> 2177ea887eSTejun Heo #include <linux/log2.h> 2225e823c8SMing Lei #include <linux/pm_runtime.h> 231da177e4SLinus Torvalds 24ff88972cSAdrian Bunk #include "blk.h" 25ff88972cSAdrian Bunk 26edfaa7c3SKay Sievers static DEFINE_MUTEX(block_class_lock); 27edfaa7c3SKay Sievers struct kobject *block_depr; 281da177e4SLinus Torvalds 29bcce3de1STejun Heo /* for extended dynamic devt allocation, currently only one major is used */ 30ce23bba8STejun Heo #define NR_EXT_DEVT (1 << MINORBITS) 31bcce3de1STejun Heo 322da78092SKeith Busch /* For extended devt allocation. ext_devt_lock prevents look up 33bcce3de1STejun Heo * results from going away underneath its user. 34bcce3de1STejun Heo */ 352da78092SKeith Busch static DEFINE_SPINLOCK(ext_devt_lock); 36bcce3de1STejun Heo static DEFINE_IDR(ext_devt_idr); 37bcce3de1STejun Heo 381826eadfSAdrian Bunk static struct device_type disk_type; 391826eadfSAdrian Bunk 4012c2bdb2SDerek Basehore static void disk_check_events(struct disk_events *ev, 4112c2bdb2SDerek Basehore unsigned int *clearing_ptr); 429f53d2feSStanislaw Gruszka static void disk_alloc_events(struct gendisk *disk); 4377ea887eSTejun Heo static void disk_add_events(struct gendisk *disk); 4477ea887eSTejun Heo static void disk_del_events(struct gendisk *disk); 4577ea887eSTejun Heo static void disk_release_events(struct gendisk *disk); 4677ea887eSTejun Heo 47e71bf0d0STejun Heo /** 48e71bf0d0STejun Heo * disk_get_part - get partition 49e71bf0d0STejun Heo * @disk: disk to look partition from 50e71bf0d0STejun Heo * @partno: partition number 51e71bf0d0STejun Heo * 52e71bf0d0STejun Heo * Look for partition @partno from @disk. If found, increment 53e71bf0d0STejun Heo * reference count and return it. 54e71bf0d0STejun Heo * 55e71bf0d0STejun Heo * CONTEXT: 56e71bf0d0STejun Heo * Don't care. 57e71bf0d0STejun Heo * 58e71bf0d0STejun Heo * RETURNS: 59e71bf0d0STejun Heo * Pointer to the found partition on success, NULL if not found. 60e71bf0d0STejun Heo */ 61e71bf0d0STejun Heo struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 62e71bf0d0STejun Heo { 63540eed56STejun Heo struct hd_struct *part = NULL; 64540eed56STejun Heo struct disk_part_tbl *ptbl; 65e71bf0d0STejun Heo 66540eed56STejun Heo if (unlikely(partno < 0)) 67e71bf0d0STejun Heo return NULL; 68540eed56STejun Heo 69e71bf0d0STejun Heo rcu_read_lock(); 70540eed56STejun Heo 71540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 72540eed56STejun Heo if (likely(partno < ptbl->len)) { 73540eed56STejun Heo part = rcu_dereference(ptbl->part[partno]); 74e71bf0d0STejun Heo if (part) 75ed9e1982STejun Heo get_device(part_to_dev(part)); 76540eed56STejun Heo } 77540eed56STejun Heo 78e71bf0d0STejun Heo rcu_read_unlock(); 79e71bf0d0STejun Heo 80e71bf0d0STejun Heo return part; 81e71bf0d0STejun Heo } 82e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_get_part); 83e71bf0d0STejun Heo 84e71bf0d0STejun Heo /** 85e71bf0d0STejun Heo * disk_part_iter_init - initialize partition iterator 86e71bf0d0STejun Heo * @piter: iterator to initialize 87e71bf0d0STejun Heo * @disk: disk to iterate over 88e71bf0d0STejun Heo * @flags: DISK_PITER_* flags 89e71bf0d0STejun Heo * 90e71bf0d0STejun Heo * Initialize @piter so that it iterates over partitions of @disk. 91e71bf0d0STejun Heo * 92e71bf0d0STejun Heo * CONTEXT: 93e71bf0d0STejun Heo * Don't care. 94e71bf0d0STejun Heo */ 95e71bf0d0STejun Heo void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 96e71bf0d0STejun Heo unsigned int flags) 97e71bf0d0STejun Heo { 98540eed56STejun Heo struct disk_part_tbl *ptbl; 99540eed56STejun Heo 100540eed56STejun Heo rcu_read_lock(); 101540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 102540eed56STejun Heo 103e71bf0d0STejun Heo piter->disk = disk; 104e71bf0d0STejun Heo piter->part = NULL; 105e71bf0d0STejun Heo 106e71bf0d0STejun Heo if (flags & DISK_PITER_REVERSE) 107540eed56STejun Heo piter->idx = ptbl->len - 1; 10871982a40STejun Heo else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) 109e71bf0d0STejun Heo piter->idx = 0; 110b5d0b9dfSTejun Heo else 111b5d0b9dfSTejun Heo piter->idx = 1; 112e71bf0d0STejun Heo 113e71bf0d0STejun Heo piter->flags = flags; 114540eed56STejun Heo 115540eed56STejun Heo rcu_read_unlock(); 116e71bf0d0STejun Heo } 117e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_init); 118e71bf0d0STejun Heo 119e71bf0d0STejun Heo /** 120e71bf0d0STejun Heo * disk_part_iter_next - proceed iterator to the next partition and return it 121e71bf0d0STejun Heo * @piter: iterator of interest 122e71bf0d0STejun Heo * 123e71bf0d0STejun Heo * Proceed @piter to the next partition and return it. 124e71bf0d0STejun Heo * 125e71bf0d0STejun Heo * CONTEXT: 126e71bf0d0STejun Heo * Don't care. 127e71bf0d0STejun Heo */ 128e71bf0d0STejun Heo struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 129e71bf0d0STejun Heo { 130540eed56STejun Heo struct disk_part_tbl *ptbl; 131e71bf0d0STejun Heo int inc, end; 132e71bf0d0STejun Heo 133e71bf0d0STejun Heo /* put the last partition */ 134e71bf0d0STejun Heo disk_put_part(piter->part); 135e71bf0d0STejun Heo piter->part = NULL; 136e71bf0d0STejun Heo 137540eed56STejun Heo /* get part_tbl */ 138e71bf0d0STejun Heo rcu_read_lock(); 139540eed56STejun Heo ptbl = rcu_dereference(piter->disk->part_tbl); 140e71bf0d0STejun Heo 141e71bf0d0STejun Heo /* determine iteration parameters */ 142e71bf0d0STejun Heo if (piter->flags & DISK_PITER_REVERSE) { 143e71bf0d0STejun Heo inc = -1; 14471982a40STejun Heo if (piter->flags & (DISK_PITER_INCL_PART0 | 14571982a40STejun Heo DISK_PITER_INCL_EMPTY_PART0)) 146e71bf0d0STejun Heo end = -1; 147b5d0b9dfSTejun Heo else 148b5d0b9dfSTejun Heo end = 0; 149e71bf0d0STejun Heo } else { 150e71bf0d0STejun Heo inc = 1; 151540eed56STejun Heo end = ptbl->len; 152e71bf0d0STejun Heo } 153e71bf0d0STejun Heo 154e71bf0d0STejun Heo /* iterate to the next partition */ 155e71bf0d0STejun Heo for (; piter->idx != end; piter->idx += inc) { 156e71bf0d0STejun Heo struct hd_struct *part; 157e71bf0d0STejun Heo 158540eed56STejun Heo part = rcu_dereference(ptbl->part[piter->idx]); 159e71bf0d0STejun Heo if (!part) 160e71bf0d0STejun Heo continue; 161c83f6bf9SVivek Goyal if (!part_nr_sects_read(part) && 16271982a40STejun Heo !(piter->flags & DISK_PITER_INCL_EMPTY) && 16371982a40STejun Heo !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 16471982a40STejun Heo piter->idx == 0)) 165e71bf0d0STejun Heo continue; 166e71bf0d0STejun Heo 167ed9e1982STejun Heo get_device(part_to_dev(part)); 168e71bf0d0STejun Heo piter->part = part; 169e71bf0d0STejun Heo piter->idx += inc; 170e71bf0d0STejun Heo break; 171e71bf0d0STejun Heo } 172e71bf0d0STejun Heo 173e71bf0d0STejun Heo rcu_read_unlock(); 174e71bf0d0STejun Heo 175e71bf0d0STejun Heo return piter->part; 176e71bf0d0STejun Heo } 177e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_next); 178e71bf0d0STejun Heo 179e71bf0d0STejun Heo /** 180e71bf0d0STejun Heo * disk_part_iter_exit - finish up partition iteration 181e71bf0d0STejun Heo * @piter: iter of interest 182e71bf0d0STejun Heo * 183e71bf0d0STejun Heo * Called when iteration is over. Cleans up @piter. 184e71bf0d0STejun Heo * 185e71bf0d0STejun Heo * CONTEXT: 186e71bf0d0STejun Heo * Don't care. 187e71bf0d0STejun Heo */ 188e71bf0d0STejun Heo void disk_part_iter_exit(struct disk_part_iter *piter) 189e71bf0d0STejun Heo { 190e71bf0d0STejun Heo disk_put_part(piter->part); 191e71bf0d0STejun Heo piter->part = NULL; 192e71bf0d0STejun Heo } 193e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_exit); 194e71bf0d0STejun Heo 195a6f23657SJens Axboe static inline int sector_in_part(struct hd_struct *part, sector_t sector) 196a6f23657SJens Axboe { 197a6f23657SJens Axboe return part->start_sect <= sector && 198c83f6bf9SVivek Goyal sector < part->start_sect + part_nr_sects_read(part); 199a6f23657SJens Axboe } 200a6f23657SJens Axboe 201e71bf0d0STejun Heo /** 202e71bf0d0STejun Heo * disk_map_sector_rcu - map sector to partition 203e71bf0d0STejun Heo * @disk: gendisk of interest 204e71bf0d0STejun Heo * @sector: sector to map 205e71bf0d0STejun Heo * 206e71bf0d0STejun Heo * Find out which partition @sector maps to on @disk. This is 207e71bf0d0STejun Heo * primarily used for stats accounting. 208e71bf0d0STejun Heo * 209e71bf0d0STejun Heo * CONTEXT: 210e71bf0d0STejun Heo * RCU read locked. The returned partition pointer is valid only 211e71bf0d0STejun Heo * while preemption is disabled. 212e71bf0d0STejun Heo * 213e71bf0d0STejun Heo * RETURNS: 214074a7acaSTejun Heo * Found partition on success, part0 is returned if no partition matches 215e71bf0d0STejun Heo */ 216e71bf0d0STejun Heo struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 217e71bf0d0STejun Heo { 218540eed56STejun Heo struct disk_part_tbl *ptbl; 219a6f23657SJens Axboe struct hd_struct *part; 220e71bf0d0STejun Heo int i; 221e71bf0d0STejun Heo 222540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 223540eed56STejun Heo 224a6f23657SJens Axboe part = rcu_dereference(ptbl->last_lookup); 225a6f23657SJens Axboe if (part && sector_in_part(part, sector)) 226e71bf0d0STejun Heo return part; 227a6f23657SJens Axboe 228a6f23657SJens Axboe for (i = 1; i < ptbl->len; i++) { 229a6f23657SJens Axboe part = rcu_dereference(ptbl->part[i]); 230a6f23657SJens Axboe 231a6f23657SJens Axboe if (part && sector_in_part(part, sector)) { 232a6f23657SJens Axboe rcu_assign_pointer(ptbl->last_lookup, part); 233a6f23657SJens Axboe return part; 234a6f23657SJens Axboe } 235e71bf0d0STejun Heo } 236074a7acaSTejun Heo return &disk->part0; 237e71bf0d0STejun Heo } 238e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 239e71bf0d0STejun Heo 2401da177e4SLinus Torvalds /* 2411da177e4SLinus Torvalds * Can be deleted altogether. Later. 2421da177e4SLinus Torvalds * 2431da177e4SLinus Torvalds */ 2441da177e4SLinus Torvalds static struct blk_major_name { 2451da177e4SLinus Torvalds struct blk_major_name *next; 2461da177e4SLinus Torvalds int major; 2471da177e4SLinus Torvalds char name[16]; 24868eef3b4SJoe Korty } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds /* index in the above - for now: assume no multimajor ranges */ 251e61eb2e9SYang Zhang static inline int major_to_index(unsigned major) 2521da177e4SLinus Torvalds { 25368eef3b4SJoe Korty return major % BLKDEV_MAJOR_HASH_SIZE; 2541da177e4SLinus Torvalds } 2551da177e4SLinus Torvalds 25668eef3b4SJoe Korty #ifdef CONFIG_PROC_FS 257cf771cb5STejun Heo void blkdev_show(struct seq_file *seqf, off_t offset) 2587170be5fSNeil Horman { 25968eef3b4SJoe Korty struct blk_major_name *dp; 2607170be5fSNeil Horman 26168eef3b4SJoe Korty if (offset < BLKDEV_MAJOR_HASH_SIZE) { 262edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 26368eef3b4SJoe Korty for (dp = major_names[offset]; dp; dp = dp->next) 264cf771cb5STejun Heo seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 265edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 26668eef3b4SJoe Korty } 2677170be5fSNeil Horman } 26868eef3b4SJoe Korty #endif /* CONFIG_PROC_FS */ 2691da177e4SLinus Torvalds 2709e8c0bccSMárton Németh /** 2719e8c0bccSMárton Németh * register_blkdev - register a new block device 2729e8c0bccSMárton Németh * 2739e8c0bccSMárton Németh * @major: the requested major device number [1..255]. If @major=0, try to 2749e8c0bccSMárton Németh * allocate any unused major number. 2759e8c0bccSMárton Németh * @name: the name of the new block device as a zero terminated string 2769e8c0bccSMárton Németh * 2779e8c0bccSMárton Németh * The @name must be unique within the system. 2789e8c0bccSMárton Németh * 2799e8c0bccSMárton Németh * The return value depends on the @major input parameter. 2809e8c0bccSMárton Németh * - if a major device number was requested in range [1..255] then the 2819e8c0bccSMárton Németh * function returns zero on success, or a negative error code 2829e8c0bccSMárton Németh * - if any unused major number was requested with @major=0 parameter 2839e8c0bccSMárton Németh * then the return value is the allocated major number in range 2849e8c0bccSMárton Németh * [1..255] or a negative error code otherwise 2859e8c0bccSMárton Németh */ 2861da177e4SLinus Torvalds int register_blkdev(unsigned int major, const char *name) 2871da177e4SLinus Torvalds { 2881da177e4SLinus Torvalds struct blk_major_name **n, *p; 2891da177e4SLinus Torvalds int index, ret = 0; 2901da177e4SLinus Torvalds 291edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds /* temporary */ 2941da177e4SLinus Torvalds if (major == 0) { 2951da177e4SLinus Torvalds for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 2961da177e4SLinus Torvalds if (major_names[index] == NULL) 2971da177e4SLinus Torvalds break; 2981da177e4SLinus Torvalds } 2991da177e4SLinus Torvalds 3001da177e4SLinus Torvalds if (index == 0) { 3011da177e4SLinus Torvalds printk("register_blkdev: failed to get major for %s\n", 3021da177e4SLinus Torvalds name); 3031da177e4SLinus Torvalds ret = -EBUSY; 3041da177e4SLinus Torvalds goto out; 3051da177e4SLinus Torvalds } 3061da177e4SLinus Torvalds major = index; 3071da177e4SLinus Torvalds ret = major; 3081da177e4SLinus Torvalds } 3091da177e4SLinus Torvalds 3101da177e4SLinus Torvalds p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 3111da177e4SLinus Torvalds if (p == NULL) { 3121da177e4SLinus Torvalds ret = -ENOMEM; 3131da177e4SLinus Torvalds goto out; 3141da177e4SLinus Torvalds } 3151da177e4SLinus Torvalds 3161da177e4SLinus Torvalds p->major = major; 3171da177e4SLinus Torvalds strlcpy(p->name, name, sizeof(p->name)); 3181da177e4SLinus Torvalds p->next = NULL; 3191da177e4SLinus Torvalds index = major_to_index(major); 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds for (n = &major_names[index]; *n; n = &(*n)->next) { 3221da177e4SLinus Torvalds if ((*n)->major == major) 3231da177e4SLinus Torvalds break; 3241da177e4SLinus Torvalds } 3251da177e4SLinus Torvalds if (!*n) 3261da177e4SLinus Torvalds *n = p; 3271da177e4SLinus Torvalds else 3281da177e4SLinus Torvalds ret = -EBUSY; 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds if (ret < 0) { 3311da177e4SLinus Torvalds printk("register_blkdev: cannot get major %d for %s\n", 3321da177e4SLinus Torvalds major, name); 3331da177e4SLinus Torvalds kfree(p); 3341da177e4SLinus Torvalds } 3351da177e4SLinus Torvalds out: 336edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 3371da177e4SLinus Torvalds return ret; 3381da177e4SLinus Torvalds } 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds EXPORT_SYMBOL(register_blkdev); 3411da177e4SLinus Torvalds 342f4480240SAkinobu Mita void unregister_blkdev(unsigned int major, const char *name) 3431da177e4SLinus Torvalds { 3441da177e4SLinus Torvalds struct blk_major_name **n; 3451da177e4SLinus Torvalds struct blk_major_name *p = NULL; 3461da177e4SLinus Torvalds int index = major_to_index(major); 3471da177e4SLinus Torvalds 348edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 3491da177e4SLinus Torvalds for (n = &major_names[index]; *n; n = &(*n)->next) 3501da177e4SLinus Torvalds if ((*n)->major == major) 3511da177e4SLinus Torvalds break; 352294462a5SAkinobu Mita if (!*n || strcmp((*n)->name, name)) { 353294462a5SAkinobu Mita WARN_ON(1); 354294462a5SAkinobu Mita } else { 3551da177e4SLinus Torvalds p = *n; 3561da177e4SLinus Torvalds *n = p->next; 3571da177e4SLinus Torvalds } 358edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 3591da177e4SLinus Torvalds kfree(p); 3601da177e4SLinus Torvalds } 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds EXPORT_SYMBOL(unregister_blkdev); 3631da177e4SLinus Torvalds 3641da177e4SLinus Torvalds static struct kobj_map *bdev_map; 3651da177e4SLinus Torvalds 366bcce3de1STejun Heo /** 367870d6656STejun Heo * blk_mangle_minor - scatter minor numbers apart 368870d6656STejun Heo * @minor: minor number to mangle 369870d6656STejun Heo * 370870d6656STejun Heo * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 371870d6656STejun Heo * is enabled. Mangling twice gives the original value. 372870d6656STejun Heo * 373870d6656STejun Heo * RETURNS: 374870d6656STejun Heo * Mangled value. 375870d6656STejun Heo * 376870d6656STejun Heo * CONTEXT: 377870d6656STejun Heo * Don't care. 378870d6656STejun Heo */ 379870d6656STejun Heo static int blk_mangle_minor(int minor) 380870d6656STejun Heo { 381870d6656STejun Heo #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 382870d6656STejun Heo int i; 383870d6656STejun Heo 384870d6656STejun Heo for (i = 0; i < MINORBITS / 2; i++) { 385870d6656STejun Heo int low = minor & (1 << i); 386870d6656STejun Heo int high = minor & (1 << (MINORBITS - 1 - i)); 387870d6656STejun Heo int distance = MINORBITS - 1 - 2 * i; 388870d6656STejun Heo 389870d6656STejun Heo minor ^= low | high; /* clear both bits */ 390870d6656STejun Heo low <<= distance; /* swap the positions */ 391870d6656STejun Heo high >>= distance; 392870d6656STejun Heo minor |= low | high; /* and set */ 393870d6656STejun Heo } 394870d6656STejun Heo #endif 395870d6656STejun Heo return minor; 396870d6656STejun Heo } 397870d6656STejun Heo 398870d6656STejun Heo /** 399bcce3de1STejun Heo * blk_alloc_devt - allocate a dev_t for a partition 400bcce3de1STejun Heo * @part: partition to allocate dev_t for 401bcce3de1STejun Heo * @devt: out parameter for resulting dev_t 402bcce3de1STejun Heo * 403bcce3de1STejun Heo * Allocate a dev_t for block device. 404bcce3de1STejun Heo * 405bcce3de1STejun Heo * RETURNS: 406bcce3de1STejun Heo * 0 on success, allocated dev_t is returned in *@devt. -errno on 407bcce3de1STejun Heo * failure. 408bcce3de1STejun Heo * 409bcce3de1STejun Heo * CONTEXT: 410bcce3de1STejun Heo * Might sleep. 411bcce3de1STejun Heo */ 412bcce3de1STejun Heo int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 413bcce3de1STejun Heo { 414bcce3de1STejun Heo struct gendisk *disk = part_to_disk(part); 415bab998d6STejun Heo int idx; 416bcce3de1STejun Heo 417bcce3de1STejun Heo /* in consecutive minor range? */ 418bcce3de1STejun Heo if (part->partno < disk->minors) { 419bcce3de1STejun Heo *devt = MKDEV(disk->major, disk->first_minor + part->partno); 420bcce3de1STejun Heo return 0; 421bcce3de1STejun Heo } 422bcce3de1STejun Heo 423bcce3de1STejun Heo /* allocate ext devt */ 4242da78092SKeith Busch idr_preload(GFP_KERNEL); 4252da78092SKeith Busch 4264d66e5e9SDan Williams spin_lock_bh(&ext_devt_lock); 4272da78092SKeith Busch idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); 4284d66e5e9SDan Williams spin_unlock_bh(&ext_devt_lock); 4292da78092SKeith Busch 4302da78092SKeith Busch idr_preload_end(); 431bab998d6STejun Heo if (idx < 0) 432bab998d6STejun Heo return idx == -ENOSPC ? -EBUSY : idx; 433bcce3de1STejun Heo 434870d6656STejun Heo *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 435bcce3de1STejun Heo return 0; 436bcce3de1STejun Heo } 437bcce3de1STejun Heo 438bcce3de1STejun Heo /** 439bcce3de1STejun Heo * blk_free_devt - free a dev_t 440bcce3de1STejun Heo * @devt: dev_t to free 441bcce3de1STejun Heo * 442bcce3de1STejun Heo * Free @devt which was allocated using blk_alloc_devt(). 443bcce3de1STejun Heo * 444bcce3de1STejun Heo * CONTEXT: 445bcce3de1STejun Heo * Might sleep. 446bcce3de1STejun Heo */ 447bcce3de1STejun Heo void blk_free_devt(dev_t devt) 448bcce3de1STejun Heo { 449bcce3de1STejun Heo if (devt == MKDEV(0, 0)) 450bcce3de1STejun Heo return; 451bcce3de1STejun Heo 452bcce3de1STejun Heo if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 4534d66e5e9SDan Williams spin_lock_bh(&ext_devt_lock); 454870d6656STejun Heo idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 4554d66e5e9SDan Williams spin_unlock_bh(&ext_devt_lock); 456bcce3de1STejun Heo } 457bcce3de1STejun Heo } 458bcce3de1STejun Heo 4591f014290STejun Heo static char *bdevt_str(dev_t devt, char *buf) 4601f014290STejun Heo { 4611f014290STejun Heo if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 4621f014290STejun Heo char tbuf[BDEVT_SIZE]; 4631f014290STejun Heo snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 4641f014290STejun Heo snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 4651f014290STejun Heo } else 4661f014290STejun Heo snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 4671f014290STejun Heo 4681f014290STejun Heo return buf; 4691f014290STejun Heo } 4701f014290STejun Heo 4711da177e4SLinus Torvalds /* 4721da177e4SLinus Torvalds * Register device numbers dev..(dev+range-1) 4731da177e4SLinus Torvalds * range must be nonzero 4741da177e4SLinus Torvalds * The hash chain is sorted on range, so that subranges can override. 4751da177e4SLinus Torvalds */ 476edfaa7c3SKay Sievers void blk_register_region(dev_t devt, unsigned long range, struct module *module, 4771da177e4SLinus Torvalds struct kobject *(*probe)(dev_t, int *, void *), 4781da177e4SLinus Torvalds int (*lock)(dev_t, void *), void *data) 4791da177e4SLinus Torvalds { 480edfaa7c3SKay Sievers kobj_map(bdev_map, devt, range, module, probe, lock, data); 4811da177e4SLinus Torvalds } 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds EXPORT_SYMBOL(blk_register_region); 4841da177e4SLinus Torvalds 485edfaa7c3SKay Sievers void blk_unregister_region(dev_t devt, unsigned long range) 4861da177e4SLinus Torvalds { 487edfaa7c3SKay Sievers kobj_unmap(bdev_map, devt, range); 4881da177e4SLinus Torvalds } 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds EXPORT_SYMBOL(blk_unregister_region); 4911da177e4SLinus Torvalds 492cf771cb5STejun Heo static struct kobject *exact_match(dev_t devt, int *partno, void *data) 4931da177e4SLinus Torvalds { 4941da177e4SLinus Torvalds struct gendisk *p = data; 495edfaa7c3SKay Sievers 496ed9e1982STejun Heo return &disk_to_dev(p)->kobj; 4971da177e4SLinus Torvalds } 4981da177e4SLinus Torvalds 499edfaa7c3SKay Sievers static int exact_lock(dev_t devt, void *data) 5001da177e4SLinus Torvalds { 5011da177e4SLinus Torvalds struct gendisk *p = data; 5021da177e4SLinus Torvalds 5031da177e4SLinus Torvalds if (!get_disk(p)) 5041da177e4SLinus Torvalds return -1; 5051da177e4SLinus Torvalds return 0; 5061da177e4SLinus Torvalds } 5071da177e4SLinus Torvalds 5084752bc30SAl Viro static void register_disk(struct gendisk *disk) 509d2bf1b67STejun Heo { 510d2bf1b67STejun Heo struct device *ddev = disk_to_dev(disk); 511d2bf1b67STejun Heo struct block_device *bdev; 512d2bf1b67STejun Heo struct disk_part_iter piter; 513d2bf1b67STejun Heo struct hd_struct *part; 514d2bf1b67STejun Heo int err; 515d2bf1b67STejun Heo 516d2bf1b67STejun Heo ddev->parent = disk->driverfs_dev; 517d2bf1b67STejun Heo 518ffc8b308SKees Cook dev_set_name(ddev, "%s", disk->disk_name); 519d2bf1b67STejun Heo 520d2bf1b67STejun Heo /* delay uevents, until we scanned partition table */ 521d2bf1b67STejun Heo dev_set_uevent_suppress(ddev, 1); 522d2bf1b67STejun Heo 523d2bf1b67STejun Heo if (device_add(ddev)) 524d2bf1b67STejun Heo return; 525d2bf1b67STejun Heo if (!sysfs_deprecated) { 526d2bf1b67STejun Heo err = sysfs_create_link(block_depr, &ddev->kobj, 527d2bf1b67STejun Heo kobject_name(&ddev->kobj)); 528d2bf1b67STejun Heo if (err) { 529d2bf1b67STejun Heo device_del(ddev); 530d2bf1b67STejun Heo return; 531d2bf1b67STejun Heo } 532d2bf1b67STejun Heo } 53325e823c8SMing Lei 53425e823c8SMing Lei /* 53525e823c8SMing Lei * avoid probable deadlock caused by allocating memory with 53625e823c8SMing Lei * GFP_KERNEL in runtime_resume callback of its all ancestor 53725e823c8SMing Lei * devices 53825e823c8SMing Lei */ 53925e823c8SMing Lei pm_runtime_set_memalloc_noio(ddev, true); 54025e823c8SMing Lei 541d2bf1b67STejun Heo disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); 542d2bf1b67STejun Heo disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 543d2bf1b67STejun Heo 544d2bf1b67STejun Heo /* No minors to use for partitions */ 545d27769ecSTejun Heo if (!disk_part_scan_enabled(disk)) 546d2bf1b67STejun Heo goto exit; 547d2bf1b67STejun Heo 548d2bf1b67STejun Heo /* No such device (e.g., media were just removed) */ 549d2bf1b67STejun Heo if (!get_capacity(disk)) 550d2bf1b67STejun Heo goto exit; 551d2bf1b67STejun Heo 552d2bf1b67STejun Heo bdev = bdget_disk(disk, 0); 553d2bf1b67STejun Heo if (!bdev) 554d2bf1b67STejun Heo goto exit; 555d2bf1b67STejun Heo 556d2bf1b67STejun Heo bdev->bd_invalidated = 1; 557d2bf1b67STejun Heo err = blkdev_get(bdev, FMODE_READ, NULL); 558d2bf1b67STejun Heo if (err < 0) 559d2bf1b67STejun Heo goto exit; 560d2bf1b67STejun Heo blkdev_put(bdev, FMODE_READ); 561d2bf1b67STejun Heo 562d2bf1b67STejun Heo exit: 563d2bf1b67STejun Heo /* announce disk after possible partitions are created */ 564d2bf1b67STejun Heo dev_set_uevent_suppress(ddev, 0); 565d2bf1b67STejun Heo kobject_uevent(&ddev->kobj, KOBJ_ADD); 566d2bf1b67STejun Heo 567d2bf1b67STejun Heo /* announce possible partitions */ 568d2bf1b67STejun Heo disk_part_iter_init(&piter, disk, 0); 569d2bf1b67STejun Heo while ((part = disk_part_iter_next(&piter))) 570d2bf1b67STejun Heo kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); 571d2bf1b67STejun Heo disk_part_iter_exit(&piter); 572d2bf1b67STejun Heo } 573d2bf1b67STejun Heo 5741da177e4SLinus Torvalds /** 5751da177e4SLinus Torvalds * add_disk - add partitioning information to kernel list 5761da177e4SLinus Torvalds * @disk: per-device partitioning information 5771da177e4SLinus Torvalds * 5781da177e4SLinus Torvalds * This function registers the partitioning information in @disk 5791da177e4SLinus Torvalds * with the kernel. 5803e1a7ff8STejun Heo * 5813e1a7ff8STejun Heo * FIXME: error handling 5821da177e4SLinus Torvalds */ 5831da177e4SLinus Torvalds void add_disk(struct gendisk *disk) 5841da177e4SLinus Torvalds { 585cf0ca9feSPeter Zijlstra struct backing_dev_info *bdi; 5863e1a7ff8STejun Heo dev_t devt; 5876ffeea77SGreg Kroah-Hartman int retval; 588cf0ca9feSPeter Zijlstra 5893e1a7ff8STejun Heo /* minors == 0 indicates to use ext devt from part0 and should 5903e1a7ff8STejun Heo * be accompanied with EXT_DEVT flag. Make sure all 5913e1a7ff8STejun Heo * parameters make sense. 5923e1a7ff8STejun Heo */ 5933e1a7ff8STejun Heo WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 5943e1a7ff8STejun Heo WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 5953e1a7ff8STejun Heo 5961da177e4SLinus Torvalds disk->flags |= GENHD_FL_UP; 5973e1a7ff8STejun Heo 5983e1a7ff8STejun Heo retval = blk_alloc_devt(&disk->part0, &devt); 5993e1a7ff8STejun Heo if (retval) { 6003e1a7ff8STejun Heo WARN_ON(1); 6013e1a7ff8STejun Heo return; 6023e1a7ff8STejun Heo } 6033e1a7ff8STejun Heo disk_to_dev(disk)->devt = devt; 6043e1a7ff8STejun Heo 6053e1a7ff8STejun Heo /* ->major and ->first_minor aren't supposed to be 6063e1a7ff8STejun Heo * dereferenced from here on, but set them just in case. 6073e1a7ff8STejun Heo */ 6083e1a7ff8STejun Heo disk->major = MAJOR(devt); 6093e1a7ff8STejun Heo disk->first_minor = MINOR(devt); 6103e1a7ff8STejun Heo 6119f53d2feSStanislaw Gruszka disk_alloc_events(disk); 6129f53d2feSStanislaw Gruszka 61301ea5063SSigned-off-by: Jan Kara /* Register BDI before referencing it from bdev */ 61401ea5063SSigned-off-by: Jan Kara bdi = &disk->queue->backing_dev_info; 61501ea5063SSigned-off-by: Jan Kara bdi_register_dev(bdi, disk_devt(disk)); 61601ea5063SSigned-off-by: Jan Kara 617f331c029STejun Heo blk_register_region(disk_devt(disk), disk->minors, NULL, 618f331c029STejun Heo exact_match, exact_lock, disk); 6191da177e4SLinus Torvalds register_disk(disk); 6201da177e4SLinus Torvalds blk_register_queue(disk); 621cf0ca9feSPeter Zijlstra 622523e1d39STejun Heo /* 623523e1d39STejun Heo * Take an extra ref on queue which will be put on disk_release() 624523e1d39STejun Heo * so that it sticks around as long as @disk is there. 625523e1d39STejun Heo */ 62609ac46c4STejun Heo WARN_ON_ONCE(!blk_get_queue(disk->queue)); 627523e1d39STejun Heo 628ed9e1982STejun Heo retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 629ed9e1982STejun Heo "bdi"); 6306ffeea77SGreg Kroah-Hartman WARN_ON(retval); 63177ea887eSTejun Heo 63277ea887eSTejun Heo disk_add_events(disk); 63325520d55SMartin K. Petersen blk_integrity_add(disk); 6341da177e4SLinus Torvalds } 6351da177e4SLinus Torvalds EXPORT_SYMBOL(add_disk); 6361da177e4SLinus Torvalds 637d2bf1b67STejun Heo void del_gendisk(struct gendisk *disk) 6381da177e4SLinus Torvalds { 639d2bf1b67STejun Heo struct disk_part_iter piter; 640d2bf1b67STejun Heo struct hd_struct *part; 641d2bf1b67STejun Heo 64225520d55SMartin K. Petersen blk_integrity_del(disk); 64377ea887eSTejun Heo disk_del_events(disk); 64477ea887eSTejun Heo 645d2bf1b67STejun Heo /* invalidate stuff */ 646d2bf1b67STejun Heo disk_part_iter_init(&piter, disk, 647d2bf1b67STejun Heo DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 648d2bf1b67STejun Heo while ((part = disk_part_iter_next(&piter))) { 649d2bf1b67STejun Heo invalidate_partition(disk, part->partno); 650d2bf1b67STejun Heo delete_partition(disk, part->partno); 651d2bf1b67STejun Heo } 652d2bf1b67STejun Heo disk_part_iter_exit(&piter); 653d2bf1b67STejun Heo 654d2bf1b67STejun Heo invalidate_partition(disk, 0); 655d2bf1b67STejun Heo set_capacity(disk, 0); 656d2bf1b67STejun Heo disk->flags &= ~GENHD_FL_UP; 657d2bf1b67STejun Heo 658ed9e1982STejun Heo sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 6591da177e4SLinus Torvalds blk_unregister_queue(disk); 660f331c029STejun Heo blk_unregister_region(disk_devt(disk), disk->minors); 661d2bf1b67STejun Heo 662d2bf1b67STejun Heo part_stat_set_all(&disk->part0, 0); 663d2bf1b67STejun Heo disk->part0.stamp = 0; 664d2bf1b67STejun Heo 665d2bf1b67STejun Heo kobject_put(disk->part0.holder_dir); 666d2bf1b67STejun Heo kobject_put(disk->slave_dir); 667d2bf1b67STejun Heo disk->driverfs_dev = NULL; 668d2bf1b67STejun Heo if (!sysfs_deprecated) 669d2bf1b67STejun Heo sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 67025e823c8SMing Lei pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); 671d2bf1b67STejun Heo device_del(disk_to_dev(disk)); 6721da177e4SLinus Torvalds } 673d2bf1b67STejun Heo EXPORT_SYMBOL(del_gendisk); 6741da177e4SLinus Torvalds 6751da177e4SLinus Torvalds /** 6761da177e4SLinus Torvalds * get_gendisk - get partitioning information for a given device 677710027a4SRandy Dunlap * @devt: device to get partitioning information for 678496aa8a9SRandy Dunlap * @partno: returned partition index 6791da177e4SLinus Torvalds * 6801da177e4SLinus Torvalds * This function gets the structure containing partitioning 681710027a4SRandy Dunlap * information for the given device @devt. 6821da177e4SLinus Torvalds */ 683cf771cb5STejun Heo struct gendisk *get_gendisk(dev_t devt, int *partno) 6841da177e4SLinus Torvalds { 685bcce3de1STejun Heo struct gendisk *disk = NULL; 686edfaa7c3SKay Sievers 687bcce3de1STejun Heo if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 688bcce3de1STejun Heo struct kobject *kobj; 689bcce3de1STejun Heo 690bcce3de1STejun Heo kobj = kobj_lookup(bdev_map, devt, partno); 691bcce3de1STejun Heo if (kobj) 692bcce3de1STejun Heo disk = dev_to_disk(kobj_to_dev(kobj)); 693bcce3de1STejun Heo } else { 694bcce3de1STejun Heo struct hd_struct *part; 695bcce3de1STejun Heo 6964d66e5e9SDan Williams spin_lock_bh(&ext_devt_lock); 697870d6656STejun Heo part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 698bcce3de1STejun Heo if (part && get_disk(part_to_disk(part))) { 699bcce3de1STejun Heo *partno = part->partno; 700bcce3de1STejun Heo disk = part_to_disk(part); 701bcce3de1STejun Heo } 7024d66e5e9SDan Williams spin_unlock_bh(&ext_devt_lock); 703bcce3de1STejun Heo } 704bcce3de1STejun Heo 705bcce3de1STejun Heo return disk; 7061da177e4SLinus Torvalds } 707b6ac23afSDivyesh Shah EXPORT_SYMBOL(get_gendisk); 7081da177e4SLinus Torvalds 709f331c029STejun Heo /** 710f331c029STejun Heo * bdget_disk - do bdget() by gendisk and partition number 711f331c029STejun Heo * @disk: gendisk of interest 712f331c029STejun Heo * @partno: partition number 713f331c029STejun Heo * 714f331c029STejun Heo * Find partition @partno from @disk, do bdget() on it. 715f331c029STejun Heo * 716f331c029STejun Heo * CONTEXT: 717f331c029STejun Heo * Don't care. 718f331c029STejun Heo * 719f331c029STejun Heo * RETURNS: 720f331c029STejun Heo * Resulting block_device on success, NULL on failure. 721f331c029STejun Heo */ 722aeb3d3a8SHarvey Harrison struct block_device *bdget_disk(struct gendisk *disk, int partno) 723f331c029STejun Heo { 724e71bf0d0STejun Heo struct hd_struct *part; 725548b10ebSTejun Heo struct block_device *bdev = NULL; 726f331c029STejun Heo 727e71bf0d0STejun Heo part = disk_get_part(disk, partno); 7282bbedcb4STejun Heo if (part) 729548b10ebSTejun Heo bdev = bdget(part_devt(part)); 730e71bf0d0STejun Heo disk_put_part(part); 731f331c029STejun Heo 732548b10ebSTejun Heo return bdev; 733f331c029STejun Heo } 734f331c029STejun Heo EXPORT_SYMBOL(bdget_disk); 735f331c029STejun Heo 736dd2a345fSDave Gilbert /* 7375c6f35c5SGreg Kroah-Hartman * print a full list of all partitions - intended for places where the root 7385c6f35c5SGreg Kroah-Hartman * filesystem can't be mounted and thus to give the victim some idea of what 7395c6f35c5SGreg Kroah-Hartman * went wrong 7405c6f35c5SGreg Kroah-Hartman */ 7415c6f35c5SGreg Kroah-Hartman void __init printk_all_partitions(void) 7425c6f35c5SGreg Kroah-Hartman { 743def4e38dSTejun Heo struct class_dev_iter iter; 744def4e38dSTejun Heo struct device *dev; 745def4e38dSTejun Heo 746def4e38dSTejun Heo class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 747def4e38dSTejun Heo while ((dev = class_dev_iter_next(&iter))) { 748def4e38dSTejun Heo struct gendisk *disk = dev_to_disk(dev); 749e71bf0d0STejun Heo struct disk_part_iter piter; 750e71bf0d0STejun Heo struct hd_struct *part; 7511f014290STejun Heo char name_buf[BDEVNAME_SIZE]; 7521f014290STejun Heo char devt_buf[BDEVT_SIZE]; 753def4e38dSTejun Heo 754def4e38dSTejun Heo /* 755def4e38dSTejun Heo * Don't show empty devices or things that have been 75625985edcSLucas De Marchi * suppressed 757def4e38dSTejun Heo */ 758def4e38dSTejun Heo if (get_capacity(disk) == 0 || 759def4e38dSTejun Heo (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 760def4e38dSTejun Heo continue; 761def4e38dSTejun Heo 762def4e38dSTejun Heo /* 763def4e38dSTejun Heo * Note, unlike /proc/partitions, I am showing the 764def4e38dSTejun Heo * numbers in hex - the same format as the root= 765def4e38dSTejun Heo * option takes. 766def4e38dSTejun Heo */ 767074a7acaSTejun Heo disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 768074a7acaSTejun Heo while ((part = disk_part_iter_next(&piter))) { 769074a7acaSTejun Heo bool is_part0 = part == &disk->part0; 770074a7acaSTejun Heo 771b5af921eSWill Drewry printk("%s%s %10llu %s %s", is_part0 ? "" : " ", 772074a7acaSTejun Heo bdevt_str(part_devt(part), devt_buf), 773c83f6bf9SVivek Goyal (unsigned long long)part_nr_sects_read(part) >> 1 774c83f6bf9SVivek Goyal , disk_name(disk, part->partno, name_buf), 7751ad7e899SStephen Warren part->info ? part->info->uuid : ""); 776074a7acaSTejun Heo if (is_part0) { 777def4e38dSTejun Heo if (disk->driverfs_dev != NULL && 778def4e38dSTejun Heo disk->driverfs_dev->driver != NULL) 779def4e38dSTejun Heo printk(" driver: %s\n", 780def4e38dSTejun Heo disk->driverfs_dev->driver->name); 781def4e38dSTejun Heo else 782def4e38dSTejun Heo printk(" (driver?)\n"); 783074a7acaSTejun Heo } else 784074a7acaSTejun Heo printk("\n"); 785074a7acaSTejun Heo } 786e71bf0d0STejun Heo disk_part_iter_exit(&piter); 787def4e38dSTejun Heo } 788def4e38dSTejun Heo class_dev_iter_exit(&iter); 789dd2a345fSDave Gilbert } 790dd2a345fSDave Gilbert 7911da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 7921da177e4SLinus Torvalds /* iterator */ 793def4e38dSTejun Heo static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 79468c4d4a7SGreg Kroah-Hartman { 795def4e38dSTejun Heo loff_t skip = *pos; 796def4e38dSTejun Heo struct class_dev_iter *iter; 797def4e38dSTejun Heo struct device *dev; 79868c4d4a7SGreg Kroah-Hartman 799aeb3d3a8SHarvey Harrison iter = kmalloc(sizeof(*iter), GFP_KERNEL); 800def4e38dSTejun Heo if (!iter) 801def4e38dSTejun Heo return ERR_PTR(-ENOMEM); 802def4e38dSTejun Heo 803def4e38dSTejun Heo seqf->private = iter; 804def4e38dSTejun Heo class_dev_iter_init(iter, &block_class, NULL, &disk_type); 805def4e38dSTejun Heo do { 806def4e38dSTejun Heo dev = class_dev_iter_next(iter); 807def4e38dSTejun Heo if (!dev) 808def4e38dSTejun Heo return NULL; 809def4e38dSTejun Heo } while (skip--); 810def4e38dSTejun Heo 811def4e38dSTejun Heo return dev_to_disk(dev); 81268c4d4a7SGreg Kroah-Hartman } 81368c4d4a7SGreg Kroah-Hartman 814def4e38dSTejun Heo static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 8151da177e4SLinus Torvalds { 816edfaa7c3SKay Sievers struct device *dev; 81766c64afeSGreg Kroah-Hartman 818def4e38dSTejun Heo (*pos)++; 819def4e38dSTejun Heo dev = class_dev_iter_next(seqf->private); 8202ac3cee5STejun Heo if (dev) 821edfaa7c3SKay Sievers return dev_to_disk(dev); 8222ac3cee5STejun Heo 8231da177e4SLinus Torvalds return NULL; 8241da177e4SLinus Torvalds } 8251da177e4SLinus Torvalds 826def4e38dSTejun Heo static void disk_seqf_stop(struct seq_file *seqf, void *v) 82727f30251SGreg Kroah-Hartman { 828def4e38dSTejun Heo struct class_dev_iter *iter = seqf->private; 829def4e38dSTejun Heo 830def4e38dSTejun Heo /* stop is called even after start failed :-( */ 831def4e38dSTejun Heo if (iter) { 832def4e38dSTejun Heo class_dev_iter_exit(iter); 833def4e38dSTejun Heo kfree(iter); 834def4e38dSTejun Heo } 83527f30251SGreg Kroah-Hartman } 83627f30251SGreg Kroah-Hartman 837def4e38dSTejun Heo static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 8381da177e4SLinus Torvalds { 83906768067SJianpeng Ma void *p; 8401da177e4SLinus Torvalds 841def4e38dSTejun Heo p = disk_seqf_start(seqf, pos); 842b9f985b6SYang Zhang if (!IS_ERR_OR_NULL(p) && !*pos) 843def4e38dSTejun Heo seq_puts(seqf, "major minor #blocks name\n\n"); 844def4e38dSTejun Heo return p; 8451da177e4SLinus Torvalds } 8461da177e4SLinus Torvalds 847cf771cb5STejun Heo static int show_partition(struct seq_file *seqf, void *v) 8481da177e4SLinus Torvalds { 8491da177e4SLinus Torvalds struct gendisk *sgp = v; 850e71bf0d0STejun Heo struct disk_part_iter piter; 851e71bf0d0STejun Heo struct hd_struct *part; 8521da177e4SLinus Torvalds char buf[BDEVNAME_SIZE]; 8531da177e4SLinus Torvalds 8541da177e4SLinus Torvalds /* Don't show non-partitionable removeable devices or empty devices */ 855d27769ecSTejun Heo if (!get_capacity(sgp) || (!disk_max_parts(sgp) && 856f331c029STejun Heo (sgp->flags & GENHD_FL_REMOVABLE))) 8571da177e4SLinus Torvalds return 0; 8581da177e4SLinus Torvalds if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 8591da177e4SLinus Torvalds return 0; 8601da177e4SLinus Torvalds 8611da177e4SLinus Torvalds /* show the full disk and all non-0 size partitions of it */ 862074a7acaSTejun Heo disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 863e71bf0d0STejun Heo while ((part = disk_part_iter_next(&piter))) 8641f014290STejun Heo seq_printf(seqf, "%4d %7d %10llu %s\n", 865f331c029STejun Heo MAJOR(part_devt(part)), MINOR(part_devt(part)), 866c83f6bf9SVivek Goyal (unsigned long long)part_nr_sects_read(part) >> 1, 867f331c029STejun Heo disk_name(sgp, part->partno, buf)); 868e71bf0d0STejun Heo disk_part_iter_exit(&piter); 8691da177e4SLinus Torvalds 8701da177e4SLinus Torvalds return 0; 8711da177e4SLinus Torvalds } 8721da177e4SLinus Torvalds 873f500975aSAlexey Dobriyan static const struct seq_operations partitions_op = { 874def4e38dSTejun Heo .start = show_partition_start, 875def4e38dSTejun Heo .next = disk_seqf_next, 876def4e38dSTejun Heo .stop = disk_seqf_stop, 8771da177e4SLinus Torvalds .show = show_partition 8781da177e4SLinus Torvalds }; 879f500975aSAlexey Dobriyan 880f500975aSAlexey Dobriyan static int partitions_open(struct inode *inode, struct file *file) 881f500975aSAlexey Dobriyan { 882f500975aSAlexey Dobriyan return seq_open(file, &partitions_op); 883f500975aSAlexey Dobriyan } 884f500975aSAlexey Dobriyan 885f500975aSAlexey Dobriyan static const struct file_operations proc_partitions_operations = { 886f500975aSAlexey Dobriyan .open = partitions_open, 887f500975aSAlexey Dobriyan .read = seq_read, 888f500975aSAlexey Dobriyan .llseek = seq_lseek, 889f500975aSAlexey Dobriyan .release = seq_release, 890f500975aSAlexey Dobriyan }; 8911da177e4SLinus Torvalds #endif 8921da177e4SLinus Torvalds 8931da177e4SLinus Torvalds 894cf771cb5STejun Heo static struct kobject *base_probe(dev_t devt, int *partno, void *data) 8951da177e4SLinus Torvalds { 896edfaa7c3SKay Sievers if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 8971da177e4SLinus Torvalds /* Make old-style 2.4 aliases work */ 898edfaa7c3SKay Sievers request_module("block-major-%d", MAJOR(devt)); 8991da177e4SLinus Torvalds return NULL; 9001da177e4SLinus Torvalds } 9011da177e4SLinus Torvalds 9021da177e4SLinus Torvalds static int __init genhd_device_init(void) 9031da177e4SLinus Torvalds { 904e105b8bfSDan Williams int error; 905e105b8bfSDan Williams 906e105b8bfSDan Williams block_class.dev_kobj = sysfs_dev_block_kobj; 907e105b8bfSDan Williams error = class_register(&block_class); 908ee27a558SRoland McGrath if (unlikely(error)) 909ee27a558SRoland McGrath return error; 910edfaa7c3SKay Sievers bdev_map = kobj_map_init(base_probe, &block_class_lock); 9111da177e4SLinus Torvalds blk_dev_init(); 912edfaa7c3SKay Sievers 913561ec68eSZhang, Yanmin register_blkdev(BLOCK_EXT_MAJOR, "blkext"); 914561ec68eSZhang, Yanmin 915edfaa7c3SKay Sievers /* create top-level block dir */ 916e52eec13SAndi Kleen if (!sysfs_deprecated) 917edfaa7c3SKay Sievers block_depr = kobject_create_and_add("block", NULL); 918830d3cfbSGreg Kroah-Hartman return 0; 9191da177e4SLinus Torvalds } 9201da177e4SLinus Torvalds 9211da177e4SLinus Torvalds subsys_initcall(genhd_device_init); 9221da177e4SLinus Torvalds 923edfaa7c3SKay Sievers static ssize_t disk_range_show(struct device *dev, 924edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 9251da177e4SLinus Torvalds { 926edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 9271da177e4SLinus Torvalds 928edfaa7c3SKay Sievers return sprintf(buf, "%d\n", disk->minors); 9291da177e4SLinus Torvalds } 9301da177e4SLinus Torvalds 9311f014290STejun Heo static ssize_t disk_ext_range_show(struct device *dev, 9321f014290STejun Heo struct device_attribute *attr, char *buf) 9331f014290STejun Heo { 9341f014290STejun Heo struct gendisk *disk = dev_to_disk(dev); 9351f014290STejun Heo 936b5d0b9dfSTejun Heo return sprintf(buf, "%d\n", disk_max_parts(disk)); 9371f014290STejun Heo } 9381f014290STejun Heo 939edfaa7c3SKay Sievers static ssize_t disk_removable_show(struct device *dev, 940edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 941a7fd6706SKay Sievers { 942edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 943a7fd6706SKay Sievers 944edfaa7c3SKay Sievers return sprintf(buf, "%d\n", 9451da177e4SLinus Torvalds (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 946edfaa7c3SKay Sievers } 9471da177e4SLinus Torvalds 9481c9ce527SKay Sievers static ssize_t disk_ro_show(struct device *dev, 9491c9ce527SKay Sievers struct device_attribute *attr, char *buf) 9501c9ce527SKay Sievers { 9511c9ce527SKay Sievers struct gendisk *disk = dev_to_disk(dev); 9521c9ce527SKay Sievers 953b7db9956STejun Heo return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 9541c9ce527SKay Sievers } 9551c9ce527SKay Sievers 956edfaa7c3SKay Sievers static ssize_t disk_capability_show(struct device *dev, 957edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 95886ce18d7SKristen Carlson Accardi { 959edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 960edfaa7c3SKay Sievers 961edfaa7c3SKay Sievers return sprintf(buf, "%x\n", disk->flags); 96286ce18d7SKristen Carlson Accardi } 963edfaa7c3SKay Sievers 964c72758f3SMartin K. Petersen static ssize_t disk_alignment_offset_show(struct device *dev, 965c72758f3SMartin K. Petersen struct device_attribute *attr, 966c72758f3SMartin K. Petersen char *buf) 967c72758f3SMartin K. Petersen { 968c72758f3SMartin K. Petersen struct gendisk *disk = dev_to_disk(dev); 969c72758f3SMartin K. Petersen 970c72758f3SMartin K. Petersen return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); 971c72758f3SMartin K. Petersen } 972c72758f3SMartin K. Petersen 97386b37281SMartin K. Petersen static ssize_t disk_discard_alignment_show(struct device *dev, 97486b37281SMartin K. Petersen struct device_attribute *attr, 97586b37281SMartin K. Petersen char *buf) 97686b37281SMartin K. Petersen { 97786b37281SMartin K. Petersen struct gendisk *disk = dev_to_disk(dev); 97886b37281SMartin K. Petersen 979dd3d145dSMartin K. Petersen return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); 98086b37281SMartin K. Petersen } 98186b37281SMartin K. Petersen 982edfaa7c3SKay Sievers static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 9831f014290STejun Heo static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 984edfaa7c3SKay Sievers static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 9851c9ce527SKay Sievers static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 986e5610521STejun Heo static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 987c72758f3SMartin K. Petersen static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 98886b37281SMartin K. Petersen static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, 98986b37281SMartin K. Petersen NULL); 990edfaa7c3SKay Sievers static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 991074a7acaSTejun Heo static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 992316d315bSNikanth Karthikesan static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 993c17bb495SAkinobu Mita #ifdef CONFIG_FAIL_MAKE_REQUEST 994edfaa7c3SKay Sievers static struct device_attribute dev_attr_fail = 995eddb2e26STejun Heo __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 996c17bb495SAkinobu Mita #endif 997581d4e28SJens Axboe #ifdef CONFIG_FAIL_IO_TIMEOUT 998581d4e28SJens Axboe static struct device_attribute dev_attr_fail_timeout = 999581d4e28SJens Axboe __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 1000581d4e28SJens Axboe part_timeout_store); 1001581d4e28SJens Axboe #endif 1002edfaa7c3SKay Sievers 1003edfaa7c3SKay Sievers static struct attribute *disk_attrs[] = { 1004edfaa7c3SKay Sievers &dev_attr_range.attr, 10051f014290STejun Heo &dev_attr_ext_range.attr, 1006edfaa7c3SKay Sievers &dev_attr_removable.attr, 10071c9ce527SKay Sievers &dev_attr_ro.attr, 1008edfaa7c3SKay Sievers &dev_attr_size.attr, 1009c72758f3SMartin K. Petersen &dev_attr_alignment_offset.attr, 101086b37281SMartin K. Petersen &dev_attr_discard_alignment.attr, 1011edfaa7c3SKay Sievers &dev_attr_capability.attr, 1012edfaa7c3SKay Sievers &dev_attr_stat.attr, 1013316d315bSNikanth Karthikesan &dev_attr_inflight.attr, 1014edfaa7c3SKay Sievers #ifdef CONFIG_FAIL_MAKE_REQUEST 1015edfaa7c3SKay Sievers &dev_attr_fail.attr, 1016edfaa7c3SKay Sievers #endif 1017581d4e28SJens Axboe #ifdef CONFIG_FAIL_IO_TIMEOUT 1018581d4e28SJens Axboe &dev_attr_fail_timeout.attr, 1019581d4e28SJens Axboe #endif 1020edfaa7c3SKay Sievers NULL 10211da177e4SLinus Torvalds }; 10221da177e4SLinus Torvalds 1023edfaa7c3SKay Sievers static struct attribute_group disk_attr_group = { 1024edfaa7c3SKay Sievers .attrs = disk_attrs, 1025edfaa7c3SKay Sievers }; 1026edfaa7c3SKay Sievers 1027a4dbd674SDavid Brownell static const struct attribute_group *disk_attr_groups[] = { 1028edfaa7c3SKay Sievers &disk_attr_group, 1029edfaa7c3SKay Sievers NULL 1030edfaa7c3SKay Sievers }; 1031edfaa7c3SKay Sievers 1032540eed56STejun Heo /** 1033540eed56STejun Heo * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 1034540eed56STejun Heo * @disk: disk to replace part_tbl for 1035540eed56STejun Heo * @new_ptbl: new part_tbl to install 1036540eed56STejun Heo * 1037540eed56STejun Heo * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 1038540eed56STejun Heo * original ptbl is freed using RCU callback. 1039540eed56STejun Heo * 1040540eed56STejun Heo * LOCKING: 1041540eed56STejun Heo * Matching bd_mutx locked. 1042540eed56STejun Heo */ 1043540eed56STejun Heo static void disk_replace_part_tbl(struct gendisk *disk, 1044540eed56STejun Heo struct disk_part_tbl *new_ptbl) 1045540eed56STejun Heo { 1046540eed56STejun Heo struct disk_part_tbl *old_ptbl = disk->part_tbl; 1047540eed56STejun Heo 1048540eed56STejun Heo rcu_assign_pointer(disk->part_tbl, new_ptbl); 1049a6f23657SJens Axboe 1050a6f23657SJens Axboe if (old_ptbl) { 1051a6f23657SJens Axboe rcu_assign_pointer(old_ptbl->last_lookup, NULL); 105257bdfbf9SLai Jiangshan kfree_rcu(old_ptbl, rcu_head); 1053540eed56STejun Heo } 1054a6f23657SJens Axboe } 1055540eed56STejun Heo 1056540eed56STejun Heo /** 1057540eed56STejun Heo * disk_expand_part_tbl - expand disk->part_tbl 1058540eed56STejun Heo * @disk: disk to expand part_tbl for 1059540eed56STejun Heo * @partno: expand such that this partno can fit in 1060540eed56STejun Heo * 1061540eed56STejun Heo * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 1062540eed56STejun Heo * uses RCU to allow unlocked dereferencing for stats and other stuff. 1063540eed56STejun Heo * 1064540eed56STejun Heo * LOCKING: 1065540eed56STejun Heo * Matching bd_mutex locked, might sleep. 1066540eed56STejun Heo * 1067540eed56STejun Heo * RETURNS: 1068540eed56STejun Heo * 0 on success, -errno on failure. 1069540eed56STejun Heo */ 1070540eed56STejun Heo int disk_expand_part_tbl(struct gendisk *disk, int partno) 1071540eed56STejun Heo { 1072540eed56STejun Heo struct disk_part_tbl *old_ptbl = disk->part_tbl; 1073540eed56STejun Heo struct disk_part_tbl *new_ptbl; 1074540eed56STejun Heo int len = old_ptbl ? old_ptbl->len : 0; 10755fabcb4cSJens Axboe int i, target; 1076540eed56STejun Heo size_t size; 10775fabcb4cSJens Axboe 10785fabcb4cSJens Axboe /* 10795fabcb4cSJens Axboe * check for int overflow, since we can get here from blkpg_ioctl() 10805fabcb4cSJens Axboe * with a user passed 'partno'. 10815fabcb4cSJens Axboe */ 10825fabcb4cSJens Axboe target = partno + 1; 10835fabcb4cSJens Axboe if (target < 0) 10845fabcb4cSJens Axboe return -EINVAL; 1085540eed56STejun Heo 1086540eed56STejun Heo /* disk_max_parts() is zero during initialization, ignore if so */ 1087540eed56STejun Heo if (disk_max_parts(disk) && target > disk_max_parts(disk)) 1088540eed56STejun Heo return -EINVAL; 1089540eed56STejun Heo 1090540eed56STejun Heo if (target <= len) 1091540eed56STejun Heo return 0; 1092540eed56STejun Heo 1093540eed56STejun Heo size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 1094540eed56STejun Heo new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 1095540eed56STejun Heo if (!new_ptbl) 1096540eed56STejun Heo return -ENOMEM; 1097540eed56STejun Heo 1098540eed56STejun Heo new_ptbl->len = target; 1099540eed56STejun Heo 1100540eed56STejun Heo for (i = 0; i < len; i++) 1101540eed56STejun Heo rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 1102540eed56STejun Heo 1103540eed56STejun Heo disk_replace_part_tbl(disk, new_ptbl); 1104540eed56STejun Heo return 0; 1105540eed56STejun Heo } 1106540eed56STejun Heo 1107edfaa7c3SKay Sievers static void disk_release(struct device *dev) 11081da177e4SLinus Torvalds { 1109edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 1110edfaa7c3SKay Sievers 11112da78092SKeith Busch blk_free_devt(dev->devt); 111277ea887eSTejun Heo disk_release_events(disk); 11131da177e4SLinus Torvalds kfree(disk->random); 1114540eed56STejun Heo disk_replace_part_tbl(disk, NULL); 1115b54e5ed8SMing Lei hd_free_part(&disk->part0); 1116523e1d39STejun Heo if (disk->queue) 1117523e1d39STejun Heo blk_put_queue(disk->queue); 11181da177e4SLinus Torvalds kfree(disk); 11191da177e4SLinus Torvalds } 1120edfaa7c3SKay Sievers struct class block_class = { 1121edfaa7c3SKay Sievers .name = "block", 11221da177e4SLinus Torvalds }; 11231da177e4SLinus Torvalds 11243c2670e6SKay Sievers static char *block_devnode(struct device *dev, umode_t *mode, 11254e4098a3SGreg Kroah-Hartman kuid_t *uid, kgid_t *gid) 1126b03f38b6SKay Sievers { 1127b03f38b6SKay Sievers struct gendisk *disk = dev_to_disk(dev); 1128b03f38b6SKay Sievers 1129e454cea2SKay Sievers if (disk->devnode) 1130e454cea2SKay Sievers return disk->devnode(disk, mode); 1131b03f38b6SKay Sievers return NULL; 1132b03f38b6SKay Sievers } 1133b03f38b6SKay Sievers 11341826eadfSAdrian Bunk static struct device_type disk_type = { 1135edfaa7c3SKay Sievers .name = "disk", 1136edfaa7c3SKay Sievers .groups = disk_attr_groups, 1137edfaa7c3SKay Sievers .release = disk_release, 1138e454cea2SKay Sievers .devnode = block_devnode, 11391da177e4SLinus Torvalds }; 11401da177e4SLinus Torvalds 1141a6e2ba88SRandy Dunlap #ifdef CONFIG_PROC_FS 1142cf771cb5STejun Heo /* 1143cf771cb5STejun Heo * aggregate disk stat collector. Uses the same stats that the sysfs 1144cf771cb5STejun Heo * entries do, above, but makes them available through one seq_file. 1145cf771cb5STejun Heo * 1146cf771cb5STejun Heo * The output looks suspiciously like /proc/partitions with a bunch of 1147cf771cb5STejun Heo * extra fields. 1148cf771cb5STejun Heo */ 1149cf771cb5STejun Heo static int diskstats_show(struct seq_file *seqf, void *v) 11501da177e4SLinus Torvalds { 11511da177e4SLinus Torvalds struct gendisk *gp = v; 1152e71bf0d0STejun Heo struct disk_part_iter piter; 1153e71bf0d0STejun Heo struct hd_struct *hd; 11541da177e4SLinus Torvalds char buf[BDEVNAME_SIZE]; 1155c9959059STejun Heo int cpu; 11561da177e4SLinus Torvalds 11571da177e4SLinus Torvalds /* 1158ed9e1982STejun Heo if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1159cf771cb5STejun Heo seq_puts(seqf, "major minor name" 11601da177e4SLinus Torvalds " rio rmerge rsect ruse wio wmerge " 11611da177e4SLinus Torvalds "wsect wuse running use aveq" 11621da177e4SLinus Torvalds "\n\n"); 11631da177e4SLinus Torvalds */ 11641da177e4SLinus Torvalds 116571982a40STejun Heo disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1166e71bf0d0STejun Heo while ((hd = disk_part_iter_next(&piter))) { 1167074a7acaSTejun Heo cpu = part_stat_lock(); 1168c9959059STejun Heo part_round_stats(cpu, hd); 1169074a7acaSTejun Heo part_stat_unlock(); 1170f95fe9cfSHerbert Poetzl seq_printf(seqf, "%4d %7d %s %lu %lu %lu " 1171f95fe9cfSHerbert Poetzl "%u %lu %lu %lu %u %u %u %u\n", 1172f331c029STejun Heo MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1173f331c029STejun Heo disk_name(gp, hd->partno, buf), 117453f22956SLiu Yuan part_stat_read(hd, ios[READ]), 117553f22956SLiu Yuan part_stat_read(hd, merges[READ]), 1176f95fe9cfSHerbert Poetzl part_stat_read(hd, sectors[READ]), 117753f22956SLiu Yuan jiffies_to_msecs(part_stat_read(hd, ticks[READ])), 117853f22956SLiu Yuan part_stat_read(hd, ios[WRITE]), 117953f22956SLiu Yuan part_stat_read(hd, merges[WRITE]), 1180f95fe9cfSHerbert Poetzl part_stat_read(hd, sectors[WRITE]), 118153f22956SLiu Yuan jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), 1182316d315bSNikanth Karthikesan part_in_flight(hd), 118328f39d55SJerome Marchand jiffies_to_msecs(part_stat_read(hd, io_ticks)), 118428f39d55SJerome Marchand jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 118528f39d55SJerome Marchand ); 11861da177e4SLinus Torvalds } 1187e71bf0d0STejun Heo disk_part_iter_exit(&piter); 11881da177e4SLinus Torvalds 11891da177e4SLinus Torvalds return 0; 11901da177e4SLinus Torvalds } 11911da177e4SLinus Torvalds 119231d85ab2SAlexey Dobriyan static const struct seq_operations diskstats_op = { 1193def4e38dSTejun Heo .start = disk_seqf_start, 1194def4e38dSTejun Heo .next = disk_seqf_next, 1195def4e38dSTejun Heo .stop = disk_seqf_stop, 11961da177e4SLinus Torvalds .show = diskstats_show 11971da177e4SLinus Torvalds }; 1198f500975aSAlexey Dobriyan 119931d85ab2SAlexey Dobriyan static int diskstats_open(struct inode *inode, struct file *file) 120031d85ab2SAlexey Dobriyan { 120131d85ab2SAlexey Dobriyan return seq_open(file, &diskstats_op); 120231d85ab2SAlexey Dobriyan } 120331d85ab2SAlexey Dobriyan 120431d85ab2SAlexey Dobriyan static const struct file_operations proc_diskstats_operations = { 120531d85ab2SAlexey Dobriyan .open = diskstats_open, 120631d85ab2SAlexey Dobriyan .read = seq_read, 120731d85ab2SAlexey Dobriyan .llseek = seq_lseek, 120831d85ab2SAlexey Dobriyan .release = seq_release, 120931d85ab2SAlexey Dobriyan }; 121031d85ab2SAlexey Dobriyan 1211f500975aSAlexey Dobriyan static int __init proc_genhd_init(void) 1212f500975aSAlexey Dobriyan { 121331d85ab2SAlexey Dobriyan proc_create("diskstats", 0, NULL, &proc_diskstats_operations); 1214f500975aSAlexey Dobriyan proc_create("partitions", 0, NULL, &proc_partitions_operations); 1215f500975aSAlexey Dobriyan return 0; 1216f500975aSAlexey Dobriyan } 1217f500975aSAlexey Dobriyan module_init(proc_genhd_init); 1218a6e2ba88SRandy Dunlap #endif /* CONFIG_PROC_FS */ 12191da177e4SLinus Torvalds 1220cf771cb5STejun Heo dev_t blk_lookup_devt(const char *name, int partno) 1221edfaa7c3SKay Sievers { 1222edfaa7c3SKay Sievers dev_t devt = MKDEV(0, 0); 1223def4e38dSTejun Heo struct class_dev_iter iter; 1224def4e38dSTejun Heo struct device *dev; 1225edfaa7c3SKay Sievers 1226def4e38dSTejun Heo class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1227def4e38dSTejun Heo while ((dev = class_dev_iter_next(&iter))) { 1228def4e38dSTejun Heo struct gendisk *disk = dev_to_disk(dev); 1229548b10ebSTejun Heo struct hd_struct *part; 1230def4e38dSTejun Heo 12313ada8b7eSKay Sievers if (strcmp(dev_name(dev), name)) 1232f331c029STejun Heo continue; 1233f331c029STejun Heo 123441b8c853SNeil Brown if (partno < disk->minors) { 123541b8c853SNeil Brown /* We need to return the right devno, even 123641b8c853SNeil Brown * if the partition doesn't exist yet. 123741b8c853SNeil Brown */ 123841b8c853SNeil Brown devt = MKDEV(MAJOR(dev->devt), 123941b8c853SNeil Brown MINOR(dev->devt) + partno); 124041b8c853SNeil Brown break; 124141b8c853SNeil Brown } 1242e71bf0d0STejun Heo part = disk_get_part(disk, partno); 12432bbedcb4STejun Heo if (part) { 1244f331c029STejun Heo devt = part_devt(part); 1245e71bf0d0STejun Heo disk_put_part(part); 1246f331c029STejun Heo break; 1247def4e38dSTejun Heo } 1248548b10ebSTejun Heo disk_put_part(part); 1249548b10ebSTejun Heo } 1250def4e38dSTejun Heo class_dev_iter_exit(&iter); 1251edfaa7c3SKay Sievers return devt; 1252edfaa7c3SKay Sievers } 1253edfaa7c3SKay Sievers EXPORT_SYMBOL(blk_lookup_devt); 1254edfaa7c3SKay Sievers 12551da177e4SLinus Torvalds struct gendisk *alloc_disk(int minors) 12561da177e4SLinus Torvalds { 1257c304a51bSEzequiel Garcia return alloc_disk_node(minors, NUMA_NO_NODE); 12581946089aSChristoph Lameter } 1259689d6facSTejun Heo EXPORT_SYMBOL(alloc_disk); 12601946089aSChristoph Lameter 12611946089aSChristoph Lameter struct gendisk *alloc_disk_node(int minors, int node_id) 12621946089aSChristoph Lameter { 12631946089aSChristoph Lameter struct gendisk *disk; 12641946089aSChristoph Lameter 1265c1b511ebSJoe Perches disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); 12661da177e4SLinus Torvalds if (disk) { 1267074a7acaSTejun Heo if (!init_part_stats(&disk->part0)) { 12681da177e4SLinus Torvalds kfree(disk); 12691da177e4SLinus Torvalds return NULL; 12701da177e4SLinus Torvalds } 1271bf91db18SCheng Renquan disk->node_id = node_id; 1272540eed56STejun Heo if (disk_expand_part_tbl(disk, 0)) { 1273074a7acaSTejun Heo free_part_stats(&disk->part0); 12741da177e4SLinus Torvalds kfree(disk); 12751da177e4SLinus Torvalds return NULL; 12761da177e4SLinus Torvalds } 1277540eed56STejun Heo disk->part_tbl->part[0] = &disk->part0; 12786c23a968SJens Axboe 1279c83f6bf9SVivek Goyal /* 1280c83f6bf9SVivek Goyal * set_capacity() and get_capacity() currently don't use 1281c83f6bf9SVivek Goyal * seqcounter to read/update the part0->nr_sects. Still init 1282c83f6bf9SVivek Goyal * the counter as we can read the sectors in IO submission 1283c83f6bf9SVivek Goyal * patch using seqence counters. 1284c83f6bf9SVivek Goyal * 1285c83f6bf9SVivek Goyal * TODO: Ideally set_capacity() and get_capacity() should be 1286c83f6bf9SVivek Goyal * converted to make use of bd_mutex and sequence counters. 1287c83f6bf9SVivek Goyal */ 1288c83f6bf9SVivek Goyal seqcount_init(&disk->part0.nr_sects_seq); 12896c71013eSMing Lei if (hd_ref_init(&disk->part0)) { 12906c71013eSMing Lei hd_free_part(&disk->part0); 12916c71013eSMing Lei kfree(disk); 12926c71013eSMing Lei return NULL; 12936c71013eSMing Lei } 1294b5d0b9dfSTejun Heo 12951da177e4SLinus Torvalds disk->minors = minors; 12961da177e4SLinus Torvalds rand_initialize_disk(disk); 1297ed9e1982STejun Heo disk_to_dev(disk)->class = &block_class; 1298ed9e1982STejun Heo disk_to_dev(disk)->type = &disk_type; 1299ed9e1982STejun Heo device_initialize(disk_to_dev(disk)); 13001da177e4SLinus Torvalds } 13011da177e4SLinus Torvalds return disk; 13021da177e4SLinus Torvalds } 13031946089aSChristoph Lameter EXPORT_SYMBOL(alloc_disk_node); 13041da177e4SLinus Torvalds 13051da177e4SLinus Torvalds struct kobject *get_disk(struct gendisk *disk) 13061da177e4SLinus Torvalds { 13071da177e4SLinus Torvalds struct module *owner; 13081da177e4SLinus Torvalds struct kobject *kobj; 13091da177e4SLinus Torvalds 13101da177e4SLinus Torvalds if (!disk->fops) 13111da177e4SLinus Torvalds return NULL; 13121da177e4SLinus Torvalds owner = disk->fops->owner; 13131da177e4SLinus Torvalds if (owner && !try_module_get(owner)) 13141da177e4SLinus Torvalds return NULL; 1315ed9e1982STejun Heo kobj = kobject_get(&disk_to_dev(disk)->kobj); 13161da177e4SLinus Torvalds if (kobj == NULL) { 13171da177e4SLinus Torvalds module_put(owner); 13181da177e4SLinus Torvalds return NULL; 13191da177e4SLinus Torvalds } 13201da177e4SLinus Torvalds return kobj; 13211da177e4SLinus Torvalds 13221da177e4SLinus Torvalds } 13231da177e4SLinus Torvalds 13241da177e4SLinus Torvalds EXPORT_SYMBOL(get_disk); 13251da177e4SLinus Torvalds 13261da177e4SLinus Torvalds void put_disk(struct gendisk *disk) 13271da177e4SLinus Torvalds { 13281da177e4SLinus Torvalds if (disk) 1329ed9e1982STejun Heo kobject_put(&disk_to_dev(disk)->kobj); 13301da177e4SLinus Torvalds } 13311da177e4SLinus Torvalds 13321da177e4SLinus Torvalds EXPORT_SYMBOL(put_disk); 13331da177e4SLinus Torvalds 1334e3264a4dSHannes Reinecke static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1335e3264a4dSHannes Reinecke { 1336e3264a4dSHannes Reinecke char event[] = "DISK_RO=1"; 1337e3264a4dSHannes Reinecke char *envp[] = { event, NULL }; 1338e3264a4dSHannes Reinecke 1339e3264a4dSHannes Reinecke if (!ro) 1340e3264a4dSHannes Reinecke event[8] = '0'; 1341e3264a4dSHannes Reinecke kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1342e3264a4dSHannes Reinecke } 1343e3264a4dSHannes Reinecke 13441da177e4SLinus Torvalds void set_device_ro(struct block_device *bdev, int flag) 13451da177e4SLinus Torvalds { 13461da177e4SLinus Torvalds bdev->bd_part->policy = flag; 13471da177e4SLinus Torvalds } 13481da177e4SLinus Torvalds 13491da177e4SLinus Torvalds EXPORT_SYMBOL(set_device_ro); 13501da177e4SLinus Torvalds 13511da177e4SLinus Torvalds void set_disk_ro(struct gendisk *disk, int flag) 13521da177e4SLinus Torvalds { 1353e71bf0d0STejun Heo struct disk_part_iter piter; 1354e71bf0d0STejun Heo struct hd_struct *part; 1355e71bf0d0STejun Heo 1356e3264a4dSHannes Reinecke if (disk->part0.policy != flag) { 1357e3264a4dSHannes Reinecke set_disk_ro_uevent(disk, flag); 1358e3264a4dSHannes Reinecke disk->part0.policy = flag; 1359e3264a4dSHannes Reinecke } 1360e3264a4dSHannes Reinecke 1361e3264a4dSHannes Reinecke disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1362e71bf0d0STejun Heo while ((part = disk_part_iter_next(&piter))) 1363e71bf0d0STejun Heo part->policy = flag; 1364e71bf0d0STejun Heo disk_part_iter_exit(&piter); 13651da177e4SLinus Torvalds } 13661da177e4SLinus Torvalds 13671da177e4SLinus Torvalds EXPORT_SYMBOL(set_disk_ro); 13681da177e4SLinus Torvalds 13691da177e4SLinus Torvalds int bdev_read_only(struct block_device *bdev) 13701da177e4SLinus Torvalds { 13711da177e4SLinus Torvalds if (!bdev) 13721da177e4SLinus Torvalds return 0; 13731da177e4SLinus Torvalds return bdev->bd_part->policy; 13741da177e4SLinus Torvalds } 13751da177e4SLinus Torvalds 13761da177e4SLinus Torvalds EXPORT_SYMBOL(bdev_read_only); 13771da177e4SLinus Torvalds 1378cf771cb5STejun Heo int invalidate_partition(struct gendisk *disk, int partno) 13791da177e4SLinus Torvalds { 13801da177e4SLinus Torvalds int res = 0; 1381cf771cb5STejun Heo struct block_device *bdev = bdget_disk(disk, partno); 13821da177e4SLinus Torvalds if (bdev) { 13832ef41634SChristoph Hellwig fsync_bdev(bdev); 138493b270f7SNeilBrown res = __invalidate_device(bdev, true); 13851da177e4SLinus Torvalds bdput(bdev); 13861da177e4SLinus Torvalds } 13871da177e4SLinus Torvalds return res; 13881da177e4SLinus Torvalds } 13891da177e4SLinus Torvalds 13901da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_partition); 139177ea887eSTejun Heo 139277ea887eSTejun Heo /* 139377ea887eSTejun Heo * Disk events - monitor disk events like media change and eject request. 139477ea887eSTejun Heo */ 139577ea887eSTejun Heo struct disk_events { 139677ea887eSTejun Heo struct list_head node; /* all disk_event's */ 139777ea887eSTejun Heo struct gendisk *disk; /* the associated disk */ 139877ea887eSTejun Heo spinlock_t lock; 139977ea887eSTejun Heo 1400fdd514e1STejun Heo struct mutex block_mutex; /* protects blocking */ 140177ea887eSTejun Heo int block; /* event blocking depth */ 140277ea887eSTejun Heo unsigned int pending; /* events already sent out */ 140377ea887eSTejun Heo unsigned int clearing; /* events being cleared */ 140477ea887eSTejun Heo 140577ea887eSTejun Heo long poll_msecs; /* interval, -1 for default */ 140677ea887eSTejun Heo struct delayed_work dwork; 140777ea887eSTejun Heo }; 140877ea887eSTejun Heo 140977ea887eSTejun Heo static const char *disk_events_strs[] = { 141077ea887eSTejun Heo [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", 141177ea887eSTejun Heo [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", 141277ea887eSTejun Heo }; 141377ea887eSTejun Heo 141477ea887eSTejun Heo static char *disk_uevents[] = { 141577ea887eSTejun Heo [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", 141677ea887eSTejun Heo [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", 141777ea887eSTejun Heo }; 141877ea887eSTejun Heo 141977ea887eSTejun Heo /* list of all disk_events */ 142077ea887eSTejun Heo static DEFINE_MUTEX(disk_events_mutex); 142177ea887eSTejun Heo static LIST_HEAD(disk_events); 142277ea887eSTejun Heo 142377ea887eSTejun Heo /* disable in-kernel polling by default */ 142477ea887eSTejun Heo static unsigned long disk_events_dfl_poll_msecs = 0; 142577ea887eSTejun Heo 142677ea887eSTejun Heo static unsigned long disk_events_poll_jiffies(struct gendisk *disk) 142777ea887eSTejun Heo { 142877ea887eSTejun Heo struct disk_events *ev = disk->ev; 142977ea887eSTejun Heo long intv_msecs = 0; 143077ea887eSTejun Heo 143177ea887eSTejun Heo /* 143277ea887eSTejun Heo * If device-specific poll interval is set, always use it. If 143377ea887eSTejun Heo * the default is being used, poll iff there are events which 143477ea887eSTejun Heo * can't be monitored asynchronously. 143577ea887eSTejun Heo */ 143677ea887eSTejun Heo if (ev->poll_msecs >= 0) 143777ea887eSTejun Heo intv_msecs = ev->poll_msecs; 143877ea887eSTejun Heo else if (disk->events & ~disk->async_events) 143977ea887eSTejun Heo intv_msecs = disk_events_dfl_poll_msecs; 144077ea887eSTejun Heo 144177ea887eSTejun Heo return msecs_to_jiffies(intv_msecs); 144277ea887eSTejun Heo } 144377ea887eSTejun Heo 1444c3af54afSTejun Heo /** 1445c3af54afSTejun Heo * disk_block_events - block and flush disk event checking 1446c3af54afSTejun Heo * @disk: disk to block events for 1447c3af54afSTejun Heo * 1448c3af54afSTejun Heo * On return from this function, it is guaranteed that event checking 1449c3af54afSTejun Heo * isn't in progress and won't happen until unblocked by 1450c3af54afSTejun Heo * disk_unblock_events(). Events blocking is counted and the actual 1451c3af54afSTejun Heo * unblocking happens after the matching number of unblocks are done. 1452c3af54afSTejun Heo * 1453c3af54afSTejun Heo * Note that this intentionally does not block event checking from 1454c3af54afSTejun Heo * disk_clear_events(). 1455c3af54afSTejun Heo * 1456c3af54afSTejun Heo * CONTEXT: 1457c3af54afSTejun Heo * Might sleep. 1458c3af54afSTejun Heo */ 1459c3af54afSTejun Heo void disk_block_events(struct gendisk *disk) 146077ea887eSTejun Heo { 146177ea887eSTejun Heo struct disk_events *ev = disk->ev; 146277ea887eSTejun Heo unsigned long flags; 146377ea887eSTejun Heo bool cancel; 146477ea887eSTejun Heo 1465c3af54afSTejun Heo if (!ev) 1466c3af54afSTejun Heo return; 1467c3af54afSTejun Heo 1468fdd514e1STejun Heo /* 1469fdd514e1STejun Heo * Outer mutex ensures that the first blocker completes canceling 1470fdd514e1STejun Heo * the event work before further blockers are allowed to finish. 1471fdd514e1STejun Heo */ 1472fdd514e1STejun Heo mutex_lock(&ev->block_mutex); 1473fdd514e1STejun Heo 147477ea887eSTejun Heo spin_lock_irqsave(&ev->lock, flags); 147577ea887eSTejun Heo cancel = !ev->block++; 147677ea887eSTejun Heo spin_unlock_irqrestore(&ev->lock, flags); 147777ea887eSTejun Heo 1478c3af54afSTejun Heo if (cancel) 147977ea887eSTejun Heo cancel_delayed_work_sync(&disk->ev->dwork); 1480fdd514e1STejun Heo 1481fdd514e1STejun Heo mutex_unlock(&ev->block_mutex); 148277ea887eSTejun Heo } 148377ea887eSTejun Heo 148477ea887eSTejun Heo static void __disk_unblock_events(struct gendisk *disk, bool check_now) 148577ea887eSTejun Heo { 148677ea887eSTejun Heo struct disk_events *ev = disk->ev; 148777ea887eSTejun Heo unsigned long intv; 148877ea887eSTejun Heo unsigned long flags; 148977ea887eSTejun Heo 149077ea887eSTejun Heo spin_lock_irqsave(&ev->lock, flags); 149177ea887eSTejun Heo 149277ea887eSTejun Heo if (WARN_ON_ONCE(ev->block <= 0)) 149377ea887eSTejun Heo goto out_unlock; 149477ea887eSTejun Heo 149577ea887eSTejun Heo if (--ev->block) 149677ea887eSTejun Heo goto out_unlock; 149777ea887eSTejun Heo 149877ea887eSTejun Heo /* 149977ea887eSTejun Heo * Not exactly a latency critical operation, set poll timer 150077ea887eSTejun Heo * slack to 25% and kick event check. 150177ea887eSTejun Heo */ 150277ea887eSTejun Heo intv = disk_events_poll_jiffies(disk); 150377ea887eSTejun Heo set_timer_slack(&ev->dwork.timer, intv / 4); 150477ea887eSTejun Heo if (check_now) 1505695588f9SViresh Kumar queue_delayed_work(system_freezable_power_efficient_wq, 1506695588f9SViresh Kumar &ev->dwork, 0); 150777ea887eSTejun Heo else if (intv) 1508695588f9SViresh Kumar queue_delayed_work(system_freezable_power_efficient_wq, 1509695588f9SViresh Kumar &ev->dwork, intv); 151077ea887eSTejun Heo out_unlock: 151177ea887eSTejun Heo spin_unlock_irqrestore(&ev->lock, flags); 151277ea887eSTejun Heo } 151377ea887eSTejun Heo 151477ea887eSTejun Heo /** 151577ea887eSTejun Heo * disk_unblock_events - unblock disk event checking 151677ea887eSTejun Heo * @disk: disk to unblock events for 151777ea887eSTejun Heo * 151877ea887eSTejun Heo * Undo disk_block_events(). When the block count reaches zero, it 151977ea887eSTejun Heo * starts events polling if configured. 152077ea887eSTejun Heo * 152177ea887eSTejun Heo * CONTEXT: 152277ea887eSTejun Heo * Don't care. Safe to call from irq context. 152377ea887eSTejun Heo */ 152477ea887eSTejun Heo void disk_unblock_events(struct gendisk *disk) 152577ea887eSTejun Heo { 152677ea887eSTejun Heo if (disk->ev) 1527facc31ddSTejun Heo __disk_unblock_events(disk, false); 152877ea887eSTejun Heo } 152977ea887eSTejun Heo 153077ea887eSTejun Heo /** 153185ef06d1STejun Heo * disk_flush_events - schedule immediate event checking and flushing 153285ef06d1STejun Heo * @disk: disk to check and flush events for 153385ef06d1STejun Heo * @mask: events to flush 153477ea887eSTejun Heo * 153585ef06d1STejun Heo * Schedule immediate event checking on @disk if not blocked. Events in 153685ef06d1STejun Heo * @mask are scheduled to be cleared from the driver. Note that this 153785ef06d1STejun Heo * doesn't clear the events from @disk->ev. 153877ea887eSTejun Heo * 153977ea887eSTejun Heo * CONTEXT: 154085ef06d1STejun Heo * If @mask is non-zero must be called with bdev->bd_mutex held. 154177ea887eSTejun Heo */ 154285ef06d1STejun Heo void disk_flush_events(struct gendisk *disk, unsigned int mask) 154377ea887eSTejun Heo { 1544a9dce2a3STejun Heo struct disk_events *ev = disk->ev; 1545a9dce2a3STejun Heo 1546a9dce2a3STejun Heo if (!ev) 1547a9dce2a3STejun Heo return; 1548a9dce2a3STejun Heo 154985ef06d1STejun Heo spin_lock_irq(&ev->lock); 155085ef06d1STejun Heo ev->clearing |= mask; 155141f63c53STejun Heo if (!ev->block) 1552695588f9SViresh Kumar mod_delayed_work(system_freezable_power_efficient_wq, 1553695588f9SViresh Kumar &ev->dwork, 0); 155485ef06d1STejun Heo spin_unlock_irq(&ev->lock); 155577ea887eSTejun Heo } 155677ea887eSTejun Heo 155777ea887eSTejun Heo /** 155877ea887eSTejun Heo * disk_clear_events - synchronously check, clear and return pending events 155977ea887eSTejun Heo * @disk: disk to fetch and clear events from 1560da3dae54SMasanari Iida * @mask: mask of events to be fetched and cleared 156177ea887eSTejun Heo * 156277ea887eSTejun Heo * Disk events are synchronously checked and pending events in @mask 156377ea887eSTejun Heo * are cleared and returned. This ignores the block count. 156477ea887eSTejun Heo * 156577ea887eSTejun Heo * CONTEXT: 156677ea887eSTejun Heo * Might sleep. 156777ea887eSTejun Heo */ 156877ea887eSTejun Heo unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) 156977ea887eSTejun Heo { 157077ea887eSTejun Heo const struct block_device_operations *bdops = disk->fops; 157177ea887eSTejun Heo struct disk_events *ev = disk->ev; 157277ea887eSTejun Heo unsigned int pending; 157312c2bdb2SDerek Basehore unsigned int clearing = mask; 157477ea887eSTejun Heo 157577ea887eSTejun Heo if (!ev) { 157677ea887eSTejun Heo /* for drivers still using the old ->media_changed method */ 157777ea887eSTejun Heo if ((mask & DISK_EVENT_MEDIA_CHANGE) && 157877ea887eSTejun Heo bdops->media_changed && bdops->media_changed(disk)) 157977ea887eSTejun Heo return DISK_EVENT_MEDIA_CHANGE; 158077ea887eSTejun Heo return 0; 158177ea887eSTejun Heo } 158277ea887eSTejun Heo 158312c2bdb2SDerek Basehore disk_block_events(disk); 158412c2bdb2SDerek Basehore 158512c2bdb2SDerek Basehore /* 158612c2bdb2SDerek Basehore * store the union of mask and ev->clearing on the stack so that the 158712c2bdb2SDerek Basehore * race with disk_flush_events does not cause ambiguity (ev->clearing 158812c2bdb2SDerek Basehore * can still be modified even if events are blocked). 158912c2bdb2SDerek Basehore */ 159077ea887eSTejun Heo spin_lock_irq(&ev->lock); 159112c2bdb2SDerek Basehore clearing |= ev->clearing; 159212c2bdb2SDerek Basehore ev->clearing = 0; 159377ea887eSTejun Heo spin_unlock_irq(&ev->lock); 159477ea887eSTejun Heo 159512c2bdb2SDerek Basehore disk_check_events(ev, &clearing); 1596aea24a8bSDerek Basehore /* 159712c2bdb2SDerek Basehore * if ev->clearing is not 0, the disk_flush_events got called in the 159812c2bdb2SDerek Basehore * middle of this function, so we want to run the workfn without delay. 1599aea24a8bSDerek Basehore */ 160012c2bdb2SDerek Basehore __disk_unblock_events(disk, ev->clearing ? true : false); 160177ea887eSTejun Heo 160277ea887eSTejun Heo /* then, fetch and clear pending events */ 160377ea887eSTejun Heo spin_lock_irq(&ev->lock); 160477ea887eSTejun Heo pending = ev->pending & mask; 160577ea887eSTejun Heo ev->pending &= ~mask; 160677ea887eSTejun Heo spin_unlock_irq(&ev->lock); 160712c2bdb2SDerek Basehore WARN_ON_ONCE(clearing & mask); 160877ea887eSTejun Heo 160977ea887eSTejun Heo return pending; 161077ea887eSTejun Heo } 161177ea887eSTejun Heo 161212c2bdb2SDerek Basehore /* 161312c2bdb2SDerek Basehore * Separate this part out so that a different pointer for clearing_ptr can be 161412c2bdb2SDerek Basehore * passed in for disk_clear_events. 161512c2bdb2SDerek Basehore */ 161677ea887eSTejun Heo static void disk_events_workfn(struct work_struct *work) 161777ea887eSTejun Heo { 161877ea887eSTejun Heo struct delayed_work *dwork = to_delayed_work(work); 161977ea887eSTejun Heo struct disk_events *ev = container_of(dwork, struct disk_events, dwork); 162012c2bdb2SDerek Basehore 162112c2bdb2SDerek Basehore disk_check_events(ev, &ev->clearing); 162212c2bdb2SDerek Basehore } 162312c2bdb2SDerek Basehore 162412c2bdb2SDerek Basehore static void disk_check_events(struct disk_events *ev, 162512c2bdb2SDerek Basehore unsigned int *clearing_ptr) 162612c2bdb2SDerek Basehore { 162777ea887eSTejun Heo struct gendisk *disk = ev->disk; 162877ea887eSTejun Heo char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; 162912c2bdb2SDerek Basehore unsigned int clearing = *clearing_ptr; 163077ea887eSTejun Heo unsigned int events; 163177ea887eSTejun Heo unsigned long intv; 163277ea887eSTejun Heo int nr_events = 0, i; 163377ea887eSTejun Heo 163477ea887eSTejun Heo /* check events */ 163577ea887eSTejun Heo events = disk->fops->check_events(disk, clearing); 163677ea887eSTejun Heo 163777ea887eSTejun Heo /* accumulate pending events and schedule next poll if necessary */ 163877ea887eSTejun Heo spin_lock_irq(&ev->lock); 163977ea887eSTejun Heo 164077ea887eSTejun Heo events &= ~ev->pending; 164177ea887eSTejun Heo ev->pending |= events; 164212c2bdb2SDerek Basehore *clearing_ptr &= ~clearing; 164377ea887eSTejun Heo 164477ea887eSTejun Heo intv = disk_events_poll_jiffies(disk); 164577ea887eSTejun Heo if (!ev->block && intv) 1646695588f9SViresh Kumar queue_delayed_work(system_freezable_power_efficient_wq, 1647695588f9SViresh Kumar &ev->dwork, intv); 164877ea887eSTejun Heo 164977ea887eSTejun Heo spin_unlock_irq(&ev->lock); 165077ea887eSTejun Heo 16517c88a168STejun Heo /* 16527c88a168STejun Heo * Tell userland about new events. Only the events listed in 16537c88a168STejun Heo * @disk->events are reported. Unlisted events are processed the 16547c88a168STejun Heo * same internally but never get reported to userland. 16557c88a168STejun Heo */ 165677ea887eSTejun Heo for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) 16577c88a168STejun Heo if (events & disk->events & (1 << i)) 165877ea887eSTejun Heo envp[nr_events++] = disk_uevents[i]; 165977ea887eSTejun Heo 166077ea887eSTejun Heo if (nr_events) 166177ea887eSTejun Heo kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 166277ea887eSTejun Heo } 166377ea887eSTejun Heo 166477ea887eSTejun Heo /* 166577ea887eSTejun Heo * A disk events enabled device has the following sysfs nodes under 166677ea887eSTejun Heo * its /sys/block/X/ directory. 166777ea887eSTejun Heo * 166877ea887eSTejun Heo * events : list of all supported events 166977ea887eSTejun Heo * events_async : list of events which can be detected w/o polling 167077ea887eSTejun Heo * events_poll_msecs : polling interval, 0: disable, -1: system default 167177ea887eSTejun Heo */ 167277ea887eSTejun Heo static ssize_t __disk_events_show(unsigned int events, char *buf) 167377ea887eSTejun Heo { 167477ea887eSTejun Heo const char *delim = ""; 167577ea887eSTejun Heo ssize_t pos = 0; 167677ea887eSTejun Heo int i; 167777ea887eSTejun Heo 167877ea887eSTejun Heo for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) 167977ea887eSTejun Heo if (events & (1 << i)) { 168077ea887eSTejun Heo pos += sprintf(buf + pos, "%s%s", 168177ea887eSTejun Heo delim, disk_events_strs[i]); 168277ea887eSTejun Heo delim = " "; 168377ea887eSTejun Heo } 168477ea887eSTejun Heo if (pos) 168577ea887eSTejun Heo pos += sprintf(buf + pos, "\n"); 168677ea887eSTejun Heo return pos; 168777ea887eSTejun Heo } 168877ea887eSTejun Heo 168977ea887eSTejun Heo static ssize_t disk_events_show(struct device *dev, 169077ea887eSTejun Heo struct device_attribute *attr, char *buf) 169177ea887eSTejun Heo { 169277ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 169377ea887eSTejun Heo 169477ea887eSTejun Heo return __disk_events_show(disk->events, buf); 169577ea887eSTejun Heo } 169677ea887eSTejun Heo 169777ea887eSTejun Heo static ssize_t disk_events_async_show(struct device *dev, 169877ea887eSTejun Heo struct device_attribute *attr, char *buf) 169977ea887eSTejun Heo { 170077ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 170177ea887eSTejun Heo 170277ea887eSTejun Heo return __disk_events_show(disk->async_events, buf); 170377ea887eSTejun Heo } 170477ea887eSTejun Heo 170577ea887eSTejun Heo static ssize_t disk_events_poll_msecs_show(struct device *dev, 170677ea887eSTejun Heo struct device_attribute *attr, 170777ea887eSTejun Heo char *buf) 170877ea887eSTejun Heo { 170977ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 171077ea887eSTejun Heo 171177ea887eSTejun Heo return sprintf(buf, "%ld\n", disk->ev->poll_msecs); 171277ea887eSTejun Heo } 171377ea887eSTejun Heo 171477ea887eSTejun Heo static ssize_t disk_events_poll_msecs_store(struct device *dev, 171577ea887eSTejun Heo struct device_attribute *attr, 171677ea887eSTejun Heo const char *buf, size_t count) 171777ea887eSTejun Heo { 171877ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 171977ea887eSTejun Heo long intv; 172077ea887eSTejun Heo 172177ea887eSTejun Heo if (!count || !sscanf(buf, "%ld", &intv)) 172277ea887eSTejun Heo return -EINVAL; 172377ea887eSTejun Heo 172477ea887eSTejun Heo if (intv < 0 && intv != -1) 172577ea887eSTejun Heo return -EINVAL; 172677ea887eSTejun Heo 1727c3af54afSTejun Heo disk_block_events(disk); 172877ea887eSTejun Heo disk->ev->poll_msecs = intv; 172977ea887eSTejun Heo __disk_unblock_events(disk, true); 173077ea887eSTejun Heo 173177ea887eSTejun Heo return count; 173277ea887eSTejun Heo } 173377ea887eSTejun Heo 173477ea887eSTejun Heo static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); 173577ea887eSTejun Heo static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); 173677ea887eSTejun Heo static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, 173777ea887eSTejun Heo disk_events_poll_msecs_show, 173877ea887eSTejun Heo disk_events_poll_msecs_store); 173977ea887eSTejun Heo 174077ea887eSTejun Heo static const struct attribute *disk_events_attrs[] = { 174177ea887eSTejun Heo &dev_attr_events.attr, 174277ea887eSTejun Heo &dev_attr_events_async.attr, 174377ea887eSTejun Heo &dev_attr_events_poll_msecs.attr, 174477ea887eSTejun Heo NULL, 174577ea887eSTejun Heo }; 174677ea887eSTejun Heo 174777ea887eSTejun Heo /* 174877ea887eSTejun Heo * The default polling interval can be specified by the kernel 174977ea887eSTejun Heo * parameter block.events_dfl_poll_msecs which defaults to 0 175077ea887eSTejun Heo * (disable). This can also be modified runtime by writing to 175177ea887eSTejun Heo * /sys/module/block/events_dfl_poll_msecs. 175277ea887eSTejun Heo */ 175377ea887eSTejun Heo static int disk_events_set_dfl_poll_msecs(const char *val, 175477ea887eSTejun Heo const struct kernel_param *kp) 175577ea887eSTejun Heo { 175677ea887eSTejun Heo struct disk_events *ev; 175777ea887eSTejun Heo int ret; 175877ea887eSTejun Heo 175977ea887eSTejun Heo ret = param_set_ulong(val, kp); 176077ea887eSTejun Heo if (ret < 0) 176177ea887eSTejun Heo return ret; 176277ea887eSTejun Heo 176377ea887eSTejun Heo mutex_lock(&disk_events_mutex); 176477ea887eSTejun Heo 176577ea887eSTejun Heo list_for_each_entry(ev, &disk_events, node) 176685ef06d1STejun Heo disk_flush_events(ev->disk, 0); 176777ea887eSTejun Heo 176877ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 176977ea887eSTejun Heo 177077ea887eSTejun Heo return 0; 177177ea887eSTejun Heo } 177277ea887eSTejun Heo 177377ea887eSTejun Heo static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { 177477ea887eSTejun Heo .set = disk_events_set_dfl_poll_msecs, 177577ea887eSTejun Heo .get = param_get_ulong, 177677ea887eSTejun Heo }; 177777ea887eSTejun Heo 177877ea887eSTejun Heo #undef MODULE_PARAM_PREFIX 177977ea887eSTejun Heo #define MODULE_PARAM_PREFIX "block." 178077ea887eSTejun Heo 178177ea887eSTejun Heo module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, 178277ea887eSTejun Heo &disk_events_dfl_poll_msecs, 0644); 178377ea887eSTejun Heo 178477ea887eSTejun Heo /* 17859f53d2feSStanislaw Gruszka * disk_{alloc|add|del|release}_events - initialize and destroy disk_events. 178677ea887eSTejun Heo */ 17879f53d2feSStanislaw Gruszka static void disk_alloc_events(struct gendisk *disk) 178877ea887eSTejun Heo { 178977ea887eSTejun Heo struct disk_events *ev; 179077ea887eSTejun Heo 179175e3f3eeSTejun Heo if (!disk->fops->check_events) 179277ea887eSTejun Heo return; 179377ea887eSTejun Heo 179477ea887eSTejun Heo ev = kzalloc(sizeof(*ev), GFP_KERNEL); 179577ea887eSTejun Heo if (!ev) { 179677ea887eSTejun Heo pr_warn("%s: failed to initialize events\n", disk->disk_name); 179777ea887eSTejun Heo return; 179877ea887eSTejun Heo } 179977ea887eSTejun Heo 180077ea887eSTejun Heo INIT_LIST_HEAD(&ev->node); 180177ea887eSTejun Heo ev->disk = disk; 180277ea887eSTejun Heo spin_lock_init(&ev->lock); 1803fdd514e1STejun Heo mutex_init(&ev->block_mutex); 180477ea887eSTejun Heo ev->block = 1; 180577ea887eSTejun Heo ev->poll_msecs = -1; 180677ea887eSTejun Heo INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); 180777ea887eSTejun Heo 18089f53d2feSStanislaw Gruszka disk->ev = ev; 18099f53d2feSStanislaw Gruszka } 18109f53d2feSStanislaw Gruszka 18119f53d2feSStanislaw Gruszka static void disk_add_events(struct gendisk *disk) 18129f53d2feSStanislaw Gruszka { 18139f53d2feSStanislaw Gruszka if (!disk->ev) 18149f53d2feSStanislaw Gruszka return; 18159f53d2feSStanislaw Gruszka 18169f53d2feSStanislaw Gruszka /* FIXME: error handling */ 18179f53d2feSStanislaw Gruszka if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) 18189f53d2feSStanislaw Gruszka pr_warn("%s: failed to create sysfs files for events\n", 18199f53d2feSStanislaw Gruszka disk->disk_name); 18209f53d2feSStanislaw Gruszka 182177ea887eSTejun Heo mutex_lock(&disk_events_mutex); 18229f53d2feSStanislaw Gruszka list_add_tail(&disk->ev->node, &disk_events); 182377ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 182477ea887eSTejun Heo 182577ea887eSTejun Heo /* 182677ea887eSTejun Heo * Block count is initialized to 1 and the following initial 182777ea887eSTejun Heo * unblock kicks it into action. 182877ea887eSTejun Heo */ 182977ea887eSTejun Heo __disk_unblock_events(disk, true); 183077ea887eSTejun Heo } 183177ea887eSTejun Heo 183277ea887eSTejun Heo static void disk_del_events(struct gendisk *disk) 183377ea887eSTejun Heo { 183477ea887eSTejun Heo if (!disk->ev) 183577ea887eSTejun Heo return; 183677ea887eSTejun Heo 1837c3af54afSTejun Heo disk_block_events(disk); 183877ea887eSTejun Heo 183977ea887eSTejun Heo mutex_lock(&disk_events_mutex); 184077ea887eSTejun Heo list_del_init(&disk->ev->node); 184177ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 184277ea887eSTejun Heo 184377ea887eSTejun Heo sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); 184477ea887eSTejun Heo } 184577ea887eSTejun Heo 184677ea887eSTejun Heo static void disk_release_events(struct gendisk *disk) 184777ea887eSTejun Heo { 184877ea887eSTejun Heo /* the block count should be 1 from disk_del_events() */ 184977ea887eSTejun Heo WARN_ON_ONCE(disk->ev && disk->ev->block != 1); 185077ea887eSTejun Heo kfree(disk->ev); 185177ea887eSTejun Heo } 1852