11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * gendisk handling 31da177e4SLinus Torvalds */ 41da177e4SLinus Torvalds 51da177e4SLinus Torvalds #include <linux/module.h> 61da177e4SLinus Torvalds #include <linux/fs.h> 71da177e4SLinus Torvalds #include <linux/genhd.h> 8b446b60eSAndrew Morton #include <linux/kdev_t.h> 91da177e4SLinus Torvalds #include <linux/kernel.h> 101da177e4SLinus Torvalds #include <linux/blkdev.h> 111da177e4SLinus Torvalds #include <linux/init.h> 121da177e4SLinus Torvalds #include <linux/spinlock.h> 13f500975aSAlexey Dobriyan #include <linux/proc_fs.h> 141da177e4SLinus Torvalds #include <linux/seq_file.h> 151da177e4SLinus Torvalds #include <linux/slab.h> 161da177e4SLinus Torvalds #include <linux/kmod.h> 171da177e4SLinus Torvalds #include <linux/kobj_map.h> 1858383af6SJes Sorensen #include <linux/mutex.h> 19bcce3de1STejun Heo #include <linux/idr.h> 2077ea887eSTejun Heo #include <linux/log2.h> 2125e823c8SMing Lei #include <linux/pm_runtime.h> 221da177e4SLinus Torvalds 23ff88972cSAdrian Bunk #include "blk.h" 24ff88972cSAdrian Bunk 25edfaa7c3SKay Sievers static DEFINE_MUTEX(block_class_lock); 26edfaa7c3SKay Sievers struct kobject *block_depr; 271da177e4SLinus Torvalds 28bcce3de1STejun Heo /* for extended dynamic devt allocation, currently only one major is used */ 29ce23bba8STejun Heo #define NR_EXT_DEVT (1 << MINORBITS) 30bcce3de1STejun Heo 31bcce3de1STejun Heo /* For extended devt allocation. ext_devt_mutex prevents look up 32bcce3de1STejun Heo * results from going away underneath its user. 
33bcce3de1STejun Heo */ 34bcce3de1STejun Heo static DEFINE_MUTEX(ext_devt_mutex); 35bcce3de1STejun Heo static DEFINE_IDR(ext_devt_idr); 36bcce3de1STejun Heo 371826eadfSAdrian Bunk static struct device_type disk_type; 381826eadfSAdrian Bunk 3912c2bdb2SDerek Basehore static void disk_check_events(struct disk_events *ev, 4012c2bdb2SDerek Basehore unsigned int *clearing_ptr); 419f53d2feSStanislaw Gruszka static void disk_alloc_events(struct gendisk *disk); 4277ea887eSTejun Heo static void disk_add_events(struct gendisk *disk); 4377ea887eSTejun Heo static void disk_del_events(struct gendisk *disk); 4477ea887eSTejun Heo static void disk_release_events(struct gendisk *disk); 4577ea887eSTejun Heo 46e71bf0d0STejun Heo /** 47e71bf0d0STejun Heo * disk_get_part - get partition 48e71bf0d0STejun Heo * @disk: disk to look partition from 49e71bf0d0STejun Heo * @partno: partition number 50e71bf0d0STejun Heo * 51e71bf0d0STejun Heo * Look for partition @partno from @disk. If found, increment 52e71bf0d0STejun Heo * reference count and return it. 53e71bf0d0STejun Heo * 54e71bf0d0STejun Heo * CONTEXT: 55e71bf0d0STejun Heo * Don't care. 56e71bf0d0STejun Heo * 57e71bf0d0STejun Heo * RETURNS: 58e71bf0d0STejun Heo * Pointer to the found partition on success, NULL if not found. 
59e71bf0d0STejun Heo */ 60e71bf0d0STejun Heo struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 61e71bf0d0STejun Heo { 62540eed56STejun Heo struct hd_struct *part = NULL; 63540eed56STejun Heo struct disk_part_tbl *ptbl; 64e71bf0d0STejun Heo 65540eed56STejun Heo if (unlikely(partno < 0)) 66e71bf0d0STejun Heo return NULL; 67540eed56STejun Heo 68e71bf0d0STejun Heo rcu_read_lock(); 69540eed56STejun Heo 70540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 71540eed56STejun Heo if (likely(partno < ptbl->len)) { 72540eed56STejun Heo part = rcu_dereference(ptbl->part[partno]); 73e71bf0d0STejun Heo if (part) 74ed9e1982STejun Heo get_device(part_to_dev(part)); 75540eed56STejun Heo } 76540eed56STejun Heo 77e71bf0d0STejun Heo rcu_read_unlock(); 78e71bf0d0STejun Heo 79e71bf0d0STejun Heo return part; 80e71bf0d0STejun Heo } 81e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_get_part); 82e71bf0d0STejun Heo 83e71bf0d0STejun Heo /** 84e71bf0d0STejun Heo * disk_part_iter_init - initialize partition iterator 85e71bf0d0STejun Heo * @piter: iterator to initialize 86e71bf0d0STejun Heo * @disk: disk to iterate over 87e71bf0d0STejun Heo * @flags: DISK_PITER_* flags 88e71bf0d0STejun Heo * 89e71bf0d0STejun Heo * Initialize @piter so that it iterates over partitions of @disk. 90e71bf0d0STejun Heo * 91e71bf0d0STejun Heo * CONTEXT: 92e71bf0d0STejun Heo * Don't care. 
93e71bf0d0STejun Heo */ 94e71bf0d0STejun Heo void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 95e71bf0d0STejun Heo unsigned int flags) 96e71bf0d0STejun Heo { 97540eed56STejun Heo struct disk_part_tbl *ptbl; 98540eed56STejun Heo 99540eed56STejun Heo rcu_read_lock(); 100540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 101540eed56STejun Heo 102e71bf0d0STejun Heo piter->disk = disk; 103e71bf0d0STejun Heo piter->part = NULL; 104e71bf0d0STejun Heo 105e71bf0d0STejun Heo if (flags & DISK_PITER_REVERSE) 106540eed56STejun Heo piter->idx = ptbl->len - 1; 10771982a40STejun Heo else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) 108e71bf0d0STejun Heo piter->idx = 0; 109b5d0b9dfSTejun Heo else 110b5d0b9dfSTejun Heo piter->idx = 1; 111e71bf0d0STejun Heo 112e71bf0d0STejun Heo piter->flags = flags; 113540eed56STejun Heo 114540eed56STejun Heo rcu_read_unlock(); 115e71bf0d0STejun Heo } 116e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_init); 117e71bf0d0STejun Heo 118e71bf0d0STejun Heo /** 119e71bf0d0STejun Heo * disk_part_iter_next - proceed iterator to the next partition and return it 120e71bf0d0STejun Heo * @piter: iterator of interest 121e71bf0d0STejun Heo * 122e71bf0d0STejun Heo * Proceed @piter to the next partition and return it. 123e71bf0d0STejun Heo * 124e71bf0d0STejun Heo * CONTEXT: 125e71bf0d0STejun Heo * Don't care. 
126e71bf0d0STejun Heo */ 127e71bf0d0STejun Heo struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 128e71bf0d0STejun Heo { 129540eed56STejun Heo struct disk_part_tbl *ptbl; 130e71bf0d0STejun Heo int inc, end; 131e71bf0d0STejun Heo 132e71bf0d0STejun Heo /* put the last partition */ 133e71bf0d0STejun Heo disk_put_part(piter->part); 134e71bf0d0STejun Heo piter->part = NULL; 135e71bf0d0STejun Heo 136540eed56STejun Heo /* get part_tbl */ 137e71bf0d0STejun Heo rcu_read_lock(); 138540eed56STejun Heo ptbl = rcu_dereference(piter->disk->part_tbl); 139e71bf0d0STejun Heo 140e71bf0d0STejun Heo /* determine iteration parameters */ 141e71bf0d0STejun Heo if (piter->flags & DISK_PITER_REVERSE) { 142e71bf0d0STejun Heo inc = -1; 14371982a40STejun Heo if (piter->flags & (DISK_PITER_INCL_PART0 | 14471982a40STejun Heo DISK_PITER_INCL_EMPTY_PART0)) 145e71bf0d0STejun Heo end = -1; 146b5d0b9dfSTejun Heo else 147b5d0b9dfSTejun Heo end = 0; 148e71bf0d0STejun Heo } else { 149e71bf0d0STejun Heo inc = 1; 150540eed56STejun Heo end = ptbl->len; 151e71bf0d0STejun Heo } 152e71bf0d0STejun Heo 153e71bf0d0STejun Heo /* iterate to the next partition */ 154e71bf0d0STejun Heo for (; piter->idx != end; piter->idx += inc) { 155e71bf0d0STejun Heo struct hd_struct *part; 156e71bf0d0STejun Heo 157540eed56STejun Heo part = rcu_dereference(ptbl->part[piter->idx]); 158e71bf0d0STejun Heo if (!part) 159e71bf0d0STejun Heo continue; 160c83f6bf9SVivek Goyal if (!part_nr_sects_read(part) && 16171982a40STejun Heo !(piter->flags & DISK_PITER_INCL_EMPTY) && 16271982a40STejun Heo !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 16371982a40STejun Heo piter->idx == 0)) 164e71bf0d0STejun Heo continue; 165e71bf0d0STejun Heo 166ed9e1982STejun Heo get_device(part_to_dev(part)); 167e71bf0d0STejun Heo piter->part = part; 168e71bf0d0STejun Heo piter->idx += inc; 169e71bf0d0STejun Heo break; 170e71bf0d0STejun Heo } 171e71bf0d0STejun Heo 172e71bf0d0STejun Heo rcu_read_unlock(); 173e71bf0d0STejun Heo 174e71bf0d0STejun 
Heo return piter->part; 175e71bf0d0STejun Heo } 176e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_next); 177e71bf0d0STejun Heo 178e71bf0d0STejun Heo /** 179e71bf0d0STejun Heo * disk_part_iter_exit - finish up partition iteration 180e71bf0d0STejun Heo * @piter: iter of interest 181e71bf0d0STejun Heo * 182e71bf0d0STejun Heo * Called when iteration is over. Cleans up @piter. 183e71bf0d0STejun Heo * 184e71bf0d0STejun Heo * CONTEXT: 185e71bf0d0STejun Heo * Don't care. 186e71bf0d0STejun Heo */ 187e71bf0d0STejun Heo void disk_part_iter_exit(struct disk_part_iter *piter) 188e71bf0d0STejun Heo { 189e71bf0d0STejun Heo disk_put_part(piter->part); 190e71bf0d0STejun Heo piter->part = NULL; 191e71bf0d0STejun Heo } 192e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_part_iter_exit); 193e71bf0d0STejun Heo 194a6f23657SJens Axboe static inline int sector_in_part(struct hd_struct *part, sector_t sector) 195a6f23657SJens Axboe { 196a6f23657SJens Axboe return part->start_sect <= sector && 197c83f6bf9SVivek Goyal sector < part->start_sect + part_nr_sects_read(part); 198a6f23657SJens Axboe } 199a6f23657SJens Axboe 200e71bf0d0STejun Heo /** 201e71bf0d0STejun Heo * disk_map_sector_rcu - map sector to partition 202e71bf0d0STejun Heo * @disk: gendisk of interest 203e71bf0d0STejun Heo * @sector: sector to map 204e71bf0d0STejun Heo * 205e71bf0d0STejun Heo * Find out which partition @sector maps to on @disk. This is 206e71bf0d0STejun Heo * primarily used for stats accounting. 207e71bf0d0STejun Heo * 208e71bf0d0STejun Heo * CONTEXT: 209e71bf0d0STejun Heo * RCU read locked. The returned partition pointer is valid only 210e71bf0d0STejun Heo * while preemption is disabled. 
211e71bf0d0STejun Heo * 212e71bf0d0STejun Heo * RETURNS: 213074a7acaSTejun Heo * Found partition on success, part0 is returned if no partition matches 214e71bf0d0STejun Heo */ 215e71bf0d0STejun Heo struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 216e71bf0d0STejun Heo { 217540eed56STejun Heo struct disk_part_tbl *ptbl; 218a6f23657SJens Axboe struct hd_struct *part; 219e71bf0d0STejun Heo int i; 220e71bf0d0STejun Heo 221540eed56STejun Heo ptbl = rcu_dereference(disk->part_tbl); 222540eed56STejun Heo 223a6f23657SJens Axboe part = rcu_dereference(ptbl->last_lookup); 224a6f23657SJens Axboe if (part && sector_in_part(part, sector)) 225e71bf0d0STejun Heo return part; 226a6f23657SJens Axboe 227a6f23657SJens Axboe for (i = 1; i < ptbl->len; i++) { 228a6f23657SJens Axboe part = rcu_dereference(ptbl->part[i]); 229a6f23657SJens Axboe 230a6f23657SJens Axboe if (part && sector_in_part(part, sector)) { 231a6f23657SJens Axboe rcu_assign_pointer(ptbl->last_lookup, part); 232a6f23657SJens Axboe return part; 233a6f23657SJens Axboe } 234e71bf0d0STejun Heo } 235074a7acaSTejun Heo return &disk->part0; 236e71bf0d0STejun Heo } 237e71bf0d0STejun Heo EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 238e71bf0d0STejun Heo 2391da177e4SLinus Torvalds /* 2401da177e4SLinus Torvalds * Can be deleted altogether. Later. 
2411da177e4SLinus Torvalds * 2421da177e4SLinus Torvalds */ 2431da177e4SLinus Torvalds static struct blk_major_name { 2441da177e4SLinus Torvalds struct blk_major_name *next; 2451da177e4SLinus Torvalds int major; 2461da177e4SLinus Torvalds char name[16]; 24768eef3b4SJoe Korty } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds /* index in the above - for now: assume no multimajor ranges */ 250e61eb2e9SYang Zhang static inline int major_to_index(unsigned major) 2511da177e4SLinus Torvalds { 25268eef3b4SJoe Korty return major % BLKDEV_MAJOR_HASH_SIZE; 2531da177e4SLinus Torvalds } 2541da177e4SLinus Torvalds 25568eef3b4SJoe Korty #ifdef CONFIG_PROC_FS 256cf771cb5STejun Heo void blkdev_show(struct seq_file *seqf, off_t offset) 2577170be5fSNeil Horman { 25868eef3b4SJoe Korty struct blk_major_name *dp; 2597170be5fSNeil Horman 26068eef3b4SJoe Korty if (offset < BLKDEV_MAJOR_HASH_SIZE) { 261edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 26268eef3b4SJoe Korty for (dp = major_names[offset]; dp; dp = dp->next) 263cf771cb5STejun Heo seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 264edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 26568eef3b4SJoe Korty } 2667170be5fSNeil Horman } 26768eef3b4SJoe Korty #endif /* CONFIG_PROC_FS */ 2681da177e4SLinus Torvalds 2699e8c0bccSMárton Németh /** 2709e8c0bccSMárton Németh * register_blkdev - register a new block device 2719e8c0bccSMárton Németh * 2729e8c0bccSMárton Németh * @major: the requested major device number [1..255]. If @major=0, try to 2739e8c0bccSMárton Németh * allocate any unused major number. 2749e8c0bccSMárton Németh * @name: the name of the new block device as a zero terminated string 2759e8c0bccSMárton Németh * 2769e8c0bccSMárton Németh * The @name must be unique within the system. 2779e8c0bccSMárton Németh * 2789e8c0bccSMárton Németh * The return value depends on the @major input parameter. 
2799e8c0bccSMárton Németh * - if a major device number was requested in range [1..255] then the 2809e8c0bccSMárton Németh * function returns zero on success, or a negative error code 2819e8c0bccSMárton Németh * - if any unused major number was requested with @major=0 parameter 2829e8c0bccSMárton Németh * then the return value is the allocated major number in range 2839e8c0bccSMárton Németh * [1..255] or a negative error code otherwise 2849e8c0bccSMárton Németh */ 2851da177e4SLinus Torvalds int register_blkdev(unsigned int major, const char *name) 2861da177e4SLinus Torvalds { 2871da177e4SLinus Torvalds struct blk_major_name **n, *p; 2881da177e4SLinus Torvalds int index, ret = 0; 2891da177e4SLinus Torvalds 290edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds /* temporary */ 2931da177e4SLinus Torvalds if (major == 0) { 2941da177e4SLinus Torvalds for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 2951da177e4SLinus Torvalds if (major_names[index] == NULL) 2961da177e4SLinus Torvalds break; 2971da177e4SLinus Torvalds } 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds if (index == 0) { 3001da177e4SLinus Torvalds printk("register_blkdev: failed to get major for %s\n", 3011da177e4SLinus Torvalds name); 3021da177e4SLinus Torvalds ret = -EBUSY; 3031da177e4SLinus Torvalds goto out; 3041da177e4SLinus Torvalds } 3051da177e4SLinus Torvalds major = index; 3061da177e4SLinus Torvalds ret = major; 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds 3091da177e4SLinus Torvalds p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 3101da177e4SLinus Torvalds if (p == NULL) { 3111da177e4SLinus Torvalds ret = -ENOMEM; 3121da177e4SLinus Torvalds goto out; 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds p->major = major; 3161da177e4SLinus Torvalds strlcpy(p->name, name, sizeof(p->name)); 3171da177e4SLinus Torvalds p->next = NULL; 3181da177e4SLinus Torvalds index = 
major_to_index(major); 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds for (n = &major_names[index]; *n; n = &(*n)->next) { 3211da177e4SLinus Torvalds if ((*n)->major == major) 3221da177e4SLinus Torvalds break; 3231da177e4SLinus Torvalds } 3241da177e4SLinus Torvalds if (!*n) 3251da177e4SLinus Torvalds *n = p; 3261da177e4SLinus Torvalds else 3271da177e4SLinus Torvalds ret = -EBUSY; 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds if (ret < 0) { 3301da177e4SLinus Torvalds printk("register_blkdev: cannot get major %d for %s\n", 3311da177e4SLinus Torvalds major, name); 3321da177e4SLinus Torvalds kfree(p); 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds out: 335edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 3361da177e4SLinus Torvalds return ret; 3371da177e4SLinus Torvalds } 3381da177e4SLinus Torvalds 3391da177e4SLinus Torvalds EXPORT_SYMBOL(register_blkdev); 3401da177e4SLinus Torvalds 341f4480240SAkinobu Mita void unregister_blkdev(unsigned int major, const char *name) 3421da177e4SLinus Torvalds { 3431da177e4SLinus Torvalds struct blk_major_name **n; 3441da177e4SLinus Torvalds struct blk_major_name *p = NULL; 3451da177e4SLinus Torvalds int index = major_to_index(major); 3461da177e4SLinus Torvalds 347edfaa7c3SKay Sievers mutex_lock(&block_class_lock); 3481da177e4SLinus Torvalds for (n = &major_names[index]; *n; n = &(*n)->next) 3491da177e4SLinus Torvalds if ((*n)->major == major) 3501da177e4SLinus Torvalds break; 351294462a5SAkinobu Mita if (!*n || strcmp((*n)->name, name)) { 352294462a5SAkinobu Mita WARN_ON(1); 353294462a5SAkinobu Mita } else { 3541da177e4SLinus Torvalds p = *n; 3551da177e4SLinus Torvalds *n = p->next; 3561da177e4SLinus Torvalds } 357edfaa7c3SKay Sievers mutex_unlock(&block_class_lock); 3581da177e4SLinus Torvalds kfree(p); 3591da177e4SLinus Torvalds } 3601da177e4SLinus Torvalds 3611da177e4SLinus Torvalds EXPORT_SYMBOL(unregister_blkdev); 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds static struct kobj_map *bdev_map; 

/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 * is enabled.  The mangling swaps bit i with bit MINORBITS - 1 - i for
 * every i in the lower half, so mangling twice gives the original
 * value.  Without CONFIG_DEBUG_BLOCK_EXT_DEVT, @minor is returned
 * unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int bit;

	for (bit = 0; bit < MINORBITS / 2; bit++) {
		int mirror = MINORBITS - 1 - bit;	/* partner bit position */
		int span = mirror - bit;		/* distance between them */
		int lo = minor & (1 << bit);
		int hi = minor & (1 << mirror);

		minor &= ~(lo | hi);			/* clear both bits */
		minor |= (lo << span) | (hi >> span);	/* set them swapped */
	}
#endif
	return minor;
}

410bcce3de1STejun Heo */ 411bcce3de1STejun Heo int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 412bcce3de1STejun Heo { 413bcce3de1STejun Heo struct gendisk *disk = part_to_disk(part); 414bab998d6STejun Heo int idx; 415bcce3de1STejun Heo 416bcce3de1STejun Heo /* in consecutive minor range? */ 417bcce3de1STejun Heo if (part->partno < disk->minors) { 418bcce3de1STejun Heo *devt = MKDEV(disk->major, disk->first_minor + part->partno); 419bcce3de1STejun Heo return 0; 420bcce3de1STejun Heo } 421bcce3de1STejun Heo 422bcce3de1STejun Heo /* allocate ext devt */ 4237b74e912STomas Henzl mutex_lock(&ext_devt_mutex); 424bab998d6STejun Heo idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL); 4257b74e912STomas Henzl mutex_unlock(&ext_devt_mutex); 426bab998d6STejun Heo if (idx < 0) 427bab998d6STejun Heo return idx == -ENOSPC ? -EBUSY : idx; 428bcce3de1STejun Heo 429870d6656STejun Heo *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 430bcce3de1STejun Heo return 0; 431bcce3de1STejun Heo } 432bcce3de1STejun Heo 433bcce3de1STejun Heo /** 434bcce3de1STejun Heo * blk_free_devt - free a dev_t 435bcce3de1STejun Heo * @devt: dev_t to free 436bcce3de1STejun Heo * 437bcce3de1STejun Heo * Free @devt which was allocated using blk_alloc_devt(). 438bcce3de1STejun Heo * 439bcce3de1STejun Heo * CONTEXT: 440bcce3de1STejun Heo * Might sleep. 
441bcce3de1STejun Heo */ 442bcce3de1STejun Heo void blk_free_devt(dev_t devt) 443bcce3de1STejun Heo { 444bcce3de1STejun Heo might_sleep(); 445bcce3de1STejun Heo 446bcce3de1STejun Heo if (devt == MKDEV(0, 0)) 447bcce3de1STejun Heo return; 448bcce3de1STejun Heo 449bcce3de1STejun Heo if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 450bcce3de1STejun Heo mutex_lock(&ext_devt_mutex); 451870d6656STejun Heo idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 452bcce3de1STejun Heo mutex_unlock(&ext_devt_mutex); 453bcce3de1STejun Heo } 454bcce3de1STejun Heo } 455bcce3de1STejun Heo 4561f014290STejun Heo static char *bdevt_str(dev_t devt, char *buf) 4571f014290STejun Heo { 4581f014290STejun Heo if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 4591f014290STejun Heo char tbuf[BDEVT_SIZE]; 4601f014290STejun Heo snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 4611f014290STejun Heo snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 4621f014290STejun Heo } else 4631f014290STejun Heo snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 4641f014290STejun Heo 4651f014290STejun Heo return buf; 4661f014290STejun Heo } 4671f014290STejun Heo 4681da177e4SLinus Torvalds /* 4691da177e4SLinus Torvalds * Register device numbers dev..(dev+range-1) 4701da177e4SLinus Torvalds * range must be nonzero 4711da177e4SLinus Torvalds * The hash chain is sorted on range, so that subranges can override. 
4721da177e4SLinus Torvalds */ 473edfaa7c3SKay Sievers void blk_register_region(dev_t devt, unsigned long range, struct module *module, 4741da177e4SLinus Torvalds struct kobject *(*probe)(dev_t, int *, void *), 4751da177e4SLinus Torvalds int (*lock)(dev_t, void *), void *data) 4761da177e4SLinus Torvalds { 477edfaa7c3SKay Sievers kobj_map(bdev_map, devt, range, module, probe, lock, data); 4781da177e4SLinus Torvalds } 4791da177e4SLinus Torvalds 4801da177e4SLinus Torvalds EXPORT_SYMBOL(blk_register_region); 4811da177e4SLinus Torvalds 482edfaa7c3SKay Sievers void blk_unregister_region(dev_t devt, unsigned long range) 4831da177e4SLinus Torvalds { 484edfaa7c3SKay Sievers kobj_unmap(bdev_map, devt, range); 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds 4871da177e4SLinus Torvalds EXPORT_SYMBOL(blk_unregister_region); 4881da177e4SLinus Torvalds 489cf771cb5STejun Heo static struct kobject *exact_match(dev_t devt, int *partno, void *data) 4901da177e4SLinus Torvalds { 4911da177e4SLinus Torvalds struct gendisk *p = data; 492edfaa7c3SKay Sievers 493ed9e1982STejun Heo return &disk_to_dev(p)->kobj; 4941da177e4SLinus Torvalds } 4951da177e4SLinus Torvalds 496edfaa7c3SKay Sievers static int exact_lock(dev_t devt, void *data) 4971da177e4SLinus Torvalds { 4981da177e4SLinus Torvalds struct gendisk *p = data; 4991da177e4SLinus Torvalds 5001da177e4SLinus Torvalds if (!get_disk(p)) 5011da177e4SLinus Torvalds return -1; 5021da177e4SLinus Torvalds return 0; 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 5054752bc30SAl Viro static void register_disk(struct gendisk *disk) 506d2bf1b67STejun Heo { 507d2bf1b67STejun Heo struct device *ddev = disk_to_dev(disk); 508d2bf1b67STejun Heo struct block_device *bdev; 509d2bf1b67STejun Heo struct disk_part_iter piter; 510d2bf1b67STejun Heo struct hd_struct *part; 511d2bf1b67STejun Heo int err; 512d2bf1b67STejun Heo 513d2bf1b67STejun Heo ddev->parent = disk->driverfs_dev; 514d2bf1b67STejun Heo 515d2bf1b67STejun Heo dev_set_name(ddev, 
disk->disk_name); 516d2bf1b67STejun Heo 517d2bf1b67STejun Heo /* delay uevents, until we scanned partition table */ 518d2bf1b67STejun Heo dev_set_uevent_suppress(ddev, 1); 519d2bf1b67STejun Heo 520d2bf1b67STejun Heo if (device_add(ddev)) 521d2bf1b67STejun Heo return; 522d2bf1b67STejun Heo if (!sysfs_deprecated) { 523d2bf1b67STejun Heo err = sysfs_create_link(block_depr, &ddev->kobj, 524d2bf1b67STejun Heo kobject_name(&ddev->kobj)); 525d2bf1b67STejun Heo if (err) { 526d2bf1b67STejun Heo device_del(ddev); 527d2bf1b67STejun Heo return; 528d2bf1b67STejun Heo } 529d2bf1b67STejun Heo } 53025e823c8SMing Lei 53125e823c8SMing Lei /* 53225e823c8SMing Lei * avoid probable deadlock caused by allocating memory with 53325e823c8SMing Lei * GFP_KERNEL in runtime_resume callback of its all ancestor 53425e823c8SMing Lei * devices 53525e823c8SMing Lei */ 53625e823c8SMing Lei pm_runtime_set_memalloc_noio(ddev, true); 53725e823c8SMing Lei 538d2bf1b67STejun Heo disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); 539d2bf1b67STejun Heo disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 540d2bf1b67STejun Heo 541d2bf1b67STejun Heo /* No minors to use for partitions */ 542d27769ecSTejun Heo if (!disk_part_scan_enabled(disk)) 543d2bf1b67STejun Heo goto exit; 544d2bf1b67STejun Heo 545d2bf1b67STejun Heo /* No such device (e.g., media were just removed) */ 546d2bf1b67STejun Heo if (!get_capacity(disk)) 547d2bf1b67STejun Heo goto exit; 548d2bf1b67STejun Heo 549d2bf1b67STejun Heo bdev = bdget_disk(disk, 0); 550d2bf1b67STejun Heo if (!bdev) 551d2bf1b67STejun Heo goto exit; 552d2bf1b67STejun Heo 553d2bf1b67STejun Heo bdev->bd_invalidated = 1; 554d2bf1b67STejun Heo err = blkdev_get(bdev, FMODE_READ, NULL); 555d2bf1b67STejun Heo if (err < 0) 556d2bf1b67STejun Heo goto exit; 557d2bf1b67STejun Heo blkdev_put(bdev, FMODE_READ); 558d2bf1b67STejun Heo 559d2bf1b67STejun Heo exit: 560d2bf1b67STejun Heo /* announce disk after possible partitions are created */ 
561d2bf1b67STejun Heo dev_set_uevent_suppress(ddev, 0); 562d2bf1b67STejun Heo kobject_uevent(&ddev->kobj, KOBJ_ADD); 563d2bf1b67STejun Heo 564d2bf1b67STejun Heo /* announce possible partitions */ 565d2bf1b67STejun Heo disk_part_iter_init(&piter, disk, 0); 566d2bf1b67STejun Heo while ((part = disk_part_iter_next(&piter))) 567d2bf1b67STejun Heo kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); 568d2bf1b67STejun Heo disk_part_iter_exit(&piter); 569d2bf1b67STejun Heo } 570d2bf1b67STejun Heo 5711da177e4SLinus Torvalds /** 5721da177e4SLinus Torvalds * add_disk - add partitioning information to kernel list 5731da177e4SLinus Torvalds * @disk: per-device partitioning information 5741da177e4SLinus Torvalds * 5751da177e4SLinus Torvalds * This function registers the partitioning information in @disk 5761da177e4SLinus Torvalds * with the kernel. 5773e1a7ff8STejun Heo * 5783e1a7ff8STejun Heo * FIXME: error handling 5791da177e4SLinus Torvalds */ 5801da177e4SLinus Torvalds void add_disk(struct gendisk *disk) 5811da177e4SLinus Torvalds { 582cf0ca9feSPeter Zijlstra struct backing_dev_info *bdi; 5833e1a7ff8STejun Heo dev_t devt; 5846ffeea77SGreg Kroah-Hartman int retval; 585cf0ca9feSPeter Zijlstra 5863e1a7ff8STejun Heo /* minors == 0 indicates to use ext devt from part0 and should 5873e1a7ff8STejun Heo * be accompanied with EXT_DEVT flag. Make sure all 5883e1a7ff8STejun Heo * parameters make sense. 
5893e1a7ff8STejun Heo */ 5903e1a7ff8STejun Heo WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 5913e1a7ff8STejun Heo WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 5923e1a7ff8STejun Heo 5931da177e4SLinus Torvalds disk->flags |= GENHD_FL_UP; 5943e1a7ff8STejun Heo 5953e1a7ff8STejun Heo retval = blk_alloc_devt(&disk->part0, &devt); 5963e1a7ff8STejun Heo if (retval) { 5973e1a7ff8STejun Heo WARN_ON(1); 5983e1a7ff8STejun Heo return; 5993e1a7ff8STejun Heo } 6003e1a7ff8STejun Heo disk_to_dev(disk)->devt = devt; 6013e1a7ff8STejun Heo 6023e1a7ff8STejun Heo /* ->major and ->first_minor aren't supposed to be 6033e1a7ff8STejun Heo * dereferenced from here on, but set them just in case. 6043e1a7ff8STejun Heo */ 6053e1a7ff8STejun Heo disk->major = MAJOR(devt); 6063e1a7ff8STejun Heo disk->first_minor = MINOR(devt); 6073e1a7ff8STejun Heo 6089f53d2feSStanislaw Gruszka disk_alloc_events(disk); 6099f53d2feSStanislaw Gruszka 61001ea5063SSigned-off-by: Jan Kara /* Register BDI before referencing it from bdev */ 61101ea5063SSigned-off-by: Jan Kara bdi = &disk->queue->backing_dev_info; 61201ea5063SSigned-off-by: Jan Kara bdi_register_dev(bdi, disk_devt(disk)); 61301ea5063SSigned-off-by: Jan Kara 614f331c029STejun Heo blk_register_region(disk_devt(disk), disk->minors, NULL, 615f331c029STejun Heo exact_match, exact_lock, disk); 6161da177e4SLinus Torvalds register_disk(disk); 6171da177e4SLinus Torvalds blk_register_queue(disk); 618cf0ca9feSPeter Zijlstra 619523e1d39STejun Heo /* 620523e1d39STejun Heo * Take an extra ref on queue which will be put on disk_release() 621523e1d39STejun Heo * so that it sticks around as long as @disk is there. 
622523e1d39STejun Heo */ 62309ac46c4STejun Heo WARN_ON_ONCE(!blk_get_queue(disk->queue)); 624523e1d39STejun Heo 625ed9e1982STejun Heo retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 626ed9e1982STejun Heo "bdi"); 6276ffeea77SGreg Kroah-Hartman WARN_ON(retval); 62877ea887eSTejun Heo 62977ea887eSTejun Heo disk_add_events(disk); 6301da177e4SLinus Torvalds } 6311da177e4SLinus Torvalds EXPORT_SYMBOL(add_disk); 6321da177e4SLinus Torvalds 633d2bf1b67STejun Heo void del_gendisk(struct gendisk *disk) 6341da177e4SLinus Torvalds { 635d2bf1b67STejun Heo struct disk_part_iter piter; 636d2bf1b67STejun Heo struct hd_struct *part; 637d2bf1b67STejun Heo 63877ea887eSTejun Heo disk_del_events(disk); 63977ea887eSTejun Heo 640d2bf1b67STejun Heo /* invalidate stuff */ 641d2bf1b67STejun Heo disk_part_iter_init(&piter, disk, 642d2bf1b67STejun Heo DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 643d2bf1b67STejun Heo while ((part = disk_part_iter_next(&piter))) { 644d2bf1b67STejun Heo invalidate_partition(disk, part->partno); 645d2bf1b67STejun Heo delete_partition(disk, part->partno); 646d2bf1b67STejun Heo } 647d2bf1b67STejun Heo disk_part_iter_exit(&piter); 648d2bf1b67STejun Heo 649d2bf1b67STejun Heo invalidate_partition(disk, 0); 650d2bf1b67STejun Heo set_capacity(disk, 0); 651d2bf1b67STejun Heo disk->flags &= ~GENHD_FL_UP; 652d2bf1b67STejun Heo 653ed9e1982STejun Heo sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 654cf0ca9feSPeter Zijlstra bdi_unregister(&disk->queue->backing_dev_info); 6551da177e4SLinus Torvalds blk_unregister_queue(disk); 656f331c029STejun Heo blk_unregister_region(disk_devt(disk), disk->minors); 657d2bf1b67STejun Heo 658d2bf1b67STejun Heo part_stat_set_all(&disk->part0, 0); 659d2bf1b67STejun Heo disk->part0.stamp = 0; 660d2bf1b67STejun Heo 661d2bf1b67STejun Heo kobject_put(disk->part0.holder_dir); 662d2bf1b67STejun Heo kobject_put(disk->slave_dir); 663d2bf1b67STejun Heo disk->driverfs_dev = NULL; 664d2bf1b67STejun Heo if (!sysfs_deprecated) 
665d2bf1b67STejun Heo sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 66625e823c8SMing Lei pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); 667d2bf1b67STejun Heo device_del(disk_to_dev(disk)); 6687b74e912STomas Henzl blk_free_devt(disk_to_dev(disk)->devt); 6691da177e4SLinus Torvalds } 670d2bf1b67STejun Heo EXPORT_SYMBOL(del_gendisk); 6711da177e4SLinus Torvalds 6721da177e4SLinus Torvalds /** 6731da177e4SLinus Torvalds * get_gendisk - get partitioning information for a given device 674710027a4SRandy Dunlap * @devt: device to get partitioning information for 675496aa8a9SRandy Dunlap * @partno: returned partition index 6761da177e4SLinus Torvalds * 6771da177e4SLinus Torvalds * This function gets the structure containing partitioning 678710027a4SRandy Dunlap * information for the given device @devt. 6791da177e4SLinus Torvalds */ 680cf771cb5STejun Heo struct gendisk *get_gendisk(dev_t devt, int *partno) 6811da177e4SLinus Torvalds { 682bcce3de1STejun Heo struct gendisk *disk = NULL; 683edfaa7c3SKay Sievers 684bcce3de1STejun Heo if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 685bcce3de1STejun Heo struct kobject *kobj; 686bcce3de1STejun Heo 687bcce3de1STejun Heo kobj = kobj_lookup(bdev_map, devt, partno); 688bcce3de1STejun Heo if (kobj) 689bcce3de1STejun Heo disk = dev_to_disk(kobj_to_dev(kobj)); 690bcce3de1STejun Heo } else { 691bcce3de1STejun Heo struct hd_struct *part; 692bcce3de1STejun Heo 693bcce3de1STejun Heo mutex_lock(&ext_devt_mutex); 694870d6656STejun Heo part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 695bcce3de1STejun Heo if (part && get_disk(part_to_disk(part))) { 696bcce3de1STejun Heo *partno = part->partno; 697bcce3de1STejun Heo disk = part_to_disk(part); 698bcce3de1STejun Heo } 699bcce3de1STejun Heo mutex_unlock(&ext_devt_mutex); 700bcce3de1STejun Heo } 701bcce3de1STejun Heo 702bcce3de1STejun Heo return disk; 7031da177e4SLinus Torvalds } 704b6ac23afSDivyesh Shah EXPORT_SYMBOL(get_gendisk); 7051da177e4SLinus Torvalds 706f331c029STejun 
Heo /** 707f331c029STejun Heo * bdget_disk - do bdget() by gendisk and partition number 708f331c029STejun Heo * @disk: gendisk of interest 709f331c029STejun Heo * @partno: partition number 710f331c029STejun Heo * 711f331c029STejun Heo * Find partition @partno from @disk, do bdget() on it. 712f331c029STejun Heo * 713f331c029STejun Heo * CONTEXT: 714f331c029STejun Heo * Don't care. 715f331c029STejun Heo * 716f331c029STejun Heo * RETURNS: 717f331c029STejun Heo * Resulting block_device on success, NULL on failure. 718f331c029STejun Heo */ 719aeb3d3a8SHarvey Harrison struct block_device *bdget_disk(struct gendisk *disk, int partno) 720f331c029STejun Heo { 721e71bf0d0STejun Heo struct hd_struct *part; 722548b10ebSTejun Heo struct block_device *bdev = NULL; 723f331c029STejun Heo 724e71bf0d0STejun Heo part = disk_get_part(disk, partno); 7252bbedcb4STejun Heo if (part) 726548b10ebSTejun Heo bdev = bdget(part_devt(part)); 727e71bf0d0STejun Heo disk_put_part(part); 728f331c029STejun Heo 729548b10ebSTejun Heo return bdev; 730f331c029STejun Heo } 731f331c029STejun Heo EXPORT_SYMBOL(bdget_disk); 732f331c029STejun Heo 733dd2a345fSDave Gilbert /* 7345c6f35c5SGreg Kroah-Hartman * print a full list of all partitions - intended for places where the root 7355c6f35c5SGreg Kroah-Hartman * filesystem can't be mounted and thus to give the victim some idea of what 7365c6f35c5SGreg Kroah-Hartman * went wrong 7375c6f35c5SGreg Kroah-Hartman */ 7385c6f35c5SGreg Kroah-Hartman void __init printk_all_partitions(void) 7395c6f35c5SGreg Kroah-Hartman { 740def4e38dSTejun Heo struct class_dev_iter iter; 741def4e38dSTejun Heo struct device *dev; 742def4e38dSTejun Heo 743def4e38dSTejun Heo class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 744def4e38dSTejun Heo while ((dev = class_dev_iter_next(&iter))) { 745def4e38dSTejun Heo struct gendisk *disk = dev_to_disk(dev); 746e71bf0d0STejun Heo struct disk_part_iter piter; 747e71bf0d0STejun Heo struct hd_struct *part; 7481f014290STejun Heo char 
name_buf[BDEVNAME_SIZE]; 7491f014290STejun Heo char devt_buf[BDEVT_SIZE]; 750def4e38dSTejun Heo 751def4e38dSTejun Heo /* 752def4e38dSTejun Heo * Don't show empty devices or things that have been 75325985edcSLucas De Marchi * suppressed 754def4e38dSTejun Heo */ 755def4e38dSTejun Heo if (get_capacity(disk) == 0 || 756def4e38dSTejun Heo (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 757def4e38dSTejun Heo continue; 758def4e38dSTejun Heo 759def4e38dSTejun Heo /* 760def4e38dSTejun Heo * Note, unlike /proc/partitions, I am showing the 761def4e38dSTejun Heo * numbers in hex - the same format as the root= 762def4e38dSTejun Heo * option takes. 763def4e38dSTejun Heo */ 764074a7acaSTejun Heo disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 765074a7acaSTejun Heo while ((part = disk_part_iter_next(&piter))) { 766074a7acaSTejun Heo bool is_part0 = part == &disk->part0; 767074a7acaSTejun Heo 768b5af921eSWill Drewry printk("%s%s %10llu %s %s", is_part0 ? "" : " ", 769074a7acaSTejun Heo bdevt_str(part_devt(part), devt_buf), 770c83f6bf9SVivek Goyal (unsigned long long)part_nr_sects_read(part) >> 1 771c83f6bf9SVivek Goyal , disk_name(disk, part->partno, name_buf), 7721ad7e899SStephen Warren part->info ? 
part->info->uuid : ""); 773074a7acaSTejun Heo if (is_part0) { 774def4e38dSTejun Heo if (disk->driverfs_dev != NULL && 775def4e38dSTejun Heo disk->driverfs_dev->driver != NULL) 776def4e38dSTejun Heo printk(" driver: %s\n", 777def4e38dSTejun Heo disk->driverfs_dev->driver->name); 778def4e38dSTejun Heo else 779def4e38dSTejun Heo printk(" (driver?)\n"); 780074a7acaSTejun Heo } else 781074a7acaSTejun Heo printk("\n"); 782074a7acaSTejun Heo } 783e71bf0d0STejun Heo disk_part_iter_exit(&piter); 784def4e38dSTejun Heo } 785def4e38dSTejun Heo class_dev_iter_exit(&iter); 786dd2a345fSDave Gilbert } 787dd2a345fSDave Gilbert 7881da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 7891da177e4SLinus Torvalds /* iterator */ 790def4e38dSTejun Heo static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 79168c4d4a7SGreg Kroah-Hartman { 792def4e38dSTejun Heo loff_t skip = *pos; 793def4e38dSTejun Heo struct class_dev_iter *iter; 794def4e38dSTejun Heo struct device *dev; 79568c4d4a7SGreg Kroah-Hartman 796aeb3d3a8SHarvey Harrison iter = kmalloc(sizeof(*iter), GFP_KERNEL); 797def4e38dSTejun Heo if (!iter) 798def4e38dSTejun Heo return ERR_PTR(-ENOMEM); 799def4e38dSTejun Heo 800def4e38dSTejun Heo seqf->private = iter; 801def4e38dSTejun Heo class_dev_iter_init(iter, &block_class, NULL, &disk_type); 802def4e38dSTejun Heo do { 803def4e38dSTejun Heo dev = class_dev_iter_next(iter); 804def4e38dSTejun Heo if (!dev) 805def4e38dSTejun Heo return NULL; 806def4e38dSTejun Heo } while (skip--); 807def4e38dSTejun Heo 808def4e38dSTejun Heo return dev_to_disk(dev); 80968c4d4a7SGreg Kroah-Hartman } 81068c4d4a7SGreg Kroah-Hartman 811def4e38dSTejun Heo static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 8121da177e4SLinus Torvalds { 813edfaa7c3SKay Sievers struct device *dev; 81466c64afeSGreg Kroah-Hartman 815def4e38dSTejun Heo (*pos)++; 816def4e38dSTejun Heo dev = class_dev_iter_next(seqf->private); 8172ac3cee5STejun Heo if (dev) 818edfaa7c3SKay Sievers return dev_to_disk(dev); 
8192ac3cee5STejun Heo 8201da177e4SLinus Torvalds return NULL; 8211da177e4SLinus Torvalds } 8221da177e4SLinus Torvalds 823def4e38dSTejun Heo static void disk_seqf_stop(struct seq_file *seqf, void *v) 82427f30251SGreg Kroah-Hartman { 825def4e38dSTejun Heo struct class_dev_iter *iter = seqf->private; 826def4e38dSTejun Heo 827def4e38dSTejun Heo /* stop is called even after start failed :-( */ 828def4e38dSTejun Heo if (iter) { 829def4e38dSTejun Heo class_dev_iter_exit(iter); 830def4e38dSTejun Heo kfree(iter); 831def4e38dSTejun Heo } 83227f30251SGreg Kroah-Hartman } 83327f30251SGreg Kroah-Hartman 834def4e38dSTejun Heo static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 8351da177e4SLinus Torvalds { 83606768067SJianpeng Ma void *p; 8371da177e4SLinus Torvalds 838def4e38dSTejun Heo p = disk_seqf_start(seqf, pos); 839b9f985b6SYang Zhang if (!IS_ERR_OR_NULL(p) && !*pos) 840def4e38dSTejun Heo seq_puts(seqf, "major minor #blocks name\n\n"); 841def4e38dSTejun Heo return p; 8421da177e4SLinus Torvalds } 8431da177e4SLinus Torvalds 844cf771cb5STejun Heo static int show_partition(struct seq_file *seqf, void *v) 8451da177e4SLinus Torvalds { 8461da177e4SLinus Torvalds struct gendisk *sgp = v; 847e71bf0d0STejun Heo struct disk_part_iter piter; 848e71bf0d0STejun Heo struct hd_struct *part; 8491da177e4SLinus Torvalds char buf[BDEVNAME_SIZE]; 8501da177e4SLinus Torvalds 8511da177e4SLinus Torvalds /* Don't show non-partitionable removeable devices or empty devices */ 852d27769ecSTejun Heo if (!get_capacity(sgp) || (!disk_max_parts(sgp) && 853f331c029STejun Heo (sgp->flags & GENHD_FL_REMOVABLE))) 8541da177e4SLinus Torvalds return 0; 8551da177e4SLinus Torvalds if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 8561da177e4SLinus Torvalds return 0; 8571da177e4SLinus Torvalds 8581da177e4SLinus Torvalds /* show the full disk and all non-0 size partitions of it */ 859074a7acaSTejun Heo disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 860e71bf0d0STejun Heo while ((part = 
disk_part_iter_next(&piter))) 8611f014290STejun Heo seq_printf(seqf, "%4d %7d %10llu %s\n", 862f331c029STejun Heo MAJOR(part_devt(part)), MINOR(part_devt(part)), 863c83f6bf9SVivek Goyal (unsigned long long)part_nr_sects_read(part) >> 1, 864f331c029STejun Heo disk_name(sgp, part->partno, buf)); 865e71bf0d0STejun Heo disk_part_iter_exit(&piter); 8661da177e4SLinus Torvalds 8671da177e4SLinus Torvalds return 0; 8681da177e4SLinus Torvalds } 8691da177e4SLinus Torvalds 870f500975aSAlexey Dobriyan static const struct seq_operations partitions_op = { 871def4e38dSTejun Heo .start = show_partition_start, 872def4e38dSTejun Heo .next = disk_seqf_next, 873def4e38dSTejun Heo .stop = disk_seqf_stop, 8741da177e4SLinus Torvalds .show = show_partition 8751da177e4SLinus Torvalds }; 876f500975aSAlexey Dobriyan 877f500975aSAlexey Dobriyan static int partitions_open(struct inode *inode, struct file *file) 878f500975aSAlexey Dobriyan { 879f500975aSAlexey Dobriyan return seq_open(file, &partitions_op); 880f500975aSAlexey Dobriyan } 881f500975aSAlexey Dobriyan 882f500975aSAlexey Dobriyan static const struct file_operations proc_partitions_operations = { 883f500975aSAlexey Dobriyan .open = partitions_open, 884f500975aSAlexey Dobriyan .read = seq_read, 885f500975aSAlexey Dobriyan .llseek = seq_lseek, 886f500975aSAlexey Dobriyan .release = seq_release, 887f500975aSAlexey Dobriyan }; 8881da177e4SLinus Torvalds #endif 8891da177e4SLinus Torvalds 8901da177e4SLinus Torvalds 891cf771cb5STejun Heo static struct kobject *base_probe(dev_t devt, int *partno, void *data) 8921da177e4SLinus Torvalds { 893edfaa7c3SKay Sievers if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 8941da177e4SLinus Torvalds /* Make old-style 2.4 aliases work */ 895edfaa7c3SKay Sievers request_module("block-major-%d", MAJOR(devt)); 8961da177e4SLinus Torvalds return NULL; 8971da177e4SLinus Torvalds } 8981da177e4SLinus Torvalds 8991da177e4SLinus Torvalds static int __init genhd_device_init(void) 9001da177e4SLinus 
Torvalds { 901e105b8bfSDan Williams int error; 902e105b8bfSDan Williams 903e105b8bfSDan Williams block_class.dev_kobj = sysfs_dev_block_kobj; 904e105b8bfSDan Williams error = class_register(&block_class); 905ee27a558SRoland McGrath if (unlikely(error)) 906ee27a558SRoland McGrath return error; 907edfaa7c3SKay Sievers bdev_map = kobj_map_init(base_probe, &block_class_lock); 9081da177e4SLinus Torvalds blk_dev_init(); 909edfaa7c3SKay Sievers 910561ec68eSZhang, Yanmin register_blkdev(BLOCK_EXT_MAJOR, "blkext"); 911561ec68eSZhang, Yanmin 912edfaa7c3SKay Sievers /* create top-level block dir */ 913e52eec13SAndi Kleen if (!sysfs_deprecated) 914edfaa7c3SKay Sievers block_depr = kobject_create_and_add("block", NULL); 915830d3cfbSGreg Kroah-Hartman return 0; 9161da177e4SLinus Torvalds } 9171da177e4SLinus Torvalds 9181da177e4SLinus Torvalds subsys_initcall(genhd_device_init); 9191da177e4SLinus Torvalds 920edfaa7c3SKay Sievers static ssize_t disk_range_show(struct device *dev, 921edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 9221da177e4SLinus Torvalds { 923edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 9241da177e4SLinus Torvalds 925edfaa7c3SKay Sievers return sprintf(buf, "%d\n", disk->minors); 9261da177e4SLinus Torvalds } 9271da177e4SLinus Torvalds 9281f014290STejun Heo static ssize_t disk_ext_range_show(struct device *dev, 9291f014290STejun Heo struct device_attribute *attr, char *buf) 9301f014290STejun Heo { 9311f014290STejun Heo struct gendisk *disk = dev_to_disk(dev); 9321f014290STejun Heo 933b5d0b9dfSTejun Heo return sprintf(buf, "%d\n", disk_max_parts(disk)); 9341f014290STejun Heo } 9351f014290STejun Heo 936edfaa7c3SKay Sievers static ssize_t disk_removable_show(struct device *dev, 937edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 938a7fd6706SKay Sievers { 939edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 940a7fd6706SKay Sievers 941edfaa7c3SKay Sievers return sprintf(buf, "%d\n", 9421da177e4SLinus Torvalds 
(disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 943edfaa7c3SKay Sievers } 9441da177e4SLinus Torvalds 9451c9ce527SKay Sievers static ssize_t disk_ro_show(struct device *dev, 9461c9ce527SKay Sievers struct device_attribute *attr, char *buf) 9471c9ce527SKay Sievers { 9481c9ce527SKay Sievers struct gendisk *disk = dev_to_disk(dev); 9491c9ce527SKay Sievers 950b7db9956STejun Heo return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 9511c9ce527SKay Sievers } 9521c9ce527SKay Sievers 953edfaa7c3SKay Sievers static ssize_t disk_capability_show(struct device *dev, 954edfaa7c3SKay Sievers struct device_attribute *attr, char *buf) 95586ce18d7SKristen Carlson Accardi { 956edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 957edfaa7c3SKay Sievers 958edfaa7c3SKay Sievers return sprintf(buf, "%x\n", disk->flags); 95986ce18d7SKristen Carlson Accardi } 960edfaa7c3SKay Sievers 961c72758f3SMartin K. Petersen static ssize_t disk_alignment_offset_show(struct device *dev, 962c72758f3SMartin K. Petersen struct device_attribute *attr, 963c72758f3SMartin K. Petersen char *buf) 964c72758f3SMartin K. Petersen { 965c72758f3SMartin K. Petersen struct gendisk *disk = dev_to_disk(dev); 966c72758f3SMartin K. Petersen 967c72758f3SMartin K. Petersen return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); 968c72758f3SMartin K. Petersen } 969c72758f3SMartin K. Petersen 97086b37281SMartin K. Petersen static ssize_t disk_discard_alignment_show(struct device *dev, 97186b37281SMartin K. Petersen struct device_attribute *attr, 97286b37281SMartin K. Petersen char *buf) 97386b37281SMartin K. Petersen { 97486b37281SMartin K. Petersen struct gendisk *disk = dev_to_disk(dev); 97586b37281SMartin K. Petersen 976dd3d145dSMartin K. Petersen return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); 97786b37281SMartin K. Petersen } 97886b37281SMartin K. 
Petersen 979edfaa7c3SKay Sievers static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 9801f014290STejun Heo static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 981edfaa7c3SKay Sievers static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 9821c9ce527SKay Sievers static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 983e5610521STejun Heo static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 984c72758f3SMartin K. Petersen static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 98586b37281SMartin K. Petersen static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, 98686b37281SMartin K. Petersen NULL); 987edfaa7c3SKay Sievers static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 988074a7acaSTejun Heo static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 989316d315bSNikanth Karthikesan static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 990c17bb495SAkinobu Mita #ifdef CONFIG_FAIL_MAKE_REQUEST 991edfaa7c3SKay Sievers static struct device_attribute dev_attr_fail = 992eddb2e26STejun Heo __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 993c17bb495SAkinobu Mita #endif 994581d4e28SJens Axboe #ifdef CONFIG_FAIL_IO_TIMEOUT 995581d4e28SJens Axboe static struct device_attribute dev_attr_fail_timeout = 996581d4e28SJens Axboe __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 997581d4e28SJens Axboe part_timeout_store); 998581d4e28SJens Axboe #endif 999edfaa7c3SKay Sievers 1000edfaa7c3SKay Sievers static struct attribute *disk_attrs[] = { 1001edfaa7c3SKay Sievers &dev_attr_range.attr, 10021f014290STejun Heo &dev_attr_ext_range.attr, 1003edfaa7c3SKay Sievers &dev_attr_removable.attr, 10041c9ce527SKay Sievers &dev_attr_ro.attr, 1005edfaa7c3SKay Sievers &dev_attr_size.attr, 1006c72758f3SMartin K. Petersen &dev_attr_alignment_offset.attr, 100786b37281SMartin K. 
Petersen &dev_attr_discard_alignment.attr, 1008edfaa7c3SKay Sievers &dev_attr_capability.attr, 1009edfaa7c3SKay Sievers &dev_attr_stat.attr, 1010316d315bSNikanth Karthikesan &dev_attr_inflight.attr, 1011edfaa7c3SKay Sievers #ifdef CONFIG_FAIL_MAKE_REQUEST 1012edfaa7c3SKay Sievers &dev_attr_fail.attr, 1013edfaa7c3SKay Sievers #endif 1014581d4e28SJens Axboe #ifdef CONFIG_FAIL_IO_TIMEOUT 1015581d4e28SJens Axboe &dev_attr_fail_timeout.attr, 1016581d4e28SJens Axboe #endif 1017edfaa7c3SKay Sievers NULL 10181da177e4SLinus Torvalds }; 10191da177e4SLinus Torvalds 1020edfaa7c3SKay Sievers static struct attribute_group disk_attr_group = { 1021edfaa7c3SKay Sievers .attrs = disk_attrs, 1022edfaa7c3SKay Sievers }; 1023edfaa7c3SKay Sievers 1024a4dbd674SDavid Brownell static const struct attribute_group *disk_attr_groups[] = { 1025edfaa7c3SKay Sievers &disk_attr_group, 1026edfaa7c3SKay Sievers NULL 1027edfaa7c3SKay Sievers }; 1028edfaa7c3SKay Sievers 1029540eed56STejun Heo /** 1030540eed56STejun Heo * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 1031540eed56STejun Heo * @disk: disk to replace part_tbl for 1032540eed56STejun Heo * @new_ptbl: new part_tbl to install 1033540eed56STejun Heo * 1034540eed56STejun Heo * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 1035540eed56STejun Heo * original ptbl is freed using RCU callback. 1036540eed56STejun Heo * 1037540eed56STejun Heo * LOCKING: 1038540eed56STejun Heo * Matching bd_mutx locked. 
1039540eed56STejun Heo */ 1040540eed56STejun Heo static void disk_replace_part_tbl(struct gendisk *disk, 1041540eed56STejun Heo struct disk_part_tbl *new_ptbl) 1042540eed56STejun Heo { 1043540eed56STejun Heo struct disk_part_tbl *old_ptbl = disk->part_tbl; 1044540eed56STejun Heo 1045540eed56STejun Heo rcu_assign_pointer(disk->part_tbl, new_ptbl); 1046a6f23657SJens Axboe 1047a6f23657SJens Axboe if (old_ptbl) { 1048a6f23657SJens Axboe rcu_assign_pointer(old_ptbl->last_lookup, NULL); 104957bdfbf9SLai Jiangshan kfree_rcu(old_ptbl, rcu_head); 1050540eed56STejun Heo } 1051a6f23657SJens Axboe } 1052540eed56STejun Heo 1053540eed56STejun Heo /** 1054540eed56STejun Heo * disk_expand_part_tbl - expand disk->part_tbl 1055540eed56STejun Heo * @disk: disk to expand part_tbl for 1056540eed56STejun Heo * @partno: expand such that this partno can fit in 1057540eed56STejun Heo * 1058540eed56STejun Heo * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 1059540eed56STejun Heo * uses RCU to allow unlocked dereferencing for stats and other stuff. 1060540eed56STejun Heo * 1061540eed56STejun Heo * LOCKING: 1062540eed56STejun Heo * Matching bd_mutex locked, might sleep. 1063540eed56STejun Heo * 1064540eed56STejun Heo * RETURNS: 1065540eed56STejun Heo * 0 on success, -errno on failure. 1066540eed56STejun Heo */ 1067540eed56STejun Heo int disk_expand_part_tbl(struct gendisk *disk, int partno) 1068540eed56STejun Heo { 1069540eed56STejun Heo struct disk_part_tbl *old_ptbl = disk->part_tbl; 1070540eed56STejun Heo struct disk_part_tbl *new_ptbl; 1071540eed56STejun Heo int len = old_ptbl ? 
old_ptbl->len : 0; 1072540eed56STejun Heo int target = partno + 1; 1073540eed56STejun Heo size_t size; 1074540eed56STejun Heo int i; 1075540eed56STejun Heo 1076540eed56STejun Heo /* disk_max_parts() is zero during initialization, ignore if so */ 1077540eed56STejun Heo if (disk_max_parts(disk) && target > disk_max_parts(disk)) 1078540eed56STejun Heo return -EINVAL; 1079540eed56STejun Heo 1080540eed56STejun Heo if (target <= len) 1081540eed56STejun Heo return 0; 1082540eed56STejun Heo 1083540eed56STejun Heo size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 1084540eed56STejun Heo new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 1085540eed56STejun Heo if (!new_ptbl) 1086540eed56STejun Heo return -ENOMEM; 1087540eed56STejun Heo 1088540eed56STejun Heo new_ptbl->len = target; 1089540eed56STejun Heo 1090540eed56STejun Heo for (i = 0; i < len; i++) 1091540eed56STejun Heo rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 1092540eed56STejun Heo 1093540eed56STejun Heo disk_replace_part_tbl(disk, new_ptbl); 1094540eed56STejun Heo return 0; 1095540eed56STejun Heo } 1096540eed56STejun Heo 1097edfaa7c3SKay Sievers static void disk_release(struct device *dev) 10981da177e4SLinus Torvalds { 1099edfaa7c3SKay Sievers struct gendisk *disk = dev_to_disk(dev); 1100edfaa7c3SKay Sievers 110177ea887eSTejun Heo disk_release_events(disk); 11021da177e4SLinus Torvalds kfree(disk->random); 1103540eed56STejun Heo disk_replace_part_tbl(disk, NULL); 1104074a7acaSTejun Heo free_part_stats(&disk->part0); 11056d1d8050SWill Drewry free_part_info(&disk->part0); 1106523e1d39STejun Heo if (disk->queue) 1107523e1d39STejun Heo blk_put_queue(disk->queue); 11081da177e4SLinus Torvalds kfree(disk); 11091da177e4SLinus Torvalds } 1110edfaa7c3SKay Sievers struct class block_class = { 1111edfaa7c3SKay Sievers .name = "block", 11121da177e4SLinus Torvalds }; 11131da177e4SLinus Torvalds 11143c2670e6SKay Sievers static char *block_devnode(struct device *dev, umode_t *mode, 
1115*4e4098a3SGreg Kroah-Hartman kuid_t *uid, kgid_t *gid) 1116b03f38b6SKay Sievers { 1117b03f38b6SKay Sievers struct gendisk *disk = dev_to_disk(dev); 1118b03f38b6SKay Sievers 1119e454cea2SKay Sievers if (disk->devnode) 1120e454cea2SKay Sievers return disk->devnode(disk, mode); 1121b03f38b6SKay Sievers return NULL; 1122b03f38b6SKay Sievers } 1123b03f38b6SKay Sievers 11241826eadfSAdrian Bunk static struct device_type disk_type = { 1125edfaa7c3SKay Sievers .name = "disk", 1126edfaa7c3SKay Sievers .groups = disk_attr_groups, 1127edfaa7c3SKay Sievers .release = disk_release, 1128e454cea2SKay Sievers .devnode = block_devnode, 11291da177e4SLinus Torvalds }; 11301da177e4SLinus Torvalds 1131a6e2ba88SRandy Dunlap #ifdef CONFIG_PROC_FS 1132cf771cb5STejun Heo /* 1133cf771cb5STejun Heo * aggregate disk stat collector. Uses the same stats that the sysfs 1134cf771cb5STejun Heo * entries do, above, but makes them available through one seq_file. 1135cf771cb5STejun Heo * 1136cf771cb5STejun Heo * The output looks suspiciously like /proc/partitions with a bunch of 1137cf771cb5STejun Heo * extra fields. 
1138cf771cb5STejun Heo */ 1139cf771cb5STejun Heo static int diskstats_show(struct seq_file *seqf, void *v) 11401da177e4SLinus Torvalds { 11411da177e4SLinus Torvalds struct gendisk *gp = v; 1142e71bf0d0STejun Heo struct disk_part_iter piter; 1143e71bf0d0STejun Heo struct hd_struct *hd; 11441da177e4SLinus Torvalds char buf[BDEVNAME_SIZE]; 1145c9959059STejun Heo int cpu; 11461da177e4SLinus Torvalds 11471da177e4SLinus Torvalds /* 1148ed9e1982STejun Heo if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1149cf771cb5STejun Heo seq_puts(seqf, "major minor name" 11501da177e4SLinus Torvalds " rio rmerge rsect ruse wio wmerge " 11511da177e4SLinus Torvalds "wsect wuse running use aveq" 11521da177e4SLinus Torvalds "\n\n"); 11531da177e4SLinus Torvalds */ 11541da177e4SLinus Torvalds 115571982a40STejun Heo disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1156e71bf0d0STejun Heo while ((hd = disk_part_iter_next(&piter))) { 1157074a7acaSTejun Heo cpu = part_stat_lock(); 1158c9959059STejun Heo part_round_stats(cpu, hd); 1159074a7acaSTejun Heo part_stat_unlock(); 1160f95fe9cfSHerbert Poetzl seq_printf(seqf, "%4d %7d %s %lu %lu %lu " 1161f95fe9cfSHerbert Poetzl "%u %lu %lu %lu %u %u %u %u\n", 1162f331c029STejun Heo MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1163f331c029STejun Heo disk_name(gp, hd->partno, buf), 116453f22956SLiu Yuan part_stat_read(hd, ios[READ]), 116553f22956SLiu Yuan part_stat_read(hd, merges[READ]), 1166f95fe9cfSHerbert Poetzl part_stat_read(hd, sectors[READ]), 116753f22956SLiu Yuan jiffies_to_msecs(part_stat_read(hd, ticks[READ])), 116853f22956SLiu Yuan part_stat_read(hd, ios[WRITE]), 116953f22956SLiu Yuan part_stat_read(hd, merges[WRITE]), 1170f95fe9cfSHerbert Poetzl part_stat_read(hd, sectors[WRITE]), 117153f22956SLiu Yuan jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), 1172316d315bSNikanth Karthikesan part_in_flight(hd), 117328f39d55SJerome Marchand jiffies_to_msecs(part_stat_read(hd, io_ticks)), 117428f39d55SJerome Marchand 
jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 117528f39d55SJerome Marchand ); 11761da177e4SLinus Torvalds } 1177e71bf0d0STejun Heo disk_part_iter_exit(&piter); 11781da177e4SLinus Torvalds 11791da177e4SLinus Torvalds return 0; 11801da177e4SLinus Torvalds } 11811da177e4SLinus Torvalds 118231d85ab2SAlexey Dobriyan static const struct seq_operations diskstats_op = { 1183def4e38dSTejun Heo .start = disk_seqf_start, 1184def4e38dSTejun Heo .next = disk_seqf_next, 1185def4e38dSTejun Heo .stop = disk_seqf_stop, 11861da177e4SLinus Torvalds .show = diskstats_show 11871da177e4SLinus Torvalds }; 1188f500975aSAlexey Dobriyan 118931d85ab2SAlexey Dobriyan static int diskstats_open(struct inode *inode, struct file *file) 119031d85ab2SAlexey Dobriyan { 119131d85ab2SAlexey Dobriyan return seq_open(file, &diskstats_op); 119231d85ab2SAlexey Dobriyan } 119331d85ab2SAlexey Dobriyan 119431d85ab2SAlexey Dobriyan static const struct file_operations proc_diskstats_operations = { 119531d85ab2SAlexey Dobriyan .open = diskstats_open, 119631d85ab2SAlexey Dobriyan .read = seq_read, 119731d85ab2SAlexey Dobriyan .llseek = seq_lseek, 119831d85ab2SAlexey Dobriyan .release = seq_release, 119931d85ab2SAlexey Dobriyan }; 120031d85ab2SAlexey Dobriyan 1201f500975aSAlexey Dobriyan static int __init proc_genhd_init(void) 1202f500975aSAlexey Dobriyan { 120331d85ab2SAlexey Dobriyan proc_create("diskstats", 0, NULL, &proc_diskstats_operations); 1204f500975aSAlexey Dobriyan proc_create("partitions", 0, NULL, &proc_partitions_operations); 1205f500975aSAlexey Dobriyan return 0; 1206f500975aSAlexey Dobriyan } 1207f500975aSAlexey Dobriyan module_init(proc_genhd_init); 1208a6e2ba88SRandy Dunlap #endif /* CONFIG_PROC_FS */ 12091da177e4SLinus Torvalds 1210cf771cb5STejun Heo dev_t blk_lookup_devt(const char *name, int partno) 1211edfaa7c3SKay Sievers { 1212edfaa7c3SKay Sievers dev_t devt = MKDEV(0, 0); 1213def4e38dSTejun Heo struct class_dev_iter iter; 1214def4e38dSTejun Heo struct device *dev; 1215edfaa7c3SKay 
Sievers 1216def4e38dSTejun Heo class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1217def4e38dSTejun Heo while ((dev = class_dev_iter_next(&iter))) { 1218def4e38dSTejun Heo struct gendisk *disk = dev_to_disk(dev); 1219548b10ebSTejun Heo struct hd_struct *part; 1220def4e38dSTejun Heo 12213ada8b7eSKay Sievers if (strcmp(dev_name(dev), name)) 1222f331c029STejun Heo continue; 1223f331c029STejun Heo 122441b8c853SNeil Brown if (partno < disk->minors) { 122541b8c853SNeil Brown /* We need to return the right devno, even 122641b8c853SNeil Brown * if the partition doesn't exist yet. 122741b8c853SNeil Brown */ 122841b8c853SNeil Brown devt = MKDEV(MAJOR(dev->devt), 122941b8c853SNeil Brown MINOR(dev->devt) + partno); 123041b8c853SNeil Brown break; 123141b8c853SNeil Brown } 1232e71bf0d0STejun Heo part = disk_get_part(disk, partno); 12332bbedcb4STejun Heo if (part) { 1234f331c029STejun Heo devt = part_devt(part); 1235e71bf0d0STejun Heo disk_put_part(part); 1236f331c029STejun Heo break; 1237def4e38dSTejun Heo } 1238548b10ebSTejun Heo disk_put_part(part); 1239548b10ebSTejun Heo } 1240def4e38dSTejun Heo class_dev_iter_exit(&iter); 1241edfaa7c3SKay Sievers return devt; 1242edfaa7c3SKay Sievers } 1243edfaa7c3SKay Sievers EXPORT_SYMBOL(blk_lookup_devt); 1244edfaa7c3SKay Sievers 12451da177e4SLinus Torvalds struct gendisk *alloc_disk(int minors) 12461da177e4SLinus Torvalds { 1247c304a51bSEzequiel Garcia return alloc_disk_node(minors, NUMA_NO_NODE); 12481946089aSChristoph Lameter } 1249689d6facSTejun Heo EXPORT_SYMBOL(alloc_disk); 12501946089aSChristoph Lameter 12511946089aSChristoph Lameter struct gendisk *alloc_disk_node(int minors, int node_id) 12521946089aSChristoph Lameter { 12531946089aSChristoph Lameter struct gendisk *disk; 12541946089aSChristoph Lameter 125594f6030cSChristoph Lameter disk = kmalloc_node(sizeof(struct gendisk), 125694f6030cSChristoph Lameter GFP_KERNEL | __GFP_ZERO, node_id); 12571da177e4SLinus Torvalds if (disk) { 1258074a7acaSTejun Heo if 
(!init_part_stats(&disk->part0)) { 12591da177e4SLinus Torvalds kfree(disk); 12601da177e4SLinus Torvalds return NULL; 12611da177e4SLinus Torvalds } 1262bf91db18SCheng Renquan disk->node_id = node_id; 1263540eed56STejun Heo if (disk_expand_part_tbl(disk, 0)) { 1264074a7acaSTejun Heo free_part_stats(&disk->part0); 12651da177e4SLinus Torvalds kfree(disk); 12661da177e4SLinus Torvalds return NULL; 12671da177e4SLinus Torvalds } 1268540eed56STejun Heo disk->part_tbl->part[0] = &disk->part0; 12696c23a968SJens Axboe 1270c83f6bf9SVivek Goyal /* 1271c83f6bf9SVivek Goyal * set_capacity() and get_capacity() currently don't use 1272c83f6bf9SVivek Goyal * seqcounter to read/update the part0->nr_sects. Still init 1273c83f6bf9SVivek Goyal * the counter as we can read the sectors in IO submission 1274c83f6bf9SVivek Goyal * patch using seqence counters. 1275c83f6bf9SVivek Goyal * 1276c83f6bf9SVivek Goyal * TODO: Ideally set_capacity() and get_capacity() should be 1277c83f6bf9SVivek Goyal * converted to make use of bd_mutex and sequence counters. 
1278c83f6bf9SVivek Goyal */ 1279c83f6bf9SVivek Goyal seqcount_init(&disk->part0.nr_sects_seq); 12806c23a968SJens Axboe hd_ref_init(&disk->part0); 1281b5d0b9dfSTejun Heo 12821da177e4SLinus Torvalds disk->minors = minors; 12831da177e4SLinus Torvalds rand_initialize_disk(disk); 1284ed9e1982STejun Heo disk_to_dev(disk)->class = &block_class; 1285ed9e1982STejun Heo disk_to_dev(disk)->type = &disk_type; 1286ed9e1982STejun Heo device_initialize(disk_to_dev(disk)); 12871da177e4SLinus Torvalds } 12881da177e4SLinus Torvalds return disk; 12891da177e4SLinus Torvalds } 12901946089aSChristoph Lameter EXPORT_SYMBOL(alloc_disk_node); 12911da177e4SLinus Torvalds 12921da177e4SLinus Torvalds struct kobject *get_disk(struct gendisk *disk) 12931da177e4SLinus Torvalds { 12941da177e4SLinus Torvalds struct module *owner; 12951da177e4SLinus Torvalds struct kobject *kobj; 12961da177e4SLinus Torvalds 12971da177e4SLinus Torvalds if (!disk->fops) 12981da177e4SLinus Torvalds return NULL; 12991da177e4SLinus Torvalds owner = disk->fops->owner; 13001da177e4SLinus Torvalds if (owner && !try_module_get(owner)) 13011da177e4SLinus Torvalds return NULL; 1302ed9e1982STejun Heo kobj = kobject_get(&disk_to_dev(disk)->kobj); 13031da177e4SLinus Torvalds if (kobj == NULL) { 13041da177e4SLinus Torvalds module_put(owner); 13051da177e4SLinus Torvalds return NULL; 13061da177e4SLinus Torvalds } 13071da177e4SLinus Torvalds return kobj; 13081da177e4SLinus Torvalds 13091da177e4SLinus Torvalds } 13101da177e4SLinus Torvalds 13111da177e4SLinus Torvalds EXPORT_SYMBOL(get_disk); 13121da177e4SLinus Torvalds 13131da177e4SLinus Torvalds void put_disk(struct gendisk *disk) 13141da177e4SLinus Torvalds { 13151da177e4SLinus Torvalds if (disk) 1316ed9e1982STejun Heo kobject_put(&disk_to_dev(disk)->kobj); 13171da177e4SLinus Torvalds } 13181da177e4SLinus Torvalds 13191da177e4SLinus Torvalds EXPORT_SYMBOL(put_disk); 13201da177e4SLinus Torvalds 1321e3264a4dSHannes Reinecke static void set_disk_ro_uevent(struct gendisk *gd, int ro) 
1322e3264a4dSHannes Reinecke { 1323e3264a4dSHannes Reinecke char event[] = "DISK_RO=1"; 1324e3264a4dSHannes Reinecke char *envp[] = { event, NULL }; 1325e3264a4dSHannes Reinecke 1326e3264a4dSHannes Reinecke if (!ro) 1327e3264a4dSHannes Reinecke event[8] = '0'; 1328e3264a4dSHannes Reinecke kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1329e3264a4dSHannes Reinecke } 1330e3264a4dSHannes Reinecke 13311da177e4SLinus Torvalds void set_device_ro(struct block_device *bdev, int flag) 13321da177e4SLinus Torvalds { 13331da177e4SLinus Torvalds bdev->bd_part->policy = flag; 13341da177e4SLinus Torvalds } 13351da177e4SLinus Torvalds 13361da177e4SLinus Torvalds EXPORT_SYMBOL(set_device_ro); 13371da177e4SLinus Torvalds 13381da177e4SLinus Torvalds void set_disk_ro(struct gendisk *disk, int flag) 13391da177e4SLinus Torvalds { 1340e71bf0d0STejun Heo struct disk_part_iter piter; 1341e71bf0d0STejun Heo struct hd_struct *part; 1342e71bf0d0STejun Heo 1343e3264a4dSHannes Reinecke if (disk->part0.policy != flag) { 1344e3264a4dSHannes Reinecke set_disk_ro_uevent(disk, flag); 1345e3264a4dSHannes Reinecke disk->part0.policy = flag; 1346e3264a4dSHannes Reinecke } 1347e3264a4dSHannes Reinecke 1348e3264a4dSHannes Reinecke disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1349e71bf0d0STejun Heo while ((part = disk_part_iter_next(&piter))) 1350e71bf0d0STejun Heo part->policy = flag; 1351e71bf0d0STejun Heo disk_part_iter_exit(&piter); 13521da177e4SLinus Torvalds } 13531da177e4SLinus Torvalds 13541da177e4SLinus Torvalds EXPORT_SYMBOL(set_disk_ro); 13551da177e4SLinus Torvalds 13561da177e4SLinus Torvalds int bdev_read_only(struct block_device *bdev) 13571da177e4SLinus Torvalds { 13581da177e4SLinus Torvalds if (!bdev) 13591da177e4SLinus Torvalds return 0; 13601da177e4SLinus Torvalds return bdev->bd_part->policy; 13611da177e4SLinus Torvalds } 13621da177e4SLinus Torvalds 13631da177e4SLinus Torvalds EXPORT_SYMBOL(bdev_read_only); 13641da177e4SLinus Torvalds 1365cf771cb5STejun Heo 
int invalidate_partition(struct gendisk *disk, int partno) 13661da177e4SLinus Torvalds { 13671da177e4SLinus Torvalds int res = 0; 1368cf771cb5STejun Heo struct block_device *bdev = bdget_disk(disk, partno); 13691da177e4SLinus Torvalds if (bdev) { 13702ef41634SChristoph Hellwig fsync_bdev(bdev); 137193b270f7SNeilBrown res = __invalidate_device(bdev, true); 13721da177e4SLinus Torvalds bdput(bdev); 13731da177e4SLinus Torvalds } 13741da177e4SLinus Torvalds return res; 13751da177e4SLinus Torvalds } 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_partition); 137877ea887eSTejun Heo 137977ea887eSTejun Heo /* 138077ea887eSTejun Heo * Disk events - monitor disk events like media change and eject request. 138177ea887eSTejun Heo */ 138277ea887eSTejun Heo struct disk_events { 138377ea887eSTejun Heo struct list_head node; /* all disk_event's */ 138477ea887eSTejun Heo struct gendisk *disk; /* the associated disk */ 138577ea887eSTejun Heo spinlock_t lock; 138677ea887eSTejun Heo 1387fdd514e1STejun Heo struct mutex block_mutex; /* protects blocking */ 138877ea887eSTejun Heo int block; /* event blocking depth */ 138977ea887eSTejun Heo unsigned int pending; /* events already sent out */ 139077ea887eSTejun Heo unsigned int clearing; /* events being cleared */ 139177ea887eSTejun Heo 139277ea887eSTejun Heo long poll_msecs; /* interval, -1 for default */ 139377ea887eSTejun Heo struct delayed_work dwork; 139477ea887eSTejun Heo }; 139577ea887eSTejun Heo 139677ea887eSTejun Heo static const char *disk_events_strs[] = { 139777ea887eSTejun Heo [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", 139877ea887eSTejun Heo [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", 139977ea887eSTejun Heo }; 140077ea887eSTejun Heo 140177ea887eSTejun Heo static char *disk_uevents[] = { 140277ea887eSTejun Heo [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", 140377ea887eSTejun Heo [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", 140477ea887eSTejun Heo }; 
140577ea887eSTejun Heo 140677ea887eSTejun Heo /* list of all disk_events */ 140777ea887eSTejun Heo static DEFINE_MUTEX(disk_events_mutex); 140877ea887eSTejun Heo static LIST_HEAD(disk_events); 140977ea887eSTejun Heo 141077ea887eSTejun Heo /* disable in-kernel polling by default */ 141177ea887eSTejun Heo static unsigned long disk_events_dfl_poll_msecs = 0; 141277ea887eSTejun Heo 141377ea887eSTejun Heo static unsigned long disk_events_poll_jiffies(struct gendisk *disk) 141477ea887eSTejun Heo { 141577ea887eSTejun Heo struct disk_events *ev = disk->ev; 141677ea887eSTejun Heo long intv_msecs = 0; 141777ea887eSTejun Heo 141877ea887eSTejun Heo /* 141977ea887eSTejun Heo * If device-specific poll interval is set, always use it. If 142077ea887eSTejun Heo * the default is being used, poll iff there are events which 142177ea887eSTejun Heo * can't be monitored asynchronously. 142277ea887eSTejun Heo */ 142377ea887eSTejun Heo if (ev->poll_msecs >= 0) 142477ea887eSTejun Heo intv_msecs = ev->poll_msecs; 142577ea887eSTejun Heo else if (disk->events & ~disk->async_events) 142677ea887eSTejun Heo intv_msecs = disk_events_dfl_poll_msecs; 142777ea887eSTejun Heo 142877ea887eSTejun Heo return msecs_to_jiffies(intv_msecs); 142977ea887eSTejun Heo } 143077ea887eSTejun Heo 1431c3af54afSTejun Heo /** 1432c3af54afSTejun Heo * disk_block_events - block and flush disk event checking 1433c3af54afSTejun Heo * @disk: disk to block events for 1434c3af54afSTejun Heo * 1435c3af54afSTejun Heo * On return from this function, it is guaranteed that event checking 1436c3af54afSTejun Heo * isn't in progress and won't happen until unblocked by 1437c3af54afSTejun Heo * disk_unblock_events(). Events blocking is counted and the actual 1438c3af54afSTejun Heo * unblocking happens after the matching number of unblocks are done. 1439c3af54afSTejun Heo * 1440c3af54afSTejun Heo * Note that this intentionally does not block event checking from 1441c3af54afSTejun Heo * disk_clear_events(). 
1442c3af54afSTejun Heo * 1443c3af54afSTejun Heo * CONTEXT: 1444c3af54afSTejun Heo * Might sleep. 1445c3af54afSTejun Heo */ 1446c3af54afSTejun Heo void disk_block_events(struct gendisk *disk) 144777ea887eSTejun Heo { 144877ea887eSTejun Heo struct disk_events *ev = disk->ev; 144977ea887eSTejun Heo unsigned long flags; 145077ea887eSTejun Heo bool cancel; 145177ea887eSTejun Heo 1452c3af54afSTejun Heo if (!ev) 1453c3af54afSTejun Heo return; 1454c3af54afSTejun Heo 1455fdd514e1STejun Heo /* 1456fdd514e1STejun Heo * Outer mutex ensures that the first blocker completes canceling 1457fdd514e1STejun Heo * the event work before further blockers are allowed to finish. 1458fdd514e1STejun Heo */ 1459fdd514e1STejun Heo mutex_lock(&ev->block_mutex); 1460fdd514e1STejun Heo 146177ea887eSTejun Heo spin_lock_irqsave(&ev->lock, flags); 146277ea887eSTejun Heo cancel = !ev->block++; 146377ea887eSTejun Heo spin_unlock_irqrestore(&ev->lock, flags); 146477ea887eSTejun Heo 1465c3af54afSTejun Heo if (cancel) 146677ea887eSTejun Heo cancel_delayed_work_sync(&disk->ev->dwork); 1467fdd514e1STejun Heo 1468fdd514e1STejun Heo mutex_unlock(&ev->block_mutex); 146977ea887eSTejun Heo } 147077ea887eSTejun Heo 147177ea887eSTejun Heo static void __disk_unblock_events(struct gendisk *disk, bool check_now) 147277ea887eSTejun Heo { 147377ea887eSTejun Heo struct disk_events *ev = disk->ev; 147477ea887eSTejun Heo unsigned long intv; 147577ea887eSTejun Heo unsigned long flags; 147677ea887eSTejun Heo 147777ea887eSTejun Heo spin_lock_irqsave(&ev->lock, flags); 147877ea887eSTejun Heo 147977ea887eSTejun Heo if (WARN_ON_ONCE(ev->block <= 0)) 148077ea887eSTejun Heo goto out_unlock; 148177ea887eSTejun Heo 148277ea887eSTejun Heo if (--ev->block) 148377ea887eSTejun Heo goto out_unlock; 148477ea887eSTejun Heo 148577ea887eSTejun Heo /* 148677ea887eSTejun Heo * Not exactly a latency critical operation, set poll timer 148777ea887eSTejun Heo * slack to 25% and kick event check. 
148877ea887eSTejun Heo */ 148977ea887eSTejun Heo intv = disk_events_poll_jiffies(disk); 149077ea887eSTejun Heo set_timer_slack(&ev->dwork.timer, intv / 4); 149177ea887eSTejun Heo if (check_now) 14923b07e9caSTejun Heo queue_delayed_work(system_freezable_wq, &ev->dwork, 0); 149377ea887eSTejun Heo else if (intv) 14943b07e9caSTejun Heo queue_delayed_work(system_freezable_wq, &ev->dwork, intv); 149577ea887eSTejun Heo out_unlock: 149677ea887eSTejun Heo spin_unlock_irqrestore(&ev->lock, flags); 149777ea887eSTejun Heo } 149877ea887eSTejun Heo 149977ea887eSTejun Heo /** 150077ea887eSTejun Heo * disk_unblock_events - unblock disk event checking 150177ea887eSTejun Heo * @disk: disk to unblock events for 150277ea887eSTejun Heo * 150377ea887eSTejun Heo * Undo disk_block_events(). When the block count reaches zero, it 150477ea887eSTejun Heo * starts events polling if configured. 150577ea887eSTejun Heo * 150677ea887eSTejun Heo * CONTEXT: 150777ea887eSTejun Heo * Don't care. Safe to call from irq context. 150877ea887eSTejun Heo */ 150977ea887eSTejun Heo void disk_unblock_events(struct gendisk *disk) 151077ea887eSTejun Heo { 151177ea887eSTejun Heo if (disk->ev) 1512facc31ddSTejun Heo __disk_unblock_events(disk, false); 151377ea887eSTejun Heo } 151477ea887eSTejun Heo 151577ea887eSTejun Heo /** 151685ef06d1STejun Heo * disk_flush_events - schedule immediate event checking and flushing 151785ef06d1STejun Heo * @disk: disk to check and flush events for 151885ef06d1STejun Heo * @mask: events to flush 151977ea887eSTejun Heo * 152085ef06d1STejun Heo * Schedule immediate event checking on @disk if not blocked. Events in 152185ef06d1STejun Heo * @mask are scheduled to be cleared from the driver. Note that this 152285ef06d1STejun Heo * doesn't clear the events from @disk->ev. 152377ea887eSTejun Heo * 152477ea887eSTejun Heo * CONTEXT: 152585ef06d1STejun Heo * If @mask is non-zero must be called with bdev->bd_mutex held. 
152677ea887eSTejun Heo */ 152785ef06d1STejun Heo void disk_flush_events(struct gendisk *disk, unsigned int mask) 152877ea887eSTejun Heo { 1529a9dce2a3STejun Heo struct disk_events *ev = disk->ev; 1530a9dce2a3STejun Heo 1531a9dce2a3STejun Heo if (!ev) 1532a9dce2a3STejun Heo return; 1533a9dce2a3STejun Heo 153485ef06d1STejun Heo spin_lock_irq(&ev->lock); 153585ef06d1STejun Heo ev->clearing |= mask; 153641f63c53STejun Heo if (!ev->block) 15373b07e9caSTejun Heo mod_delayed_work(system_freezable_wq, &ev->dwork, 0); 153885ef06d1STejun Heo spin_unlock_irq(&ev->lock); 153977ea887eSTejun Heo } 154077ea887eSTejun Heo 154177ea887eSTejun Heo /** 154277ea887eSTejun Heo * disk_clear_events - synchronously check, clear and return pending events 154377ea887eSTejun Heo * @disk: disk to fetch and clear events from 154477ea887eSTejun Heo * @mask: mask of events to be fetched and clearted 154577ea887eSTejun Heo * 154677ea887eSTejun Heo * Disk events are synchronously checked and pending events in @mask 154777ea887eSTejun Heo * are cleared and returned. This ignores the block count. 154877ea887eSTejun Heo * 154977ea887eSTejun Heo * CONTEXT: 155077ea887eSTejun Heo * Might sleep. 
155177ea887eSTejun Heo */ 155277ea887eSTejun Heo unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) 155377ea887eSTejun Heo { 155477ea887eSTejun Heo const struct block_device_operations *bdops = disk->fops; 155577ea887eSTejun Heo struct disk_events *ev = disk->ev; 155677ea887eSTejun Heo unsigned int pending; 155712c2bdb2SDerek Basehore unsigned int clearing = mask; 155877ea887eSTejun Heo 155977ea887eSTejun Heo if (!ev) { 156077ea887eSTejun Heo /* for drivers still using the old ->media_changed method */ 156177ea887eSTejun Heo if ((mask & DISK_EVENT_MEDIA_CHANGE) && 156277ea887eSTejun Heo bdops->media_changed && bdops->media_changed(disk)) 156377ea887eSTejun Heo return DISK_EVENT_MEDIA_CHANGE; 156477ea887eSTejun Heo return 0; 156577ea887eSTejun Heo } 156677ea887eSTejun Heo 156712c2bdb2SDerek Basehore disk_block_events(disk); 156812c2bdb2SDerek Basehore 156912c2bdb2SDerek Basehore /* 157012c2bdb2SDerek Basehore * store the union of mask and ev->clearing on the stack so that the 157112c2bdb2SDerek Basehore * race with disk_flush_events does not cause ambiguity (ev->clearing 157212c2bdb2SDerek Basehore * can still be modified even if events are blocked). 157312c2bdb2SDerek Basehore */ 157477ea887eSTejun Heo spin_lock_irq(&ev->lock); 157512c2bdb2SDerek Basehore clearing |= ev->clearing; 157612c2bdb2SDerek Basehore ev->clearing = 0; 157777ea887eSTejun Heo spin_unlock_irq(&ev->lock); 157877ea887eSTejun Heo 157912c2bdb2SDerek Basehore disk_check_events(ev, &clearing); 1580aea24a8bSDerek Basehore /* 158112c2bdb2SDerek Basehore * if ev->clearing is not 0, the disk_flush_events got called in the 158212c2bdb2SDerek Basehore * middle of this function, so we want to run the workfn without delay. 1583aea24a8bSDerek Basehore */ 158412c2bdb2SDerek Basehore __disk_unblock_events(disk, ev->clearing ? 
true : false); 158577ea887eSTejun Heo 158677ea887eSTejun Heo /* then, fetch and clear pending events */ 158777ea887eSTejun Heo spin_lock_irq(&ev->lock); 158877ea887eSTejun Heo pending = ev->pending & mask; 158977ea887eSTejun Heo ev->pending &= ~mask; 159077ea887eSTejun Heo spin_unlock_irq(&ev->lock); 159112c2bdb2SDerek Basehore WARN_ON_ONCE(clearing & mask); 159277ea887eSTejun Heo 159377ea887eSTejun Heo return pending; 159477ea887eSTejun Heo } 159577ea887eSTejun Heo 159612c2bdb2SDerek Basehore /* 159712c2bdb2SDerek Basehore * Separate this part out so that a different pointer for clearing_ptr can be 159812c2bdb2SDerek Basehore * passed in for disk_clear_events. 159912c2bdb2SDerek Basehore */ 160077ea887eSTejun Heo static void disk_events_workfn(struct work_struct *work) 160177ea887eSTejun Heo { 160277ea887eSTejun Heo struct delayed_work *dwork = to_delayed_work(work); 160377ea887eSTejun Heo struct disk_events *ev = container_of(dwork, struct disk_events, dwork); 160412c2bdb2SDerek Basehore 160512c2bdb2SDerek Basehore disk_check_events(ev, &ev->clearing); 160612c2bdb2SDerek Basehore } 160712c2bdb2SDerek Basehore 160812c2bdb2SDerek Basehore static void disk_check_events(struct disk_events *ev, 160912c2bdb2SDerek Basehore unsigned int *clearing_ptr) 161012c2bdb2SDerek Basehore { 161177ea887eSTejun Heo struct gendisk *disk = ev->disk; 161277ea887eSTejun Heo char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; 161312c2bdb2SDerek Basehore unsigned int clearing = *clearing_ptr; 161477ea887eSTejun Heo unsigned int events; 161577ea887eSTejun Heo unsigned long intv; 161677ea887eSTejun Heo int nr_events = 0, i; 161777ea887eSTejun Heo 161877ea887eSTejun Heo /* check events */ 161977ea887eSTejun Heo events = disk->fops->check_events(disk, clearing); 162077ea887eSTejun Heo 162177ea887eSTejun Heo /* accumulate pending events and schedule next poll if necessary */ 162277ea887eSTejun Heo spin_lock_irq(&ev->lock); 162377ea887eSTejun Heo 162477ea887eSTejun Heo events &= ~ev->pending; 
162577ea887eSTejun Heo ev->pending |= events; 162612c2bdb2SDerek Basehore *clearing_ptr &= ~clearing; 162777ea887eSTejun Heo 162877ea887eSTejun Heo intv = disk_events_poll_jiffies(disk); 162977ea887eSTejun Heo if (!ev->block && intv) 16303b07e9caSTejun Heo queue_delayed_work(system_freezable_wq, &ev->dwork, intv); 163177ea887eSTejun Heo 163277ea887eSTejun Heo spin_unlock_irq(&ev->lock); 163377ea887eSTejun Heo 16347c88a168STejun Heo /* 16357c88a168STejun Heo * Tell userland about new events. Only the events listed in 16367c88a168STejun Heo * @disk->events are reported. Unlisted events are processed the 16377c88a168STejun Heo * same internally but never get reported to userland. 16387c88a168STejun Heo */ 163977ea887eSTejun Heo for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) 16407c88a168STejun Heo if (events & disk->events & (1 << i)) 164177ea887eSTejun Heo envp[nr_events++] = disk_uevents[i]; 164277ea887eSTejun Heo 164377ea887eSTejun Heo if (nr_events) 164477ea887eSTejun Heo kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 164577ea887eSTejun Heo } 164677ea887eSTejun Heo 164777ea887eSTejun Heo /* 164877ea887eSTejun Heo * A disk events enabled device has the following sysfs nodes under 164977ea887eSTejun Heo * its /sys/block/X/ directory. 
165077ea887eSTejun Heo * 165177ea887eSTejun Heo * events : list of all supported events 165277ea887eSTejun Heo * events_async : list of events which can be detected w/o polling 165377ea887eSTejun Heo * events_poll_msecs : polling interval, 0: disable, -1: system default 165477ea887eSTejun Heo */ 165577ea887eSTejun Heo static ssize_t __disk_events_show(unsigned int events, char *buf) 165677ea887eSTejun Heo { 165777ea887eSTejun Heo const char *delim = ""; 165877ea887eSTejun Heo ssize_t pos = 0; 165977ea887eSTejun Heo int i; 166077ea887eSTejun Heo 166177ea887eSTejun Heo for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) 166277ea887eSTejun Heo if (events & (1 << i)) { 166377ea887eSTejun Heo pos += sprintf(buf + pos, "%s%s", 166477ea887eSTejun Heo delim, disk_events_strs[i]); 166577ea887eSTejun Heo delim = " "; 166677ea887eSTejun Heo } 166777ea887eSTejun Heo if (pos) 166877ea887eSTejun Heo pos += sprintf(buf + pos, "\n"); 166977ea887eSTejun Heo return pos; 167077ea887eSTejun Heo } 167177ea887eSTejun Heo 167277ea887eSTejun Heo static ssize_t disk_events_show(struct device *dev, 167377ea887eSTejun Heo struct device_attribute *attr, char *buf) 167477ea887eSTejun Heo { 167577ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 167677ea887eSTejun Heo 167777ea887eSTejun Heo return __disk_events_show(disk->events, buf); 167877ea887eSTejun Heo } 167977ea887eSTejun Heo 168077ea887eSTejun Heo static ssize_t disk_events_async_show(struct device *dev, 168177ea887eSTejun Heo struct device_attribute *attr, char *buf) 168277ea887eSTejun Heo { 168377ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 168477ea887eSTejun Heo 168577ea887eSTejun Heo return __disk_events_show(disk->async_events, buf); 168677ea887eSTejun Heo } 168777ea887eSTejun Heo 168877ea887eSTejun Heo static ssize_t disk_events_poll_msecs_show(struct device *dev, 168977ea887eSTejun Heo struct device_attribute *attr, 169077ea887eSTejun Heo char *buf) 169177ea887eSTejun Heo { 169277ea887eSTejun Heo struct 
gendisk *disk = dev_to_disk(dev); 169377ea887eSTejun Heo 169477ea887eSTejun Heo return sprintf(buf, "%ld\n", disk->ev->poll_msecs); 169577ea887eSTejun Heo } 169677ea887eSTejun Heo 169777ea887eSTejun Heo static ssize_t disk_events_poll_msecs_store(struct device *dev, 169877ea887eSTejun Heo struct device_attribute *attr, 169977ea887eSTejun Heo const char *buf, size_t count) 170077ea887eSTejun Heo { 170177ea887eSTejun Heo struct gendisk *disk = dev_to_disk(dev); 170277ea887eSTejun Heo long intv; 170377ea887eSTejun Heo 170477ea887eSTejun Heo if (!count || !sscanf(buf, "%ld", &intv)) 170577ea887eSTejun Heo return -EINVAL; 170677ea887eSTejun Heo 170777ea887eSTejun Heo if (intv < 0 && intv != -1) 170877ea887eSTejun Heo return -EINVAL; 170977ea887eSTejun Heo 1710c3af54afSTejun Heo disk_block_events(disk); 171177ea887eSTejun Heo disk->ev->poll_msecs = intv; 171277ea887eSTejun Heo __disk_unblock_events(disk, true); 171377ea887eSTejun Heo 171477ea887eSTejun Heo return count; 171577ea887eSTejun Heo } 171677ea887eSTejun Heo 171777ea887eSTejun Heo static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL); 171877ea887eSTejun Heo static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL); 171977ea887eSTejun Heo static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR, 172077ea887eSTejun Heo disk_events_poll_msecs_show, 172177ea887eSTejun Heo disk_events_poll_msecs_store); 172277ea887eSTejun Heo 172377ea887eSTejun Heo static const struct attribute *disk_events_attrs[] = { 172477ea887eSTejun Heo &dev_attr_events.attr, 172577ea887eSTejun Heo &dev_attr_events_async.attr, 172677ea887eSTejun Heo &dev_attr_events_poll_msecs.attr, 172777ea887eSTejun Heo NULL, 172877ea887eSTejun Heo }; 172977ea887eSTejun Heo 173077ea887eSTejun Heo /* 173177ea887eSTejun Heo * The default polling interval can be specified by the kernel 173277ea887eSTejun Heo * parameter block.events_dfl_poll_msecs which defaults to 0 173377ea887eSTejun Heo * (disable). 
This can also be modified runtime by writing to 173477ea887eSTejun Heo * /sys/module/block/events_dfl_poll_msecs. 173577ea887eSTejun Heo */ 173677ea887eSTejun Heo static int disk_events_set_dfl_poll_msecs(const char *val, 173777ea887eSTejun Heo const struct kernel_param *kp) 173877ea887eSTejun Heo { 173977ea887eSTejun Heo struct disk_events *ev; 174077ea887eSTejun Heo int ret; 174177ea887eSTejun Heo 174277ea887eSTejun Heo ret = param_set_ulong(val, kp); 174377ea887eSTejun Heo if (ret < 0) 174477ea887eSTejun Heo return ret; 174577ea887eSTejun Heo 174677ea887eSTejun Heo mutex_lock(&disk_events_mutex); 174777ea887eSTejun Heo 174877ea887eSTejun Heo list_for_each_entry(ev, &disk_events, node) 174985ef06d1STejun Heo disk_flush_events(ev->disk, 0); 175077ea887eSTejun Heo 175177ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 175277ea887eSTejun Heo 175377ea887eSTejun Heo return 0; 175477ea887eSTejun Heo } 175577ea887eSTejun Heo 175677ea887eSTejun Heo static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { 175777ea887eSTejun Heo .set = disk_events_set_dfl_poll_msecs, 175877ea887eSTejun Heo .get = param_get_ulong, 175977ea887eSTejun Heo }; 176077ea887eSTejun Heo 176177ea887eSTejun Heo #undef MODULE_PARAM_PREFIX 176277ea887eSTejun Heo #define MODULE_PARAM_PREFIX "block." 176377ea887eSTejun Heo 176477ea887eSTejun Heo module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, 176577ea887eSTejun Heo &disk_events_dfl_poll_msecs, 0644); 176677ea887eSTejun Heo 176777ea887eSTejun Heo /* 17689f53d2feSStanislaw Gruszka * disk_{alloc|add|del|release}_events - initialize and destroy disk_events. 
176977ea887eSTejun Heo */ 17709f53d2feSStanislaw Gruszka static void disk_alloc_events(struct gendisk *disk) 177177ea887eSTejun Heo { 177277ea887eSTejun Heo struct disk_events *ev; 177377ea887eSTejun Heo 177475e3f3eeSTejun Heo if (!disk->fops->check_events) 177577ea887eSTejun Heo return; 177677ea887eSTejun Heo 177777ea887eSTejun Heo ev = kzalloc(sizeof(*ev), GFP_KERNEL); 177877ea887eSTejun Heo if (!ev) { 177977ea887eSTejun Heo pr_warn("%s: failed to initialize events\n", disk->disk_name); 178077ea887eSTejun Heo return; 178177ea887eSTejun Heo } 178277ea887eSTejun Heo 178377ea887eSTejun Heo INIT_LIST_HEAD(&ev->node); 178477ea887eSTejun Heo ev->disk = disk; 178577ea887eSTejun Heo spin_lock_init(&ev->lock); 1786fdd514e1STejun Heo mutex_init(&ev->block_mutex); 178777ea887eSTejun Heo ev->block = 1; 178877ea887eSTejun Heo ev->poll_msecs = -1; 178977ea887eSTejun Heo INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); 179077ea887eSTejun Heo 17919f53d2feSStanislaw Gruszka disk->ev = ev; 17929f53d2feSStanislaw Gruszka } 17939f53d2feSStanislaw Gruszka 17949f53d2feSStanislaw Gruszka static void disk_add_events(struct gendisk *disk) 17959f53d2feSStanislaw Gruszka { 17969f53d2feSStanislaw Gruszka if (!disk->ev) 17979f53d2feSStanislaw Gruszka return; 17989f53d2feSStanislaw Gruszka 17999f53d2feSStanislaw Gruszka /* FIXME: error handling */ 18009f53d2feSStanislaw Gruszka if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) 18019f53d2feSStanislaw Gruszka pr_warn("%s: failed to create sysfs files for events\n", 18029f53d2feSStanislaw Gruszka disk->disk_name); 18039f53d2feSStanislaw Gruszka 180477ea887eSTejun Heo mutex_lock(&disk_events_mutex); 18059f53d2feSStanislaw Gruszka list_add_tail(&disk->ev->node, &disk_events); 180677ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 180777ea887eSTejun Heo 180877ea887eSTejun Heo /* 180977ea887eSTejun Heo * Block count is initialized to 1 and the following initial 181077ea887eSTejun Heo * unblock kicks it into action. 
181177ea887eSTejun Heo */ 181277ea887eSTejun Heo __disk_unblock_events(disk, true); 181377ea887eSTejun Heo } 181477ea887eSTejun Heo 181577ea887eSTejun Heo static void disk_del_events(struct gendisk *disk) 181677ea887eSTejun Heo { 181777ea887eSTejun Heo if (!disk->ev) 181877ea887eSTejun Heo return; 181977ea887eSTejun Heo 1820c3af54afSTejun Heo disk_block_events(disk); 182177ea887eSTejun Heo 182277ea887eSTejun Heo mutex_lock(&disk_events_mutex); 182377ea887eSTejun Heo list_del_init(&disk->ev->node); 182477ea887eSTejun Heo mutex_unlock(&disk_events_mutex); 182577ea887eSTejun Heo 182677ea887eSTejun Heo sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); 182777ea887eSTejun Heo } 182877ea887eSTejun Heo 182977ea887eSTejun Heo static void disk_release_events(struct gendisk *disk) 183077ea887eSTejun Heo { 183177ea887eSTejun Heo /* the block count should be 1 from disk_del_events() */ 183277ea887eSTejun Heo WARN_ON_ONCE(disk->ev && disk->ev->block != 1); 183377ea887eSTejun Heo kfree(disk->ev); 183477ea887eSTejun Heo } 1835