11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Block device elevator/IO-scheduler. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 30042000 Jens Axboe <axboe@suse.de> : 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Split the elevator a bit so that it is possible to choose a different 91da177e4SLinus Torvalds * one or even write a new "plug in". There are three pieces: 101da177e4SLinus Torvalds * - elevator_fn, inserts a new request in the queue list 111da177e4SLinus Torvalds * - elevator_merge_fn, decides whether a new buffer can be merged with 121da177e4SLinus Torvalds * an existing request 131da177e4SLinus Torvalds * - elevator_dequeue_fn, called when a request is taken off the active list 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * 20082000 Dave Jones <davej@suse.de> : 161da177e4SLinus Torvalds * Removed tests for max-bomb-segments, which was breaking elvtune 171da177e4SLinus Torvalds * when run without -bN 181da177e4SLinus Torvalds * 191da177e4SLinus Torvalds * Jens: 201da177e4SLinus Torvalds * - Rework again to work with bio instead of buffer_heads 211da177e4SLinus Torvalds * - loose bi_dev comparisons, partition handling is right now 221da177e4SLinus Torvalds * - completely modularize elevator setup and teardown 231da177e4SLinus Torvalds * 241da177e4SLinus Torvalds */ 251da177e4SLinus Torvalds #include <linux/kernel.h> 261da177e4SLinus Torvalds #include <linux/fs.h> 271da177e4SLinus Torvalds #include <linux/blkdev.h> 281da177e4SLinus Torvalds #include <linux/elevator.h> 291da177e4SLinus Torvalds #include <linux/bio.h> 301da177e4SLinus Torvalds #include <linux/module.h> 311da177e4SLinus Torvalds #include <linux/slab.h> 321da177e4SLinus Torvalds #include <linux/init.h> 331da177e4SLinus Torvalds #include <linux/compiler.h> 34cb98fc8bSTejun Heo #include <linux/delay.h> 352056a782SJens Axboe #include <linux/blktrace_api.h> 369817064bSJens Axboe #include <linux/hash.h> 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds #include <asm/uaccess.h> 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds static DEFINE_SPINLOCK(elv_list_lock); 411da177e4SLinus Torvalds static LIST_HEAD(elv_list); 421da177e4SLinus Torvalds 431da177e4SLinus Torvalds /* 449817064bSJens Axboe * Merge hash stuff. 459817064bSJens Axboe */ 469817064bSJens Axboe static const int elv_hash_shift = 6; 479817064bSJens Axboe #define ELV_HASH_BLOCK(sec) ((sec) >> 3) 489817064bSJens Axboe #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) 499817064bSJens Axboe #define ELV_HASH_ENTRIES (1 << elv_hash_shift) 509817064bSJens Axboe #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 519817064bSJens Axboe #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 529817064bSJens Axboe 539817064bSJens Axboe /* 541da177e4SLinus Torvalds * can we safely merge with this request? 551da177e4SLinus Torvalds */ 561da177e4SLinus Torvalds inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 571da177e4SLinus Torvalds { 581da177e4SLinus Torvalds if (!rq_mergeable(rq)) 591da177e4SLinus Torvalds return 0; 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds /* 621da177e4SLinus Torvalds * different data direction or already started, don't merge 631da177e4SLinus Torvalds */ 641da177e4SLinus Torvalds if (bio_data_dir(bio) != rq_data_dir(rq)) 651da177e4SLinus Torvalds return 0; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds /* 681da177e4SLinus Torvalds * same device and no special stuff set, merge is ok 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds if (rq->rq_disk == bio->bi_bdev->bd_disk && 711da177e4SLinus Torvalds !rq->waiting && !rq->special) 721da177e4SLinus Torvalds return 1; 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds return 0; 751da177e4SLinus Torvalds } 761da177e4SLinus Torvalds EXPORT_SYMBOL(elv_rq_merge_ok); 771da177e4SLinus Torvalds 78769db45bSCoywolf Qi Hunt static inline int elv_try_merge(struct request *__rq, struct bio *bio) 791da177e4SLinus Torvalds { 801da177e4SLinus Torvalds int ret = ELEVATOR_NO_MERGE; 811da177e4SLinus Torvalds 821da177e4SLinus Torvalds /* 831da177e4SLinus Torvalds * we can merge and sequence is ok, check if it's possible 841da177e4SLinus Torvalds */ 851da177e4SLinus Torvalds if (elv_rq_merge_ok(__rq, bio)) { 861da177e4SLinus Torvalds if (__rq->sector + __rq->nr_sectors == bio->bi_sector) 871da177e4SLinus Torvalds ret = ELEVATOR_BACK_MERGE; 881da177e4SLinus Torvalds else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) 891da177e4SLinus Torvalds ret = ELEVATOR_FRONT_MERGE; 901da177e4SLinus Torvalds } 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds return ret; 931da177e4SLinus Torvalds } 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds static struct elevator_type *elevator_find(const char *name) 961da177e4SLinus Torvalds { 971da177e4SLinus Torvalds struct elevator_type *e = NULL; 981da177e4SLinus Torvalds struct list_head *entry; 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds list_for_each(entry, &elv_list) { 1011da177e4SLinus Torvalds struct elevator_type *__e; 1021da177e4SLinus Torvalds 1031da177e4SLinus Torvalds __e = list_entry(entry, struct elevator_type, list); 1041da177e4SLinus Torvalds 1051da177e4SLinus Torvalds if (!strcmp(__e->elevator_name, name)) { 1061da177e4SLinus Torvalds e = __e; 1071da177e4SLinus Torvalds break; 1081da177e4SLinus Torvalds } 1091da177e4SLinus Torvalds } 1101da177e4SLinus Torvalds 1111da177e4SLinus Torvalds return e; 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds static void elevator_put(struct elevator_type *e) 1151da177e4SLinus Torvalds { 1161da177e4SLinus Torvalds module_put(e->elevator_owner); 1171da177e4SLinus Torvalds } 1181da177e4SLinus Torvalds 1191da177e4SLinus Torvalds static struct elevator_type *elevator_get(const char *name) 1201da177e4SLinus Torvalds { 1212824bc93STejun Heo struct elevator_type *e; 1221da177e4SLinus Torvalds 1232824bc93STejun Heo spin_lock_irq(&elv_list_lock); 1242824bc93STejun Heo 1252824bc93STejun Heo e = elevator_find(name); 1262824bc93STejun Heo if (e && !try_module_get(e->elevator_owner)) 1272824bc93STejun Heo e = NULL; 1282824bc93STejun Heo 1292824bc93STejun Heo spin_unlock_irq(&elv_list_lock); 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds return e; 1321da177e4SLinus Torvalds } 1331da177e4SLinus Torvalds 134bc1c1169SJens Axboe static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq) 1351da177e4SLinus Torvalds { 136bc1c1169SJens Axboe return eq->ops->elevator_init_fn(q, eq); 137bc1c1169SJens Axboe } 1381da177e4SLinus Torvalds 139bc1c1169SJens Axboe static void elevator_attach(request_queue_t *q, struct elevator_queue *eq, 140bc1c1169SJens Axboe void *data) 141bc1c1169SJens Axboe { 1421da177e4SLinus Torvalds q->elevator = eq; 143bc1c1169SJens Axboe eq->elevator_data = data; 1441da177e4SLinus Torvalds } 1451da177e4SLinus Torvalds 1461da177e4SLinus Torvalds static char chosen_elevator[16]; 1471da177e4SLinus Torvalds 1485f003976SNate Diller static int __init elevator_setup(char *str) 1491da177e4SLinus Torvalds { 150752a3b79SChuck Ebbert /* 151752a3b79SChuck Ebbert * Be backwards-compatible with previous kernels, so users 152752a3b79SChuck Ebbert * won't get the wrong elevator. 153752a3b79SChuck Ebbert */ 1545f003976SNate Diller if (!strcmp(str, "as")) 155752a3b79SChuck Ebbert strcpy(chosen_elevator, "anticipatory"); 156cff3ba22SZachary Amsden else 1571da177e4SLinus Torvalds strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 1589b41046cSOGAWA Hirofumi return 1; 1591da177e4SLinus Torvalds } 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds __setup("elevator=", elevator_setup); 1621da177e4SLinus Torvalds 1633d1ab40fSAl Viro static struct kobj_type elv_ktype; 1643d1ab40fSAl Viro 1653d1ab40fSAl Viro static elevator_t *elevator_alloc(struct elevator_type *e) 1663d1ab40fSAl Viro { 1679817064bSJens Axboe elevator_t *eq; 1689817064bSJens Axboe int i; 1699817064bSJens Axboe 1709817064bSJens Axboe eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); 1719817064bSJens Axboe if (unlikely(!eq)) 1729817064bSJens Axboe goto err; 1739817064bSJens Axboe 1743d1ab40fSAl Viro memset(eq, 0, sizeof(*eq)); 1753d1ab40fSAl Viro eq->ops = &e->ops; 1763d1ab40fSAl Viro eq->elevator_type = e; 1773d1ab40fSAl Viro kobject_init(&eq->kobj); 1783d1ab40fSAl Viro snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); 1793d1ab40fSAl Viro eq->kobj.ktype = &elv_ktype; 1803d1ab40fSAl Viro mutex_init(&eq->sysfs_lock); 1819817064bSJens Axboe 1829817064bSJens Axboe eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL); 1839817064bSJens Axboe if (!eq->hash) 1849817064bSJens Axboe goto err; 1859817064bSJens Axboe 1869817064bSJens Axboe for (i = 0; i < ELV_HASH_ENTRIES; i++) 1879817064bSJens Axboe INIT_HLIST_HEAD(&eq->hash[i]); 1889817064bSJens Axboe 1893d1ab40fSAl Viro return eq; 1909817064bSJens Axboe err: 1919817064bSJens Axboe kfree(eq); 1929817064bSJens Axboe elevator_put(e); 1939817064bSJens Axboe return NULL; 1943d1ab40fSAl Viro } 1953d1ab40fSAl Viro 1963d1ab40fSAl Viro static void elevator_release(struct kobject *kobj) 1973d1ab40fSAl Viro { 1983d1ab40fSAl Viro elevator_t *e = container_of(kobj, elevator_t, kobj); 1999817064bSJens Axboe 2003d1ab40fSAl Viro elevator_put(e->elevator_type); 2019817064bSJens Axboe kfree(e->hash); 2023d1ab40fSAl Viro kfree(e); 2033d1ab40fSAl Viro } 2043d1ab40fSAl Viro 2051da177e4SLinus Torvalds int elevator_init(request_queue_t *q, char *name) 2061da177e4SLinus Torvalds { 2071da177e4SLinus Torvalds struct elevator_type *e = NULL; 2081da177e4SLinus Torvalds struct elevator_queue *eq; 2091da177e4SLinus Torvalds int ret = 0; 210bc1c1169SJens Axboe void *data; 2111da177e4SLinus Torvalds 212cb98fc8bSTejun Heo INIT_LIST_HEAD(&q->queue_head); 213cb98fc8bSTejun Heo q->last_merge = NULL; 214cb98fc8bSTejun Heo q->end_sector = 0; 215cb98fc8bSTejun Heo q->boundary_rq = NULL; 216cb98fc8bSTejun Heo 2175f003976SNate Diller if (name && !(e = elevator_get(name))) 2181da177e4SLinus Torvalds return -EINVAL; 2191da177e4SLinus Torvalds 220248d5ca5SNate Diller if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) 2215f003976SNate Diller printk("I/O scheduler %s not found\n", chosen_elevator); 222248d5ca5SNate Diller 223248d5ca5SNate Diller if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { 224248d5ca5SNate Diller printk("Default I/O scheduler not found, using no-op\n"); 225248d5ca5SNate Diller e = elevator_get("noop"); 2265f003976SNate Diller } 2275f003976SNate Diller 2283d1ab40fSAl Viro eq = elevator_alloc(e); 2293d1ab40fSAl Viro if (!eq) 2301da177e4SLinus Torvalds return -ENOMEM; 2311da177e4SLinus Torvalds 232bc1c1169SJens Axboe data = elevator_init_queue(q, eq); 233bc1c1169SJens Axboe if (!data) { 2343d1ab40fSAl Viro kobject_put(&eq->kobj); 235bc1c1169SJens Axboe return -ENOMEM; 236bc1c1169SJens Axboe } 2371da177e4SLinus Torvalds 238bc1c1169SJens Axboe elevator_attach(q, eq, data); 2391da177e4SLinus Torvalds return ret; 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds 242*2e662b65SJens Axboe EXPORT_SYMBOL(elevator_init); 243*2e662b65SJens Axboe 2441da177e4SLinus Torvalds void elevator_exit(elevator_t *e) 2451da177e4SLinus Torvalds { 2463d1ab40fSAl Viro mutex_lock(&e->sysfs_lock); 2471da177e4SLinus Torvalds if (e->ops->elevator_exit_fn) 2481da177e4SLinus Torvalds e->ops->elevator_exit_fn(e); 2493d1ab40fSAl Viro e->ops = NULL; 2503d1ab40fSAl Viro mutex_unlock(&e->sysfs_lock); 2511da177e4SLinus Torvalds 2523d1ab40fSAl Viro kobject_put(&e->kobj); 2531da177e4SLinus Torvalds } 2541da177e4SLinus Torvalds 255*2e662b65SJens Axboe EXPORT_SYMBOL(elevator_exit); 256*2e662b65SJens Axboe 2579817064bSJens Axboe static inline void __elv_rqhash_del(struct request *rq) 2589817064bSJens Axboe { 2599817064bSJens Axboe hlist_del_init(&rq->hash); 2609817064bSJens Axboe } 2619817064bSJens Axboe 2629817064bSJens Axboe static void elv_rqhash_del(request_queue_t *q, struct request *rq) 2639817064bSJens Axboe { 2649817064bSJens Axboe if (ELV_ON_HASH(rq)) 2659817064bSJens Axboe __elv_rqhash_del(rq); 2669817064bSJens Axboe } 2679817064bSJens Axboe 2689817064bSJens Axboe static void elv_rqhash_add(request_queue_t *q, struct request *rq) 2699817064bSJens Axboe { 2709817064bSJens Axboe elevator_t *e = q->elevator; 2719817064bSJens Axboe 2729817064bSJens Axboe BUG_ON(ELV_ON_HASH(rq)); 2739817064bSJens Axboe hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); 2749817064bSJens Axboe } 2759817064bSJens Axboe 2769817064bSJens Axboe static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) 2779817064bSJens Axboe { 2789817064bSJens Axboe __elv_rqhash_del(rq); 2799817064bSJens Axboe elv_rqhash_add(q, rq); 2809817064bSJens Axboe } 2819817064bSJens Axboe 2829817064bSJens Axboe static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) 2839817064bSJens Axboe { 2849817064bSJens Axboe elevator_t *e = q->elevator; 2859817064bSJens Axboe struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; 2869817064bSJens Axboe struct hlist_node *entry, *next; 2879817064bSJens Axboe struct request *rq; 2889817064bSJens Axboe 2899817064bSJens Axboe hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { 2909817064bSJens Axboe BUG_ON(!ELV_ON_HASH(rq)); 2919817064bSJens Axboe 2929817064bSJens Axboe if (unlikely(!rq_mergeable(rq))) { 2939817064bSJens Axboe __elv_rqhash_del(rq); 2949817064bSJens Axboe continue; 2959817064bSJens Axboe } 2969817064bSJens Axboe 2979817064bSJens Axboe if (rq_hash_key(rq) == offset) 2989817064bSJens Axboe return rq; 2999817064bSJens Axboe } 3009817064bSJens Axboe 3019817064bSJens Axboe return NULL; 3029817064bSJens Axboe } 3039817064bSJens Axboe 3048922e16cSTejun Heo /* 305*2e662b65SJens Axboe * RB-tree support functions for inserting/lookup/removal of requests 306*2e662b65SJens Axboe * in a sorted RB tree. 307*2e662b65SJens Axboe */ 308*2e662b65SJens Axboe struct request *elv_rb_add(struct rb_root *root, struct request *rq) 309*2e662b65SJens Axboe { 310*2e662b65SJens Axboe struct rb_node **p = &root->rb_node; 311*2e662b65SJens Axboe struct rb_node *parent = NULL; 312*2e662b65SJens Axboe struct request *__rq; 313*2e662b65SJens Axboe 314*2e662b65SJens Axboe while (*p) { 315*2e662b65SJens Axboe parent = *p; 316*2e662b65SJens Axboe __rq = rb_entry(parent, struct request, rb_node); 317*2e662b65SJens Axboe 318*2e662b65SJens Axboe if (rq->sector < __rq->sector) 319*2e662b65SJens Axboe p = &(*p)->rb_left; 320*2e662b65SJens Axboe else if (rq->sector > __rq->sector) 321*2e662b65SJens Axboe p = &(*p)->rb_right; 322*2e662b65SJens Axboe else 323*2e662b65SJens Axboe return __rq; 324*2e662b65SJens Axboe } 325*2e662b65SJens Axboe 326*2e662b65SJens Axboe rb_link_node(&rq->rb_node, parent, p); 327*2e662b65SJens Axboe rb_insert_color(&rq->rb_node, root); 328*2e662b65SJens Axboe return NULL; 329*2e662b65SJens Axboe } 330*2e662b65SJens Axboe 331*2e662b65SJens Axboe EXPORT_SYMBOL(elv_rb_add); 332*2e662b65SJens Axboe 333*2e662b65SJens Axboe void elv_rb_del(struct rb_root *root, struct request *rq) 334*2e662b65SJens Axboe { 335*2e662b65SJens Axboe BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); 336*2e662b65SJens Axboe rb_erase(&rq->rb_node, root); 337*2e662b65SJens Axboe RB_CLEAR_NODE(&rq->rb_node); 338*2e662b65SJens Axboe } 339*2e662b65SJens Axboe 340*2e662b65SJens Axboe EXPORT_SYMBOL(elv_rb_del); 341*2e662b65SJens Axboe 342*2e662b65SJens Axboe struct request *elv_rb_find(struct rb_root *root, sector_t sector) 343*2e662b65SJens Axboe { 344*2e662b65SJens Axboe struct rb_node *n = root->rb_node; 345*2e662b65SJens Axboe struct request *rq; 346*2e662b65SJens Axboe 347*2e662b65SJens Axboe while (n) { 348*2e662b65SJens Axboe rq = rb_entry(n, struct request, rb_node); 349*2e662b65SJens Axboe 350*2e662b65SJens Axboe if (sector < rq->sector) 351*2e662b65SJens Axboe n = n->rb_left; 352*2e662b65SJens Axboe else if (sector > rq->sector) 353*2e662b65SJens Axboe n = n->rb_right; 354*2e662b65SJens Axboe else 355*2e662b65SJens Axboe return rq; 356*2e662b65SJens Axboe } 357*2e662b65SJens Axboe 358*2e662b65SJens Axboe return NULL; 359*2e662b65SJens Axboe } 360*2e662b65SJens Axboe 361*2e662b65SJens Axboe EXPORT_SYMBOL(elv_rb_find); 362*2e662b65SJens Axboe 363*2e662b65SJens Axboe /* 3648922e16cSTejun Heo * Insert rq into dispatch queue of q. Queue lock must be held on 365*2e662b65SJens Axboe * entry. rq is sort insted into the dispatch queue. To be used by 366*2e662b65SJens Axboe * specific elevators. 3678922e16cSTejun Heo */ 3681b47f531SJens Axboe void elv_dispatch_sort(request_queue_t *q, struct request *rq) 3698922e16cSTejun Heo { 3708922e16cSTejun Heo sector_t boundary; 3718922e16cSTejun Heo struct list_head *entry; 3728922e16cSTejun Heo 37306b86245STejun Heo if (q->last_merge == rq) 37406b86245STejun Heo q->last_merge = NULL; 3759817064bSJens Axboe 3769817064bSJens Axboe elv_rqhash_del(q, rq); 3779817064bSJens Axboe 37815853af9STejun Heo q->nr_sorted--; 37906b86245STejun Heo 3801b47f531SJens Axboe boundary = q->end_sector; 3818922e16cSTejun Heo 3828922e16cSTejun Heo list_for_each_prev(entry, &q->queue_head) { 3838922e16cSTejun Heo struct request *pos = list_entry_rq(entry); 3848922e16cSTejun Heo 3854aff5e23SJens Axboe if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 3868922e16cSTejun Heo break; 3878922e16cSTejun Heo if (rq->sector >= boundary) { 3888922e16cSTejun Heo if (pos->sector < boundary) 3898922e16cSTejun Heo continue; 3908922e16cSTejun Heo } else { 3918922e16cSTejun Heo if (pos->sector >= boundary) 3928922e16cSTejun Heo break; 3938922e16cSTejun Heo } 3948922e16cSTejun Heo if (rq->sector >= pos->sector) 3958922e16cSTejun Heo break; 3968922e16cSTejun Heo } 3978922e16cSTejun Heo 3988922e16cSTejun Heo list_add(&rq->queuelist, entry); 3998922e16cSTejun Heo } 4008922e16cSTejun Heo 401*2e662b65SJens Axboe EXPORT_SYMBOL(elv_dispatch_sort); 402*2e662b65SJens Axboe 4039817064bSJens Axboe /* 404*2e662b65SJens Axboe * Insert rq into dispatch queue of q. Queue lock must be held on 405*2e662b65SJens Axboe * entry. rq is added to the back of the dispatch queue. To be used by 406*2e662b65SJens Axboe * specific elevators. 4079817064bSJens Axboe */ 4089817064bSJens Axboe void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) 4099817064bSJens Axboe { 4109817064bSJens Axboe if (q->last_merge == rq) 4119817064bSJens Axboe q->last_merge = NULL; 4129817064bSJens Axboe 4139817064bSJens Axboe elv_rqhash_del(q, rq); 4149817064bSJens Axboe 4159817064bSJens Axboe q->nr_sorted--; 4169817064bSJens Axboe 4179817064bSJens Axboe q->end_sector = rq_end_sector(rq); 4189817064bSJens Axboe q->boundary_rq = rq; 4199817064bSJens Axboe list_add_tail(&rq->queuelist, &q->queue_head); 4209817064bSJens Axboe } 4219817064bSJens Axboe 422*2e662b65SJens Axboe EXPORT_SYMBOL(elv_dispatch_add_tail); 423*2e662b65SJens Axboe 4241da177e4SLinus Torvalds int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 4251da177e4SLinus Torvalds { 4261da177e4SLinus Torvalds elevator_t *e = q->elevator; 4279817064bSJens Axboe struct request *__rq; 42806b86245STejun Heo int ret; 42906b86245STejun Heo 4309817064bSJens Axboe /* 4319817064bSJens Axboe * First try one-hit cache. 4329817064bSJens Axboe */ 43306b86245STejun Heo if (q->last_merge) { 43406b86245STejun Heo ret = elv_try_merge(q->last_merge, bio); 43506b86245STejun Heo if (ret != ELEVATOR_NO_MERGE) { 43606b86245STejun Heo *req = q->last_merge; 43706b86245STejun Heo return ret; 43806b86245STejun Heo } 43906b86245STejun Heo } 4401da177e4SLinus Torvalds 4419817064bSJens Axboe /* 4429817064bSJens Axboe * See if our hash lookup can find a potential backmerge. 4439817064bSJens Axboe */ 4449817064bSJens Axboe __rq = elv_rqhash_find(q, bio->bi_sector); 4459817064bSJens Axboe if (__rq && elv_rq_merge_ok(__rq, bio)) { 4469817064bSJens Axboe *req = __rq; 4479817064bSJens Axboe return ELEVATOR_BACK_MERGE; 4489817064bSJens Axboe } 4499817064bSJens Axboe 4501da177e4SLinus Torvalds if (e->ops->elevator_merge_fn) 4511da177e4SLinus Torvalds return e->ops->elevator_merge_fn(q, req, bio); 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds return ELEVATOR_NO_MERGE; 4541da177e4SLinus Torvalds } 4551da177e4SLinus Torvalds 456*2e662b65SJens Axboe void elv_merged_request(request_queue_t *q, struct request *rq, int type) 4571da177e4SLinus Torvalds { 4581da177e4SLinus Torvalds elevator_t *e = q->elevator; 4591da177e4SLinus Torvalds 4601da177e4SLinus Torvalds if (e->ops->elevator_merged_fn) 461*2e662b65SJens Axboe e->ops->elevator_merged_fn(q, rq, type); 46206b86245STejun Heo 463*2e662b65SJens Axboe if (type == ELEVATOR_BACK_MERGE) 4649817064bSJens Axboe elv_rqhash_reposition(q, rq); 4659817064bSJens Axboe 46606b86245STejun Heo q->last_merge = rq; 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds void elv_merge_requests(request_queue_t *q, struct request *rq, 4701da177e4SLinus Torvalds struct request *next) 4711da177e4SLinus Torvalds { 4721da177e4SLinus Torvalds elevator_t *e = q->elevator; 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds if (e->ops->elevator_merge_req_fn) 4751da177e4SLinus Torvalds e->ops->elevator_merge_req_fn(q, rq, next); 47606b86245STejun Heo 4779817064bSJens Axboe elv_rqhash_reposition(q, rq); 4789817064bSJens Axboe elv_rqhash_del(q, next); 4799817064bSJens Axboe 4809817064bSJens Axboe q->nr_sorted--; 48106b86245STejun Heo q->last_merge = rq; 4821da177e4SLinus Torvalds } 4831da177e4SLinus Torvalds 4848922e16cSTejun Heo void elv_requeue_request(request_queue_t *q, struct request *rq) 4851da177e4SLinus Torvalds { 4861da177e4SLinus Torvalds elevator_t *e = q->elevator; 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds /* 4891da177e4SLinus Torvalds * it already went through dequeue, we need to decrement the 4901da177e4SLinus Torvalds * in_flight count again 4911da177e4SLinus Torvalds */ 4928922e16cSTejun Heo if (blk_account_rq(rq)) { 4931da177e4SLinus Torvalds q->in_flight--; 4948922e16cSTejun Heo if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) 4951da177e4SLinus Torvalds e->ops->elevator_deactivate_req_fn(q, rq); 4961da177e4SLinus Torvalds } 4971da177e4SLinus Torvalds 4984aff5e23SJens Axboe rq->cmd_flags &= ~REQ_STARTED; 4991da177e4SLinus Torvalds 50030e9656cSTejun Heo elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 5011da177e4SLinus Torvalds } 5021da177e4SLinus Torvalds 50315853af9STejun Heo static void elv_drain_elevator(request_queue_t *q) 50415853af9STejun Heo { 50515853af9STejun Heo static int printed; 50615853af9STejun Heo while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 50715853af9STejun Heo ; 50815853af9STejun Heo if (q->nr_sorted == 0) 50915853af9STejun Heo return; 51015853af9STejun Heo if (printed++ < 10) { 51115853af9STejun Heo printk(KERN_ERR "%s: forced dispatching is broken " 51215853af9STejun Heo "(nr_sorted=%u), please report this\n", 51315853af9STejun Heo q->elevator->elevator_type->elevator_name, q->nr_sorted); 51415853af9STejun Heo } 51515853af9STejun Heo } 51615853af9STejun Heo 51730e9656cSTejun Heo void elv_insert(request_queue_t *q, struct request *rq, int where) 5181da177e4SLinus Torvalds { 519797e7dbbSTejun Heo struct list_head *pos; 520797e7dbbSTejun Heo unsigned ordseq; 521dac07ec1SJens Axboe int unplug_it = 1; 522797e7dbbSTejun Heo 5232056a782SJens Axboe blk_add_trace_rq(q, rq, BLK_TA_INSERT); 5242056a782SJens Axboe 5251da177e4SLinus Torvalds rq->q = q; 5261da177e4SLinus Torvalds 5278922e16cSTejun Heo switch (where) { 5288922e16cSTejun Heo case ELEVATOR_INSERT_FRONT: 5294aff5e23SJens Axboe rq->cmd_flags |= REQ_SOFTBARRIER; 5308922e16cSTejun Heo 5318922e16cSTejun Heo list_add(&rq->queuelist, &q->queue_head); 5328922e16cSTejun Heo break; 5338922e16cSTejun Heo 5348922e16cSTejun Heo case ELEVATOR_INSERT_BACK: 5354aff5e23SJens Axboe rq->cmd_flags |= REQ_SOFTBARRIER; 53615853af9STejun Heo elv_drain_elevator(q); 5378922e16cSTejun Heo list_add_tail(&rq->queuelist, &q->queue_head); 5388922e16cSTejun Heo /* 5398922e16cSTejun Heo * We kick the queue here for the following reasons. 5408922e16cSTejun Heo * - The elevator might have returned NULL previously 5418922e16cSTejun Heo * to delay requests and returned them now. As the 5428922e16cSTejun Heo * queue wasn't empty before this request, ll_rw_blk 5438922e16cSTejun Heo * won't run the queue on return, resulting in hang. 5448922e16cSTejun Heo * - Usually, back inserted requests won't be merged 5458922e16cSTejun Heo * with anything. There's no point in delaying queue 5468922e16cSTejun Heo * processing. 5478922e16cSTejun Heo */ 5488922e16cSTejun Heo blk_remove_plug(q); 5498922e16cSTejun Heo q->request_fn(q); 5508922e16cSTejun Heo break; 5518922e16cSTejun Heo 5528922e16cSTejun Heo case ELEVATOR_INSERT_SORT: 5538922e16cSTejun Heo BUG_ON(!blk_fs_request(rq)); 5544aff5e23SJens Axboe rq->cmd_flags |= REQ_SORTED; 55515853af9STejun Heo q->nr_sorted++; 5569817064bSJens Axboe if (rq_mergeable(rq)) { 5579817064bSJens Axboe elv_rqhash_add(q, rq); 5589817064bSJens Axboe if (!q->last_merge) 55906b86245STejun Heo q->last_merge = rq; 5609817064bSJens Axboe } 5619817064bSJens Axboe 562ca23509fSTejun Heo /* 563ca23509fSTejun Heo * Some ioscheds (cfq) run q->request_fn directly, so 564ca23509fSTejun Heo * rq cannot be accessed after calling 565ca23509fSTejun Heo * elevator_add_req_fn. 566ca23509fSTejun Heo */ 567ca23509fSTejun Heo q->elevator->ops->elevator_add_req_fn(q, rq); 5688922e16cSTejun Heo break; 5698922e16cSTejun Heo 570797e7dbbSTejun Heo case ELEVATOR_INSERT_REQUEUE: 571797e7dbbSTejun Heo /* 572797e7dbbSTejun Heo * If ordered flush isn't in progress, we do front 573797e7dbbSTejun Heo * insertion; otherwise, requests should be requeued 574797e7dbbSTejun Heo * in ordseq order. 575797e7dbbSTejun Heo */ 5764aff5e23SJens Axboe rq->cmd_flags |= REQ_SOFTBARRIER; 577797e7dbbSTejun Heo 578797e7dbbSTejun Heo if (q->ordseq == 0) { 579797e7dbbSTejun Heo list_add(&rq->queuelist, &q->queue_head); 580797e7dbbSTejun Heo break; 581797e7dbbSTejun Heo } 582797e7dbbSTejun Heo 583797e7dbbSTejun Heo ordseq = blk_ordered_req_seq(rq); 584797e7dbbSTejun Heo 585797e7dbbSTejun Heo list_for_each(pos, &q->queue_head) { 586797e7dbbSTejun Heo struct request *pos_rq = list_entry_rq(pos); 587797e7dbbSTejun Heo if (ordseq <= blk_ordered_req_seq(pos_rq)) 588797e7dbbSTejun Heo break; 589797e7dbbSTejun Heo } 590797e7dbbSTejun Heo 591797e7dbbSTejun Heo list_add_tail(&rq->queuelist, pos); 592dac07ec1SJens Axboe /* 593dac07ec1SJens Axboe * most requeues happen because of a busy condition, don't 594dac07ec1SJens Axboe * force unplug of the queue for that case. 595dac07ec1SJens Axboe */ 596dac07ec1SJens Axboe unplug_it = 0; 597797e7dbbSTejun Heo break; 598797e7dbbSTejun Heo 5998922e16cSTejun Heo default: 6008922e16cSTejun Heo printk(KERN_ERR "%s: bad insertion point %d\n", 6018922e16cSTejun Heo __FUNCTION__, where); 6028922e16cSTejun Heo BUG(); 6038922e16cSTejun Heo } 6041da177e4SLinus Torvalds 605dac07ec1SJens Axboe if (unplug_it && blk_queue_plugged(q)) { 6061da177e4SLinus Torvalds int nrq = q->rq.count[READ] + q->rq.count[WRITE] 6071da177e4SLinus Torvalds - q->in_flight; 6081da177e4SLinus Torvalds 609c374f127STejun Heo if (nrq >= q->unplug_thresh) 6101da177e4SLinus Torvalds __generic_unplug_device(q); 6111da177e4SLinus Torvalds } 6121da177e4SLinus Torvalds } 6131da177e4SLinus Torvalds 61430e9656cSTejun Heo void __elv_add_request(request_queue_t *q, struct request *rq, int where, 61530e9656cSTejun Heo int plug) 61630e9656cSTejun Heo { 61730e9656cSTejun Heo if (q->ordcolor) 6184aff5e23SJens Axboe rq->cmd_flags |= REQ_ORDERED_COLOR; 61930e9656cSTejun Heo 6204aff5e23SJens Axboe if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 62130e9656cSTejun Heo /* 62230e9656cSTejun Heo * toggle ordered color 62330e9656cSTejun Heo */ 62430e9656cSTejun Heo if (blk_barrier_rq(rq)) 62530e9656cSTejun Heo q->ordcolor ^= 1; 62630e9656cSTejun Heo 62730e9656cSTejun Heo /* 62830e9656cSTejun Heo * barriers implicitly indicate back insertion 62930e9656cSTejun Heo */ 63030e9656cSTejun Heo if (where == ELEVATOR_INSERT_SORT) 63130e9656cSTejun Heo where = ELEVATOR_INSERT_BACK; 63230e9656cSTejun Heo 63330e9656cSTejun Heo /* 63430e9656cSTejun Heo * this request is scheduling boundary, update 63530e9656cSTejun Heo * end_sector 63630e9656cSTejun Heo */ 63730e9656cSTejun Heo if (blk_fs_request(rq)) { 63830e9656cSTejun Heo q->end_sector = rq_end_sector(rq); 63930e9656cSTejun Heo q->boundary_rq = rq; 64030e9656cSTejun Heo } 6414aff5e23SJens Axboe } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 64230e9656cSTejun Heo where = ELEVATOR_INSERT_BACK; 64330e9656cSTejun Heo 64430e9656cSTejun Heo if (plug) 64530e9656cSTejun Heo blk_plug_device(q); 64630e9656cSTejun Heo 64730e9656cSTejun Heo elv_insert(q, rq, where); 64830e9656cSTejun Heo } 64930e9656cSTejun Heo 650*2e662b65SJens Axboe EXPORT_SYMBOL(__elv_add_request); 651*2e662b65SJens Axboe 6521da177e4SLinus Torvalds void elv_add_request(request_queue_t *q, struct request *rq, int where, 6531da177e4SLinus Torvalds int plug) 6541da177e4SLinus Torvalds { 6551da177e4SLinus Torvalds unsigned long flags; 6561da177e4SLinus Torvalds 6571da177e4SLinus Torvalds spin_lock_irqsave(q->queue_lock, flags); 6581da177e4SLinus Torvalds __elv_add_request(q, rq, where, plug); 6591da177e4SLinus Torvalds spin_unlock_irqrestore(q->queue_lock, flags); 6601da177e4SLinus Torvalds } 6611da177e4SLinus Torvalds 662*2e662b65SJens Axboe EXPORT_SYMBOL(elv_add_request); 663*2e662b65SJens Axboe 6641da177e4SLinus Torvalds static inline struct request *__elv_next_request(request_queue_t *q) 6651da177e4SLinus Torvalds { 6668922e16cSTejun Heo struct request *rq; 6678922e16cSTejun Heo 668797e7dbbSTejun Heo while (1) { 669797e7dbbSTejun Heo while (!list_empty(&q->queue_head)) { 6708922e16cSTejun Heo rq = list_entry_rq(q->queue_head.next); 671797e7dbbSTejun Heo if (blk_do_ordered(q, &rq)) 672797e7dbbSTejun Heo return rq; 6731da177e4SLinus Torvalds } 6741da177e4SLinus Torvalds 675797e7dbbSTejun Heo if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) 676797e7dbbSTejun Heo return NULL; 677797e7dbbSTejun Heo } 6781da177e4SLinus Torvalds } 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds struct request *elv_next_request(request_queue_t *q) 6811da177e4SLinus Torvalds { 6821da177e4SLinus Torvalds struct request *rq; 6831da177e4SLinus Torvalds int ret; 6841da177e4SLinus Torvalds 6851da177e4SLinus Torvalds while ((rq = __elv_next_request(q)) != NULL) { 6864aff5e23SJens Axboe if (!(rq->cmd_flags & REQ_STARTED)) { 6878922e16cSTejun Heo elevator_t *e = q->elevator; 6888922e16cSTejun Heo 6891da177e4SLinus Torvalds /* 6908922e16cSTejun Heo * This is the first time the device driver 6918922e16cSTejun Heo * sees this request (possibly after 6928922e16cSTejun Heo * requeueing). Notify IO scheduler. 6938922e16cSTejun Heo */ 6948922e16cSTejun Heo if (blk_sorted_rq(rq) && 6958922e16cSTejun Heo e->ops->elevator_activate_req_fn) 6968922e16cSTejun Heo e->ops->elevator_activate_req_fn(q, rq); 6978922e16cSTejun Heo 6988922e16cSTejun Heo /* 6998922e16cSTejun Heo * just mark as started even if we don't start 7008922e16cSTejun Heo * it, a request that has been delayed should 7018922e16cSTejun Heo * not be passed by new incoming requests 7021da177e4SLinus Torvalds */ 7034aff5e23SJens Axboe rq->cmd_flags |= REQ_STARTED; 7042056a782SJens Axboe blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 7058922e16cSTejun Heo } 7061da177e4SLinus Torvalds 7078922e16cSTejun Heo if (!q->boundary_rq || q->boundary_rq == rq) { 7081b47f531SJens Axboe q->end_sector = rq_end_sector(rq); 7098922e16cSTejun Heo q->boundary_rq = NULL; 7108922e16cSTejun Heo } 7111da177e4SLinus Torvalds 7124aff5e23SJens Axboe if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) 7131da177e4SLinus Torvalds break; 7141da177e4SLinus Torvalds 7151da177e4SLinus Torvalds ret = q->prep_rq_fn(q, rq); 7161da177e4SLinus Torvalds if (ret == BLKPREP_OK) { 7171da177e4SLinus Torvalds break; 7181da177e4SLinus Torvalds } else if (ret == BLKPREP_DEFER) { 7192e759cd4STejun Heo /* 7202e759cd4STejun Heo * the request may have been (partially) prepped. 7212e759cd4STejun Heo * we need to keep this request in the front to 7228922e16cSTejun Heo * avoid resource deadlock. REQ_STARTED will 7238922e16cSTejun Heo * prevent other fs requests from passing this one. 7242e759cd4STejun Heo */ 7251da177e4SLinus Torvalds rq = NULL; 7261da177e4SLinus Torvalds break; 7271da177e4SLinus Torvalds } else if (ret == BLKPREP_KILL) { 7281da177e4SLinus Torvalds int nr_bytes = rq->hard_nr_sectors << 9; 7291da177e4SLinus Torvalds 7301da177e4SLinus Torvalds if (!nr_bytes) 7311da177e4SLinus Torvalds nr_bytes = rq->data_len; 7321da177e4SLinus Torvalds 7331da177e4SLinus Torvalds blkdev_dequeue_request(rq); 7344aff5e23SJens Axboe rq->cmd_flags |= REQ_QUIET; 7351da177e4SLinus Torvalds end_that_request_chunk(rq, 0, nr_bytes); 7368ffdc655STejun Heo end_that_request_last(rq, 0); 7371da177e4SLinus Torvalds } else { 7381da177e4SLinus Torvalds printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 7391da177e4SLinus Torvalds ret); 7401da177e4SLinus Torvalds break; 7411da177e4SLinus Torvalds } 7421da177e4SLinus Torvalds } 7431da177e4SLinus Torvalds 7441da177e4SLinus Torvalds return rq; 7451da177e4SLinus Torvalds } 7461da177e4SLinus Torvalds 747*2e662b65SJens Axboe EXPORT_SYMBOL(elv_next_request); 748*2e662b65SJens Axboe 7498922e16cSTejun Heo void elv_dequeue_request(request_queue_t *q, struct request *rq) 7501da177e4SLinus Torvalds { 7518922e16cSTejun Heo BUG_ON(list_empty(&rq->queuelist)); 7529817064bSJens Axboe BUG_ON(ELV_ON_HASH(rq)); 7538922e16cSTejun Heo 7548922e16cSTejun Heo list_del_init(&rq->queuelist); 7551da177e4SLinus Torvalds 7561da177e4SLinus Torvalds /* 7571da177e4SLinus Torvalds * the time frame between a request being removed from the lists 7581da177e4SLinus Torvalds * and to it is freed is accounted as io that is in progress at 7598922e16cSTejun Heo * the driver side. 7601da177e4SLinus Torvalds */ 7611da177e4SLinus Torvalds if (blk_account_rq(rq)) 7621da177e4SLinus Torvalds q->in_flight++; 7631da177e4SLinus Torvalds } 7641da177e4SLinus Torvalds 765*2e662b65SJens Axboe EXPORT_SYMBOL(elv_dequeue_request); 766*2e662b65SJens Axboe 7671da177e4SLinus Torvalds int elv_queue_empty(request_queue_t *q) 7681da177e4SLinus Torvalds { 7691da177e4SLinus Torvalds elevator_t *e = q->elevator; 7701da177e4SLinus Torvalds 7718922e16cSTejun Heo if (!list_empty(&q->queue_head)) 7728922e16cSTejun Heo return 0; 7738922e16cSTejun Heo 7741da177e4SLinus Torvalds if (e->ops->elevator_queue_empty_fn) 7751da177e4SLinus Torvalds return e->ops->elevator_queue_empty_fn(q); 7761da177e4SLinus Torvalds 7778922e16cSTejun Heo return 1; 7781da177e4SLinus Torvalds } 7791da177e4SLinus Torvalds 780*2e662b65SJens Axboe EXPORT_SYMBOL(elv_queue_empty); 781*2e662b65SJens Axboe 7821da177e4SLinus Torvalds struct request *elv_latter_request(request_queue_t *q, struct request *rq) 7831da177e4SLinus Torvalds { 7841da177e4SLinus Torvalds elevator_t *e = q->elevator; 7851da177e4SLinus Torvalds 7861da177e4SLinus Torvalds if (e->ops->elevator_latter_req_fn) 7871da177e4SLinus Torvalds return e->ops->elevator_latter_req_fn(q, rq); 7881da177e4SLinus Torvalds return NULL; 7891da177e4SLinus Torvalds } 7901da177e4SLinus Torvalds 7911da177e4SLinus Torvalds struct request *elv_former_request(request_queue_t *q, struct request *rq) 7921da177e4SLinus Torvalds { 7931da177e4SLinus Torvalds elevator_t *e = q->elevator; 7941da177e4SLinus Torvalds 7951da177e4SLinus Torvalds if (e->ops->elevator_former_req_fn) 7961da177e4SLinus Torvalds return e->ops->elevator_former_req_fn(q, rq); 7971da177e4SLinus Torvalds return NULL; 7981da177e4SLinus Torvalds } 7991da177e4SLinus Torvalds 80022e2c507SJens Axboe int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, 8018267e268SAl Viro gfp_t gfp_mask) 8021da177e4SLinus Torvalds { 8031da177e4SLinus Torvalds elevator_t *e = q->elevator; 8041da177e4SLinus Torvalds 8051da177e4SLinus Torvalds if (e->ops->elevator_set_req_fn) 80622e2c507SJens Axboe return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds rq->elevator_private = NULL; 8091da177e4SLinus Torvalds return 0; 8101da177e4SLinus Torvalds } 8111da177e4SLinus Torvalds 8121da177e4SLinus Torvalds void elv_put_request(request_queue_t *q, struct request *rq) 8131da177e4SLinus Torvalds { 8141da177e4SLinus Torvalds elevator_t *e = q->elevator; 8151da177e4SLinus Torvalds 8161da177e4SLinus Torvalds if (e->ops->elevator_put_req_fn) 8171da177e4SLinus Torvalds e->ops->elevator_put_req_fn(q, rq); 8181da177e4SLinus Torvalds } 8191da177e4SLinus Torvalds 82022e2c507SJens Axboe int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) 8211da177e4SLinus Torvalds { 8221da177e4SLinus Torvalds elevator_t *e = q->elevator; 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds if (e->ops->elevator_may_queue_fn) 82522e2c507SJens Axboe return e->ops->elevator_may_queue_fn(q, rw, bio); 8261da177e4SLinus Torvalds 8271da177e4SLinus Torvalds return ELV_MQUEUE_MAY; 8281da177e4SLinus Torvalds } 8291da177e4SLinus Torvalds 8301da177e4SLinus Torvalds void elv_completed_request(request_queue_t *q, struct request *rq) 8311da177e4SLinus Torvalds { 8321da177e4SLinus Torvalds elevator_t *e = q->elevator; 8331da177e4SLinus Torvalds 8341da177e4SLinus Torvalds /* 8351da177e4SLinus Torvalds * request is released from the driver, io must be done 8361da177e4SLinus Torvalds */ 8378922e16cSTejun Heo if (blk_account_rq(rq)) { 8381da177e4SLinus Torvalds q->in_flight--; 8391bc691d3STejun Heo if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 8401bc691d3STejun Heo e->ops->elevator_completed_req_fn(q, rq); 8411bc691d3STejun Heo } 842797e7dbbSTejun Heo 843797e7dbbSTejun Heo /* 844797e7dbbSTejun Heo * Check if the queue is waiting for fs requests to be 845797e7dbbSTejun Heo * drained for flush sequence. 846797e7dbbSTejun Heo */ 8471bc691d3STejun Heo if (unlikely(q->ordseq)) { 8481bc691d3STejun Heo struct request *first_rq = list_entry_rq(q->queue_head.next); 8491bc691d3STejun Heo if (q->in_flight == 0 && 850797e7dbbSTejun Heo blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && 851797e7dbbSTejun Heo blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { 852797e7dbbSTejun Heo blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); 853797e7dbbSTejun Heo q->request_fn(q); 854797e7dbbSTejun Heo } 8551da177e4SLinus Torvalds } 8568922e16cSTejun Heo } 8571da177e4SLinus Torvalds 8583d1ab40fSAl Viro #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 8593d1ab40fSAl Viro 8603d1ab40fSAl Viro static ssize_t 8613d1ab40fSAl Viro elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 8623d1ab40fSAl Viro { 8633d1ab40fSAl Viro elevator_t *e = container_of(kobj, elevator_t, kobj); 8643d1ab40fSAl Viro struct elv_fs_entry *entry = to_elv(attr); 8653d1ab40fSAl Viro ssize_t error; 8663d1ab40fSAl Viro 8673d1ab40fSAl Viro if (!entry->show) 8683d1ab40fSAl Viro return -EIO; 8693d1ab40fSAl Viro 8703d1ab40fSAl Viro mutex_lock(&e->sysfs_lock); 8713d1ab40fSAl Viro error = e->ops ? entry->show(e, page) : -ENOENT; 8723d1ab40fSAl Viro mutex_unlock(&e->sysfs_lock); 8733d1ab40fSAl Viro return error; 8743d1ab40fSAl Viro } 8753d1ab40fSAl Viro 8763d1ab40fSAl Viro static ssize_t 8773d1ab40fSAl Viro elv_attr_store(struct kobject *kobj, struct attribute *attr, 8783d1ab40fSAl Viro const char *page, size_t length) 8793d1ab40fSAl Viro { 8803d1ab40fSAl Viro elevator_t *e = container_of(kobj, elevator_t, kobj); 8813d1ab40fSAl Viro struct elv_fs_entry *entry = to_elv(attr); 8823d1ab40fSAl Viro ssize_t error; 8833d1ab40fSAl Viro 8843d1ab40fSAl Viro if (!entry->store) 8853d1ab40fSAl Viro return -EIO; 8863d1ab40fSAl Viro 8873d1ab40fSAl Viro mutex_lock(&e->sysfs_lock); 8883d1ab40fSAl Viro error = e->ops ? entry->store(e, page, length) : -ENOENT; 8893d1ab40fSAl Viro mutex_unlock(&e->sysfs_lock); 8903d1ab40fSAl Viro return error; 8913d1ab40fSAl Viro } 8923d1ab40fSAl Viro 8933d1ab40fSAl Viro static struct sysfs_ops elv_sysfs_ops = { 8943d1ab40fSAl Viro .show = elv_attr_show, 8953d1ab40fSAl Viro .store = elv_attr_store, 8963d1ab40fSAl Viro }; 8973d1ab40fSAl Viro 8983d1ab40fSAl Viro static struct kobj_type elv_ktype = { 8993d1ab40fSAl Viro .sysfs_ops = &elv_sysfs_ops, 9003d1ab40fSAl Viro .release = elevator_release, 9013d1ab40fSAl Viro }; 9023d1ab40fSAl Viro 9031da177e4SLinus Torvalds int elv_register_queue(struct request_queue *q) 9041da177e4SLinus Torvalds { 9051da177e4SLinus Torvalds elevator_t *e = q->elevator; 9063d1ab40fSAl Viro int error; 9071da177e4SLinus Torvalds 9083d1ab40fSAl Viro e->kobj.parent = &q->kobj; 9091da177e4SLinus Torvalds 9103d1ab40fSAl Viro error = kobject_add(&e->kobj); 9113d1ab40fSAl Viro if (!error) { 912e572ec7eSAl Viro struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; 9133d1ab40fSAl Viro if (attr) { 914e572ec7eSAl Viro while (attr->attr.name) { 915e572ec7eSAl Viro if (sysfs_create_file(&e->kobj, &attr->attr)) 9163d1ab40fSAl Viro break; 917e572ec7eSAl Viro attr++; 9183d1ab40fSAl Viro } 9193d1ab40fSAl Viro } 9203d1ab40fSAl Viro kobject_uevent(&e->kobj, KOBJ_ADD); 9213d1ab40fSAl Viro } 9223d1ab40fSAl Viro return error; 9231da177e4SLinus Torvalds } 9241da177e4SLinus Torvalds 925bc1c1169SJens Axboe static void __elv_unregister_queue(elevator_t *e) 9261da177e4SLinus Torvalds { 9273d1ab40fSAl Viro kobject_uevent(&e->kobj, KOBJ_REMOVE); 9283d1ab40fSAl Viro kobject_del(&e->kobj); 9291da177e4SLinus Torvalds } 930bc1c1169SJens Axboe 931bc1c1169SJens Axboe void elv_unregister_queue(struct request_queue *q) 932bc1c1169SJens Axboe { 933bc1c1169SJens Axboe if (q) 934bc1c1169SJens Axboe __elv_unregister_queue(q->elevator); 9351da177e4SLinus Torvalds } 9361da177e4SLinus Torvalds 9371da177e4SLinus Torvalds int elv_register(struct elevator_type *e) 9381da177e4SLinus Torvalds { 9392824bc93STejun Heo spin_lock_irq(&elv_list_lock); 940ce524497SEric Sesterhenn BUG_ON(elevator_find(e->elevator_name)); 9411da177e4SLinus Torvalds list_add_tail(&e->list, &elv_list); 9421da177e4SLinus Torvalds spin_unlock_irq(&elv_list_lock); 9431da177e4SLinus Torvalds 9441da177e4SLinus Torvalds printk(KERN_INFO "io scheduler %s registered", e->elevator_name); 9455f003976SNate Diller if (!strcmp(e->elevator_name, chosen_elevator) || 9465f003976SNate Diller (!*chosen_elevator && 9475f003976SNate Diller !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 9481da177e4SLinus Torvalds printk(" (default)"); 9491da177e4SLinus Torvalds printk("\n"); 9501da177e4SLinus Torvalds return 0; 9511da177e4SLinus Torvalds } 9521da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(elv_register); 9531da177e4SLinus Torvalds 9541da177e4SLinus Torvalds void elv_unregister(struct elevator_type *e) 9551da177e4SLinus Torvalds { 95683521d3eSChristoph Hellwig struct task_struct *g, *p; 95783521d3eSChristoph Hellwig 95883521d3eSChristoph Hellwig /* 95983521d3eSChristoph Hellwig * Iterate every thread in the process to remove the io contexts. 96083521d3eSChristoph Hellwig */ 961e17a9489SAl Viro if (e->ops.trim) { 96283521d3eSChristoph Hellwig read_lock(&tasklist_lock); 96383521d3eSChristoph Hellwig do_each_thread(g, p) { 964e17a9489SAl Viro task_lock(p); 9652d8f6131SOleg Nesterov if (p->io_context) 966e17a9489SAl Viro e->ops.trim(p->io_context); 967e17a9489SAl Viro task_unlock(p); 96883521d3eSChristoph Hellwig } while_each_thread(g, p); 96983521d3eSChristoph Hellwig read_unlock(&tasklist_lock); 970e17a9489SAl Viro } 97183521d3eSChristoph Hellwig 9721da177e4SLinus Torvalds spin_lock_irq(&elv_list_lock); 9731da177e4SLinus Torvalds list_del_init(&e->list); 9741da177e4SLinus Torvalds spin_unlock_irq(&elv_list_lock); 9751da177e4SLinus Torvalds } 9761da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(elv_unregister); 9771da177e4SLinus Torvalds 9781da177e4SLinus Torvalds /* 9791da177e4SLinus Torvalds * switch to new_e io scheduler. be careful not to introduce deadlocks - 9801da177e4SLinus Torvalds * we don't free the old io scheduler, before we have allocated what we 9811da177e4SLinus Torvalds * need for the new one. this way we have a chance of going back to the old 982cb98fc8bSTejun Heo * one, if the new one fails init for some reason. 9831da177e4SLinus Torvalds */ 9843d1ab40fSAl Viro static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) 9851da177e4SLinus Torvalds { 986cb98fc8bSTejun Heo elevator_t *old_elevator, *e; 987bc1c1169SJens Axboe void *data; 9881da177e4SLinus Torvalds 989cb98fc8bSTejun Heo /* 990cb98fc8bSTejun Heo * Allocate new elevator 991cb98fc8bSTejun Heo */ 9923d1ab40fSAl Viro e = elevator_alloc(new_e); 9931da177e4SLinus Torvalds if (!e) 9943d1ab40fSAl Viro return 0; 9951da177e4SLinus Torvalds 996bc1c1169SJens Axboe data = elevator_init_queue(q, e); 997bc1c1169SJens Axboe if (!data) { 998bc1c1169SJens Axboe kobject_put(&e->kobj); 999bc1c1169SJens Axboe return 0; 1000bc1c1169SJens Axboe } 1001bc1c1169SJens Axboe 10021da177e4SLinus Torvalds /* 1003cb98fc8bSTejun Heo * Turn on BYPASS and drain all requests w/ elevator private data 10041da177e4SLinus Torvalds */ 1005cb98fc8bSTejun Heo spin_lock_irq(q->queue_lock); 1006cb98fc8bSTejun Heo 100764521d1aSJens Axboe set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1008cb98fc8bSTejun Heo 100915853af9STejun Heo elv_drain_elevator(q); 1010cb98fc8bSTejun Heo 1011cb98fc8bSTejun Heo while (q->rq.elvpriv) { 1012407df2aaSTejun Heo blk_remove_plug(q); 1013407df2aaSTejun Heo q->request_fn(q); 1014cb98fc8bSTejun Heo spin_unlock_irq(q->queue_lock); 101564521d1aSJens Axboe msleep(10); 1016cb98fc8bSTejun Heo spin_lock_irq(q->queue_lock); 101715853af9STejun Heo elv_drain_elevator(q); 1018cb98fc8bSTejun Heo } 1019cb98fc8bSTejun Heo 10201da177e4SLinus Torvalds /* 1021bc1c1169SJens Axboe * Remember old elevator. 10221da177e4SLinus Torvalds */ 10231da177e4SLinus Torvalds old_elevator = q->elevator; 10241da177e4SLinus Torvalds 10251da177e4SLinus Torvalds /* 10261da177e4SLinus Torvalds * attach and start new elevator 10271da177e4SLinus Torvalds */ 1028bc1c1169SJens Axboe elevator_attach(q, e, data); 1029bc1c1169SJens Axboe 1030bc1c1169SJens Axboe spin_unlock_irq(q->queue_lock); 1031bc1c1169SJens Axboe 1032bc1c1169SJens Axboe __elv_unregister_queue(old_elevator); 10331da177e4SLinus Torvalds 10341da177e4SLinus Torvalds if (elv_register_queue(q)) 10351da177e4SLinus Torvalds goto fail_register; 10361da177e4SLinus Torvalds 10371da177e4SLinus Torvalds /* 1038cb98fc8bSTejun Heo * finally exit old elevator and turn off BYPASS. 10391da177e4SLinus Torvalds */ 10401da177e4SLinus Torvalds elevator_exit(old_elevator); 104164521d1aSJens Axboe clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 10423d1ab40fSAl Viro return 1; 10431da177e4SLinus Torvalds 10441da177e4SLinus Torvalds fail_register: 10451da177e4SLinus Torvalds /* 10461da177e4SLinus Torvalds * switch failed, exit the new io scheduler and reattach the old 10471da177e4SLinus Torvalds * one again (along with re-adding the sysfs dir) 10481da177e4SLinus Torvalds */ 10491da177e4SLinus Torvalds elevator_exit(e); 10501da177e4SLinus Torvalds q->elevator = old_elevator; 10511da177e4SLinus Torvalds elv_register_queue(q); 105264521d1aSJens Axboe clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 10533d1ab40fSAl Viro return 0; 10541da177e4SLinus Torvalds } 10551da177e4SLinus Torvalds 10561da177e4SLinus Torvalds ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) 10571da177e4SLinus Torvalds { 10581da177e4SLinus Torvalds char elevator_name[ELV_NAME_MAX]; 1059be561235STejun Heo size_t len; 10601da177e4SLinus Torvalds struct elevator_type *e; 10611da177e4SLinus Torvalds 1062be561235STejun Heo elevator_name[sizeof(elevator_name) - 1] = '\0'; 1063be561235STejun Heo strncpy(elevator_name, name, sizeof(elevator_name) - 1); 1064be561235STejun Heo len = strlen(elevator_name); 10651da177e4SLinus Torvalds 1066be561235STejun Heo if (len && elevator_name[len - 1] == '\n') 1067be561235STejun Heo elevator_name[len - 1] = '\0'; 10681da177e4SLinus Torvalds 10691da177e4SLinus Torvalds e = elevator_get(elevator_name); 10701da177e4SLinus Torvalds if (!e) { 10711da177e4SLinus Torvalds printk(KERN_ERR "elevator: type %s not found\n", elevator_name); 10721da177e4SLinus Torvalds return -EINVAL; 10731da177e4SLinus Torvalds } 10741da177e4SLinus Torvalds 10752ca7d93bSNate Diller if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { 10762ca7d93bSNate Diller elevator_put(e); 10771da177e4SLinus Torvalds return count; 10782ca7d93bSNate Diller } 10791da177e4SLinus Torvalds 10803d1ab40fSAl Viro if (!elevator_switch(q, e)) 10813d1ab40fSAl Viro printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); 10821da177e4SLinus Torvalds return count; 10831da177e4SLinus Torvalds } 10841da177e4SLinus Torvalds 10851da177e4SLinus Torvalds ssize_t elv_iosched_show(request_queue_t *q, char *name) 10861da177e4SLinus Torvalds { 10871da177e4SLinus Torvalds elevator_t *e = q->elevator; 10881da177e4SLinus Torvalds struct elevator_type *elv = e->elevator_type; 10891da177e4SLinus Torvalds struct list_head *entry; 10901da177e4SLinus Torvalds int len = 0; 10911da177e4SLinus Torvalds 10921da177e4SLinus Torvalds spin_lock_irq(q->queue_lock); 10931da177e4SLinus Torvalds list_for_each(entry, &elv_list) { 10941da177e4SLinus Torvalds struct elevator_type *__e; 10951da177e4SLinus Torvalds 10961da177e4SLinus Torvalds __e = list_entry(entry, struct elevator_type, list); 10971da177e4SLinus Torvalds if (!strcmp(elv->elevator_name, __e->elevator_name)) 10981da177e4SLinus Torvalds len += sprintf(name+len, "[%s] ", elv->elevator_name); 10991da177e4SLinus Torvalds else 11001da177e4SLinus Torvalds len += sprintf(name+len, "%s ", __e->elevator_name); 11011da177e4SLinus Torvalds } 11021da177e4SLinus Torvalds spin_unlock_irq(q->queue_lock); 11031da177e4SLinus Torvalds 11041da177e4SLinus Torvalds len += sprintf(len+name, "\n"); 11051da177e4SLinus Torvalds return len; 11061da177e4SLinus Torvalds } 11071da177e4SLinus Torvalds 1108*2e662b65SJens Axboe struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) 1109*2e662b65SJens Axboe { 1110*2e662b65SJens Axboe struct rb_node *rbprev = rb_prev(&rq->rb_node); 1111*2e662b65SJens Axboe 1112*2e662b65SJens Axboe if (rbprev) 1113*2e662b65SJens Axboe return rb_entry_rq(rbprev); 1114*2e662b65SJens Axboe 1115*2e662b65SJens Axboe return NULL; 1116*2e662b65SJens Axboe } 1117*2e662b65SJens Axboe 1118*2e662b65SJens Axboe EXPORT_SYMBOL(elv_rb_former_request); 1119*2e662b65SJens Axboe 1120*2e662b65SJens Axboe struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) 1121*2e662b65SJens Axboe { 1122*2e662b65SJens Axboe struct rb_node *rbnext = rb_next(&rq->rb_node); 1123*2e662b65SJens Axboe 1124*2e662b65SJens Axboe if (rbnext) 1125*2e662b65SJens Axboe return rb_entry_rq(rbnext); 1126*2e662b65SJens Axboe 1127*2e662b65SJens Axboe return NULL; 1128*2e662b65SJens Axboe } 1129*2e662b65SJens Axboe 1130*2e662b65SJens Axboe EXPORT_SYMBOL(elv_rb_latter_request); 1131