xref: /linux/block/elevator.c (revision 95543179f158b4891c5dc49004853ce081e8d794)
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons; partition handling is correct now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash: requests are hashed on the sector at which they end
 * (rq_hash_key() below), to find back-merge candidates quickly.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

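/*
 * Worked example (illustrative): a request starting at sector 96 with
 * nr_sectors == 8 hashes under key 96 + 8 = 104.  A later bio with
 * bi_sector == 104 looks up key 104 via elv_rqhash_find() and finds
 * this request as a back-merge candidate in O(1), without scanning
 * the whole queue.
 */
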
/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
	request_queue_t *q = rq->q;
	elevator_t *e = q->elevator;

	if (e->ops->elevator_allow_merge_fn)
		return e->ops->elevator_allow_merge_fn(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * must be same device and not a special request
	 */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
		return 0;

	if (!elv_iosched_allow_merge(rq, bio))
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * the request is mergeable; check whether the bio lines up
	 * with either end of it
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

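/*
 * Worked example (illustrative): __rq covers sectors [100, 108).  A bio
 * with bi_sector == 108 satisfies 100 + 8 == 108 and back-merges onto
 * the tail of __rq.  A bio covering [92, 100) (bi_sector == 92,
 * bio_sectors() == 8) satisfies 100 - 8 == 92 and front-merges, since
 * it ends exactly where __rq begins.
 */
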
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;
	struct list_head *entry;

	list_for_each(entry, &elv_list) {

		e = list_entry(entry, struct elevator_type, list);

		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock_irq(&elv_list_lock);

	e = elevator_find(name);
	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock_irq(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
			   void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

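/*
 * Usage example: booting with "elevator=deadline" on the kernel command
 * line stores "deadline" in chosen_elevator, and elevator_init() below
 * will then prefer it over CONFIG_DEFAULT_IOSCHED for every queue that
 * does not ask for a specific scheduler by name.
 */
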
static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
{
	elevator_t *eq;
	int i;

	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		goto err;

	memset(eq, 0, sizeof(*eq));
	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj);
	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
	eq->kobj.ktype = &elv_ktype;
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
					GFP_KERNEL, q->node);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);

	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

int elevator_init(request_queue_t *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name && !(e = elevator_get(name)))
		return -EINVAL;

	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
		printk("I/O scheduler %s not found\n", chosen_elevator);

	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
		printk("Default I/O scheduler not found, using no-op\n");
		e = elevator_get("noop");
	}

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
	elevator_t *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

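/*
 * Usage sketch (illustrative, names hypothetical): an io scheduler
 * typically keeps one sector-sorted tree per data direction and uses
 * these helpers for re-sorting and for front-merge lookups, along
 * these lines:
 */
#if 0
static void example_add_rq_rb(struct example_data *ed, struct request *rq)
{
	struct request *alias;

	/* a returned alias means a request at that sector already exists */
	while ((alias = elv_rb_add(&ed->sort_list[rq_data_dir(rq)], rq)))
		example_dispatch_request(ed, alias);
}

static struct request *example_find_front_merge(struct example_data *ed,
						struct bio *bio)
{
	/* a request starting right after the bio is a front-merge candidate */
	return elv_rb_find(&ed->sort_list[bio_data_dir(bio)],
			   bio->bi_sector + bio_sectors(bio));
}
#endif
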
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sort-inserted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;

	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

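/*
 * Worked example (illustrative): with boundary == 500, the scan treats
 * sectors >= 500 as coming before sectors < 500, so a dispatch list
 * ordered 520, 580, 40, 120 counts as sorted (it follows the disk head
 * from the boundary, wrapping once).  A new rq at sector 600 is placed
 * between 580 and 40; one at sector 80 lands between 40 and 120.
 */
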
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);

int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
			     struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
			e->ops->elevator_deactivate_req_fn(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
	static int printed;

	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

void elv_insert(request_queue_t *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		/*
		 * Most requeues happen because of a busy condition,
		 * don't force unplug of the queue for that case.
		 */
		unplug_it = 0;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

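/*
 * Usage sketch (illustrative, names hypothetical): a caller that wants a
 * command executed ahead of everything already queued can push it at the
 * dispatch-queue head; elv_add_request() below takes the queue lock
 * itself, so no locking is needed here.
 */
#if 0
static void example_issue_urgent(request_queue_t *q, struct request *rq)
{
	rq->cmd_type = REQ_TYPE_SPECIAL;	/* non-fs, bypasses the io scheduler */
	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
}
#endif
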
void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}

struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			elevator_t *e = q->elevator;

			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq) &&
			    e->ops->elevator_activate_req_fn)
				e->ops->elevator_activate_req_fn(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			blkdev_dequeue_request(rq);
			rq->cmd_flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
								ret);
			break;
		}
	}

	return rq;
}

EXPORT_SYMBOL(elv_next_request);

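/*
 * Usage sketch (illustrative, names hypothetical): the classic
 * request_fn consumption loop in a block driver pairs elv_next_request()
 * with blkdev_dequeue_request() once a request is handed to the hardware:
 */
#if 0
static void example_request_fn(request_queue_t *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (!blk_fs_request(rq)) {
			end_request(rq, 0);	/* fail non-fs requests */
			continue;
		}
		blkdev_dequeue_request(rq);	/* now accounted as in_flight */
		example_start_io(rq);		/* hand off to the hardware */
	}
}
#endif
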
void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and it being freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(rq);
}

int elv_may_queue(request_queue_t *q, int rw)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->show)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->store)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

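/*
 * Usage sketch (illustrative, names hypothetical): an io scheduler
 * exposes tunables by pointing elevator_type->elevator_attrs at a
 * NULL-terminated table; elv_register_queue() below creates one sysfs
 * file per entry under /sys/block/<dev>/queue/iosched/.
 */
#if 0
static ssize_t example_quantum_show(elevator_t *e, char *page)
{
	struct example_data *ed = e->elevator_data;

	return sprintf(page, "%d\n", ed->quantum);
}

static struct elv_fs_entry example_attrs[] = {
	__ATTR(quantum, S_IRUGO, example_quantum_show, NULL),
	__ATTR_NULL
};
#endif
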
int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	e->kobj.parent = &q->kobj;

	error = kobject_add(&e->kobj);
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
	spin_lock_irq(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock_irq(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				printk(" (default)");
	printk("\n");
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

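/*
 * Usage sketch (illustrative, names hypothetical): an io scheduler
 * module registers its elevator_type at init time and unregisters it
 * on exit, along the lines of the built-in noop scheduler.
 */
#if 0
static struct elevator_type iosched_example = {
	.ops = {
		.elevator_init_fn	= example_init_queue,
		.elevator_exit_fn	= example_exit_queue,
	},
	.elevator_name	= "example",
	.elevator_owner	= THIS_MODULE,
};

static int __init example_init(void)
{
	return elv_register(&iosched_example);
}

static void __exit example_exit(void)
{
	elv_unregister(&iosched_example);
}

module_init(example_init);
module_exit(example_exit);
#endif
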
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the system to drop its io context.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock_irq(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(q, new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
	return count;
}

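/*
 * Usage example: this is the store side of the sysfs scheduler switch,
 * i.e. what runs when an administrator does
 *
 *	echo deadline > /sys/block/<dev>/queue/scheduler
 *
 * elv_iosched_show() below is the matching read side; it brackets the
 * active scheduler in '[ ]'.
 */
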
ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct list_head *entry;
	int len = 0;

	spin_lock_irq(&elv_list_lock);
	list_for_each(entry, &elv_list) {
		struct elevator_type *__e;

		__e = list_entry(entry, struct elevator_type, list);
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock_irq(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);
1146