xref: /linux/block/elevator.c (revision 2e662b65f05d550b6799ed6bfa9963b82279e6b7)
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
static const int elv_hash_shift = 6;
#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))

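/*
 * Illustrative sketch (editor's addition, not in the original source):
 * the hash key of a request is its *end* sector, so a bio that could
 * back-merge onto a request hashes into the same bucket as the request
 * itself.  For a hypothetical request rq covering sectors 100..107:
 *
 *	rq_hash_key(rq) == 100 + 8 == 108
 *	ELV_HASH_FN(108) == hash_long(108 >> 3, 6)
 *
 * A bio beginning at sector 108 probes bucket ELV_HASH_FN(108) and
 * finds rq as a back-merge candidate (see elv_rqhash_find() below).
 */
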
/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * same device and no special stuff set, merge is ok
	 */
	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
	    !rq->waiting && !rq->special)
		return 1;

	return 0;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * we can merge and sequence is ok, check if it's possible
	 */
	if (elv_rq_merge_ok(__rq, bio)) {
		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
			ret = ELEVATOR_BACK_MERGE;
		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
			ret = ELEVATOR_FRONT_MERGE;
	}

	return ret;
}

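/*
 * Worked example (editor's addition): for a hypothetical __rq covering
 * sectors 100..107 (nr_sectors == 8) and an 8-sector bio:
 *
 *	bio->bi_sector == 108: 100 + 8 == 108	-> ELEVATOR_BACK_MERGE
 *	bio->bi_sector ==  92: 100 - 8 ==  92	-> ELEVATOR_FRONT_MERGE
 *	anything else				-> ELEVATOR_NO_MERGE
 */
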
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e = NULL;
	struct list_head *entry;

	list_for_each(entry, &elv_list) {
		struct elevator_type *__e;

		__e = list_entry(entry, struct elevator_type, list);

		if (!strcmp(__e->elevator_name, name)) {
			e = __e;
			break;
		}
	}

	return e;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock_irq(&elv_list_lock);

	e = elevator_find(name);
	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock_irq(&elv_list_lock);

	return e;
}

static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
{
	return eq->ops->elevator_init_fn(q, eq);
}

static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
			    void *data)
{
	q->elevator = eq;
	eq->elevator_data = data;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	if (!strcmp(str, "as"))
		strcpy(chosen_elevator, "anticipatory");
	else
		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

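/*
 * Usage note (editor's addition): the handler above implements the
 * "elevator=" kernel command line parameter, e.g.
 *
 *	elevator=deadline
 *
 * "elevator=as" is rewritten to "anticipatory" for compatibility with
 * older kernels.  The scheduler can also be switched per queue at
 * runtime through sysfs (see elv_iosched_store() below).
 */
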
static struct kobj_type elv_ktype;

static elevator_t *elevator_alloc(struct elevator_type *e)
{
	elevator_t *eq;
	int i;

	eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
	if (unlikely(!eq))
		goto err;

	memset(eq, 0, sizeof(*eq));
	eq->ops = &e->ops;
	eq->elevator_type = e;
	kobject_init(&eq->kobj);
	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
	eq->kobj.ktype = &elv_ktype;
	mutex_init(&eq->sysfs_lock);

	eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
			   GFP_KERNEL);
	if (!eq->hash)
		goto err;

	for (i = 0; i < ELV_HASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&eq->hash[i]);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);

	elevator_put(e->elevator_type);
	kfree(e->hash);
	kfree(e);
}

int elevator_init(request_queue_t *q, char *name)
{
	struct elevator_type *e = NULL;
	struct elevator_queue *eq;
	int ret = 0;
	void *data;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name && !(e = elevator_get(name)))
		return -EINVAL;

	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
		printk("I/O scheduler %s not found\n", chosen_elevator);

	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
		printk("Default I/O scheduler not found, using no-op\n");
		e = elevator_get("noop");
	}

	eq = elevator_alloc(e);
	if (!eq)
		return -ENOMEM;

	data = elevator_init_queue(q, eq);
	if (!data) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}

	elevator_attach(q, eq, data);
	return ret;
}

EXPORT_SYMBOL(elevator_init);

void elevator_exit(elevator_t *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hlist_del_init(&rq->hash);
}

static void elv_rqhash_del(request_queue_t *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
}

static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
{
	elevator_t *e = q->elevator;
	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
	struct hlist_node *entry, *next;
	struct request *rq;

	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (rq->sector < __rq->sector)
			p = &(*p)->rb_left;
		else if (rq->sector > __rq->sector)
			p = &(*p)->rb_right;
		else
			return __rq;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
	return NULL;
}

EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}

EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < rq->sector)
			n = n->rb_left;
		else if (sector > rq->sector)
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}

EXPORT_SYMBOL(elv_rb_find);

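/*
 * Usage sketch (editor's addition): a scheduler that keeps its pending
 * requests sorted by sector in an rbtree would use these helpers
 * roughly as follows, where dd->sort_list is a hypothetical struct
 * rb_root in the scheduler's private data:
 *
 *	struct request *__alias;
 *
 *	while ((__alias = elv_rb_add(&dd->sort_list, rq)) != NULL)
 *		move_to_dispatch(__alias);
 *	...
 *	elv_rb_del(&dd->sort_list, rq);
 *
 * elv_rb_add() does not insert when an already-queued request has the
 * same sector; it returns that alias instead, so callers must get rid
 * of the alias (move_to_dispatch() above is hypothetical) and retry.
 */
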
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;

	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}

EXPORT_SYMBOL(elv_dispatch_sort);

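/*
 * Worked example (editor's addition): the backwards scan above treats
 * q->end_sector as the wrap point of a one-way elevator.  With
 * boundary == 500 and a dispatch list ordered 510, 520, 100, 200 (by
 * sector), a new request at sector 150 is placed between 100 and 200:
 * sectors below the boundary always sort after those at or above it,
 * so the head keeps sweeping in a single direction.
 */
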
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}

EXPORT_SYMBOL(elv_dispatch_add_tail);

int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	elevator_t *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge) {
		ret = elv_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->ops->elevator_merge_fn)
		return e->ops->elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

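/*
 * Sketch (editor's addition) of the scheduler-specific stage above: an
 * elevator_merge_fn typically probes its sector-sorted rbtree for a
 * front-merge candidate, since the hash only covers back merges.
 * Assuming a hypothetical scheduler-private dd->sort_list:
 *
 *	__rq = elv_rb_find(&dd->sort_list,
 *			   bio->bi_sector + bio_sectors(bio));
 *	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 *		*req = __rq;
 *		return ELEVATOR_FRONT_MERGE;
 *	}
 *	return ELEVATOR_NO_MERGE;
 */
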
void elv_merged_request(request_queue_t *q, struct request *rq, int type)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
			struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);
	elv_rqhash_del(q, next);

	q->nr_sorted--;
	q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
			e->ops->elevator_deactivate_req_fn(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
	static int printed;

	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted == 0)
		return;
	if (printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
	}
}

void elv_insert(request_queue_t *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;
	int unplug_it = 1;

	blk_add_trace_rq(q, rq, BLK_TA_INSERT);

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in a hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->cmd_flags |= REQ_SOFTBARRIER;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		/*
		 * most requeues happen because of a busy condition, don't
		 * force unplug of the queue for that case.
		 */
		unplug_it = 0;
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (unplug_it && blk_queue_plugged(q)) {
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}

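/*
 * Quick reference (editor's addition) for the insertion types handled
 * above:
 *
 *	ELEVATOR_INSERT_FRONT	head of the dispatch list, bypassing
 *				the scheduler
 *	ELEVATOR_INSERT_BACK	drain the scheduler, then tail of the
 *				dispatch list and kick the queue
 *	ELEVATOR_INSERT_SORT	hand the request to the scheduler's
 *				elevator_add_req_fn
 *	ELEVATOR_INSERT_REQUEUE	front insertion, except during an
 *				ordered flush, where requests keep
 *				ordseq order
 */
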
void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->cmd_flags |= REQ_ORDERED_COLOR;

	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is a scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}

EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

EXPORT_SYMBOL(elv_add_request);

static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}

struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			elevator_t *e = q->elevator;

			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq) &&
			    e->ops->elevator_activate_req_fn)
				e->ops->elevator_activate_req_fn(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			blkdev_dequeue_request(rq);
			rq->cmd_flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
			       ret);
			break;
		}
	}

	return rq;
}

EXPORT_SYMBOL(elv_next_request);

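/*
 * Usage sketch (editor's addition): a typical request_fn in a block
 * driver consumes the queue through this helper while holding the
 * queue lock (the driver and function names are hypothetical):
 *
 *	static void foo_request_fn(request_queue_t *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = elv_next_request(q)) != NULL) {
 *			blkdev_dequeue_request(rq);
 *			foo_start_io(rq);
 *		}
 *	}
 *
 * elv_next_request() leaves rq at the head of the queue, so a driver
 * that cannot start it immediately may simply return and retry when
 * resources free up; REQ_STARTED keeps later requests from passing it.
 */
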
void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and it being freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}

EXPORT_SYMBOL(elv_dequeue_request);

int elv_queue_empty(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	if (!list_empty(&q->queue_head))
		return 0;

	if (e->ops->elevator_queue_empty_fn)
		return e->ops->elevator_queue_empty_fn(q);

	return 1;
}

EXPORT_SYMBOL(elv_queue_empty);

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_latter_req_fn)
		return e->ops->elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_former_req_fn)
		return e->ops->elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
		    gfp_t gfp_mask)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_set_req_fn)
		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);

	rq->elevator_private = NULL;
	return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(q, rq);
}

int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_may_queue_fn)
		return e->ops->elevator_may_queue_fn(q, rw, bio);

	return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->show)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct elv_fs_entry *entry = to_elv(attr);
	ssize_t error;

	if (!entry->store)
		return -EIO;

	mutex_lock(&e->sysfs_lock);
	error = e->ops ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	elevator_t *e = q->elevator;
	int error;

	e->kobj.parent = &q->kobj;

	error = kobject_add(&e->kobj);
	if (!error) {
		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
	}
	return error;
}

static void __elv_unregister_queue(elevator_t *e)
{
	kobject_uevent(&e->kobj, KOBJ_REMOVE);
	kobject_del(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
	if (q)
		__elv_unregister_queue(q->elevator);
}

int elv_register(struct elevator_type *e)
{
	spin_lock_irq(&elv_list_lock);
	BUG_ON(elevator_find(e->elevator_name));
	list_add_tail(&e->list, &elv_list);
	spin_unlock_irq(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				printk(" (default)");
	printk("\n");
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

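/*
 * Registration sketch (editor's addition): an I/O scheduler module
 * registers its elevator_type from its init hook; all names below are
 * hypothetical:
 *
 *	static struct elevator_type iosched_foo = {
 *		.ops		= { .elevator_merge_fn = foo_merge, ... },
 *		.elevator_name	= "foo",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return elv_register(&iosched_foo);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		elv_unregister(&iosched_foo);
 *	}
 */
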
void elv_unregister(struct elevator_type *e)
{
	struct task_struct *g, *p;

	/*
	 * Iterate every thread in the system to remove the io contexts.
	 */
	if (e->ops.trim) {
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			task_lock(p);
			if (p->io_context)
				e->ops.trim(p->io_context);
			task_unlock(p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	spin_lock_irq(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	/*
	 * Allocate new elevator
	 */
	e = elevator_alloc(new_e);
	if (!e)
		return 0;

	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data
	 */
	spin_lock_irq(q->queue_lock);

	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

	elv_drain_elevator(q);

	while (q->rq.elvpriv) {
		blk_remove_plug(q);
		q->request_fn(q);
		spin_unlock_irq(q->queue_lock);
		msleep(10);
		spin_lock_irq(q->queue_lock);
		elv_drain_elevator(q);
	}

	/*
	 * Remember old elevator.
	 */
	old_elevator = q->elevator;

	/*
	 * attach and start new elevator
	 */
	elevator_attach(q, e, data);

	spin_unlock_irq(q->queue_lock);

	__elv_unregister_queue(old_elevator);

	if (elv_register_queue(q))
		goto fail_register;

	/*
	 * finally exit old elevator and turn off BYPASS.
	 */
	elevator_exit(old_elevator);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 1;

fail_register:
	/*
	 * switch failed, exit the new io scheduler and reattach the old
	 * one again (along with re-adding the sysfs dir)
	 */
	elevator_exit(e);
	q->elevator = old_elevator;
	elv_register_queue(q);
	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	return 0;
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	size_t len;
	struct elevator_type *e;

	elevator_name[sizeof(elevator_name) - 1] = '\0';
	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
	len = strlen(elevator_name);

	if (len && elevator_name[len - 1] == '\n')
		elevator_name[len - 1] = '\0';

	e = elevator_get(elevator_name);
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
		elevator_put(e);
		return count;
	}

	if (!elevator_switch(q, e))
		printk(KERN_ERR "elevator: switch to %s failed\n",
		       elevator_name);
	return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
	elevator_t *e = q->elevator;
	struct elevator_type *elv = e->elevator_type;
	struct list_head *entry;
	int len = 0;

	spin_lock_irq(q->queue_lock);
	list_for_each(entry, &elv_list) {
		struct elevator_type *__e;

		__e = list_entry(entry, struct elevator_type, list);
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock_irq(q->queue_lock);

	len += sprintf(len+name, "\n");
	return len;
}

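/*
 * Userspace view (editor's addition): the two hooks above back the
 * per-queue sysfs attribute, so the active scheduler can be inspected
 * and switched at runtime, e.g. for a hypothetical disk sda:
 *
 *	# cat /sys/block/sda/queue/scheduler
 *	noop anticipatory deadline [cfq]
 *	# echo deadline > /sys/block/sda/queue/scheduler
 */
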
struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}

EXPORT_SYMBOL(elv_rb_latter_request);