xref: /linux/block/elevator.c (revision e572ec7e4e432de7ecf7bd2e62117646fa64e518)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *  Block device elevator/IO-scheduler.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * 30042000 Jens Axboe <axboe@suse.de> :
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Split the elevator a bit so that it is possible to choose a different
91da177e4SLinus Torvalds  * one or even write a new "plug in". There are three pieces:
101da177e4SLinus Torvalds  * - elevator_fn, inserts a new request in the queue list
111da177e4SLinus Torvalds  * - elevator_merge_fn, decides whether a new buffer can be merged with
121da177e4SLinus Torvalds  *   an existing request
131da177e4SLinus Torvalds  * - elevator_dequeue_fn, called when a request is taken off the active list
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  * 20082000 Dave Jones <davej@suse.de> :
161da177e4SLinus Torvalds  * Removed tests for max-bomb-segments, which was breaking elvtune
171da177e4SLinus Torvalds  *  when run without -bN
181da177e4SLinus Torvalds  *
191da177e4SLinus Torvalds  * Jens:
201da177e4SLinus Torvalds  * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
221da177e4SLinus Torvalds  * - completely modularize elevator setup and teardown
231da177e4SLinus Torvalds  *
241da177e4SLinus Torvalds  */
251da177e4SLinus Torvalds #include <linux/kernel.h>
261da177e4SLinus Torvalds #include <linux/fs.h>
271da177e4SLinus Torvalds #include <linux/blkdev.h>
281da177e4SLinus Torvalds #include <linux/elevator.h>
291da177e4SLinus Torvalds #include <linux/bio.h>
301da177e4SLinus Torvalds #include <linux/config.h>
311da177e4SLinus Torvalds #include <linux/module.h>
321da177e4SLinus Torvalds #include <linux/slab.h>
331da177e4SLinus Torvalds #include <linux/init.h>
341da177e4SLinus Torvalds #include <linux/compiler.h>
35cb98fc8bSTejun Heo #include <linux/delay.h>
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <asm/uaccess.h>
381da177e4SLinus Torvalds 
/* Registry of all registered elevator types, protected by elv_list_lock. */
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds /*
431da177e4SLinus Torvalds  * can we safely merge with this request?
441da177e4SLinus Torvalds  */
451da177e4SLinus Torvalds inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
461da177e4SLinus Torvalds {
471da177e4SLinus Torvalds 	if (!rq_mergeable(rq))
481da177e4SLinus Torvalds 		return 0;
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds 	/*
511da177e4SLinus Torvalds 	 * different data direction or already started, don't merge
521da177e4SLinus Torvalds 	 */
531da177e4SLinus Torvalds 	if (bio_data_dir(bio) != rq_data_dir(rq))
541da177e4SLinus Torvalds 		return 0;
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds 	/*
571da177e4SLinus Torvalds 	 * same device and no special stuff set, merge is ok
581da177e4SLinus Torvalds 	 */
591da177e4SLinus Torvalds 	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
601da177e4SLinus Torvalds 	    !rq->waiting && !rq->special)
611da177e4SLinus Torvalds 		return 1;
621da177e4SLinus Torvalds 
631da177e4SLinus Torvalds 	return 0;
641da177e4SLinus Torvalds }
651da177e4SLinus Torvalds EXPORT_SYMBOL(elv_rq_merge_ok);
661da177e4SLinus Torvalds 
67769db45bSCoywolf Qi Hunt static inline int elv_try_merge(struct request *__rq, struct bio *bio)
681da177e4SLinus Torvalds {
691da177e4SLinus Torvalds 	int ret = ELEVATOR_NO_MERGE;
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds 	/*
721da177e4SLinus Torvalds 	 * we can merge and sequence is ok, check if it's possible
731da177e4SLinus Torvalds 	 */
741da177e4SLinus Torvalds 	if (elv_rq_merge_ok(__rq, bio)) {
751da177e4SLinus Torvalds 		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
761da177e4SLinus Torvalds 			ret = ELEVATOR_BACK_MERGE;
771da177e4SLinus Torvalds 		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
781da177e4SLinus Torvalds 			ret = ELEVATOR_FRONT_MERGE;
791da177e4SLinus Torvalds 	}
801da177e4SLinus Torvalds 
811da177e4SLinus Torvalds 	return ret;
821da177e4SLinus Torvalds }
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds static struct elevator_type *elevator_find(const char *name)
851da177e4SLinus Torvalds {
861da177e4SLinus Torvalds 	struct elevator_type *e = NULL;
871da177e4SLinus Torvalds 	struct list_head *entry;
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds 	list_for_each(entry, &elv_list) {
901da177e4SLinus Torvalds 		struct elevator_type *__e;
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds 		__e = list_entry(entry, struct elevator_type, list);
931da177e4SLinus Torvalds 
941da177e4SLinus Torvalds 		if (!strcmp(__e->elevator_name, name)) {
951da177e4SLinus Torvalds 			e = __e;
961da177e4SLinus Torvalds 			break;
971da177e4SLinus Torvalds 		}
981da177e4SLinus Torvalds 	}
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds 	return e;
1011da177e4SLinus Torvalds }
1021da177e4SLinus Torvalds 
/* Drop the module reference pinned by elevator_get(). */
static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}
1071da177e4SLinus Torvalds 
1081da177e4SLinus Torvalds static struct elevator_type *elevator_get(const char *name)
1091da177e4SLinus Torvalds {
1102824bc93STejun Heo 	struct elevator_type *e;
1111da177e4SLinus Torvalds 
1122824bc93STejun Heo 	spin_lock_irq(&elv_list_lock);
1132824bc93STejun Heo 
1142824bc93STejun Heo 	e = elevator_find(name);
1152824bc93STejun Heo 	if (e && !try_module_get(e->elevator_owner))
1162824bc93STejun Heo 		e = NULL;
1172824bc93STejun Heo 
1182824bc93STejun Heo 	spin_unlock_irq(&elv_list_lock);
1191da177e4SLinus Torvalds 
1201da177e4SLinus Torvalds 	return e;
1211da177e4SLinus Torvalds }
1221da177e4SLinus Torvalds 
1233d1ab40fSAl Viro static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
1241da177e4SLinus Torvalds {
1251da177e4SLinus Torvalds 	int ret = 0;
1261da177e4SLinus Torvalds 
1271da177e4SLinus Torvalds 	q->elevator = eq;
1281da177e4SLinus Torvalds 
1291da177e4SLinus Torvalds 	if (eq->ops->elevator_init_fn)
1301da177e4SLinus Torvalds 		ret = eq->ops->elevator_init_fn(q, eq);
1311da177e4SLinus Torvalds 
1321da177e4SLinus Torvalds 	return ret;
1331da177e4SLinus Torvalds }
1341da177e4SLinus Torvalds 
/* Elevator name selected on the kernel command line via "elevator=". */
static char chosen_elevator[16];
1361da177e4SLinus Torvalds 
1375f003976SNate Diller static int __init elevator_setup(char *str)
1381da177e4SLinus Torvalds {
139752a3b79SChuck Ebbert 	/*
140752a3b79SChuck Ebbert 	 * Be backwards-compatible with previous kernels, so users
141752a3b79SChuck Ebbert 	 * won't get the wrong elevator.
142752a3b79SChuck Ebbert 	 */
1435f003976SNate Diller 	if (!strcmp(str, "as"))
144752a3b79SChuck Ebbert 		strcpy(chosen_elevator, "anticipatory");
145cff3ba22SZachary Amsden 	else
1461da177e4SLinus Torvalds 		strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
1471da177e4SLinus Torvalds 	return 0;
1481da177e4SLinus Torvalds }
1491da177e4SLinus Torvalds 
1501da177e4SLinus Torvalds __setup("elevator=", elevator_setup);
1511da177e4SLinus Torvalds 
1523d1ab40fSAl Viro static struct kobj_type elv_ktype;
1533d1ab40fSAl Viro 
1543d1ab40fSAl Viro static elevator_t *elevator_alloc(struct elevator_type *e)
1553d1ab40fSAl Viro {
1563d1ab40fSAl Viro 	elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
1573d1ab40fSAl Viro 	if (eq) {
1583d1ab40fSAl Viro 		memset(eq, 0, sizeof(*eq));
1593d1ab40fSAl Viro 		eq->ops = &e->ops;
1603d1ab40fSAl Viro 		eq->elevator_type = e;
1613d1ab40fSAl Viro 		kobject_init(&eq->kobj);
1623d1ab40fSAl Viro 		snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
1633d1ab40fSAl Viro 		eq->kobj.ktype = &elv_ktype;
1643d1ab40fSAl Viro 		mutex_init(&eq->sysfs_lock);
1653d1ab40fSAl Viro 	} else {
1663d1ab40fSAl Viro 		elevator_put(e);
1673d1ab40fSAl Viro 	}
1683d1ab40fSAl Viro 	return eq;
1693d1ab40fSAl Viro }
1703d1ab40fSAl Viro 
/*
 * kobject release callback: drop the elevator type's module reference
 * and free the elevator itself.  Runs on the final kobject_put().
 */
static void elevator_release(struct kobject *kobj)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	elevator_put(e->elevator_type);
	kfree(e);
}
1773d1ab40fSAl Viro 
1781da177e4SLinus Torvalds int elevator_init(request_queue_t *q, char *name)
1791da177e4SLinus Torvalds {
1801da177e4SLinus Torvalds 	struct elevator_type *e = NULL;
1811da177e4SLinus Torvalds 	struct elevator_queue *eq;
1821da177e4SLinus Torvalds 	int ret = 0;
1831da177e4SLinus Torvalds 
184cb98fc8bSTejun Heo 	INIT_LIST_HEAD(&q->queue_head);
185cb98fc8bSTejun Heo 	q->last_merge = NULL;
186cb98fc8bSTejun Heo 	q->end_sector = 0;
187cb98fc8bSTejun Heo 	q->boundary_rq = NULL;
188cb98fc8bSTejun Heo 
1895f003976SNate Diller 	if (name && !(e = elevator_get(name)))
1901da177e4SLinus Torvalds 		return -EINVAL;
1911da177e4SLinus Torvalds 
192248d5ca5SNate Diller 	if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
1935f003976SNate Diller 		printk("I/O scheduler %s not found\n", chosen_elevator);
194248d5ca5SNate Diller 
195248d5ca5SNate Diller 	if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
196248d5ca5SNate Diller 		printk("Default I/O scheduler not found, using no-op\n");
197248d5ca5SNate Diller 		e = elevator_get("noop");
1985f003976SNate Diller 	}
1995f003976SNate Diller 
2003d1ab40fSAl Viro 	eq = elevator_alloc(e);
2013d1ab40fSAl Viro 	if (!eq)
2021da177e4SLinus Torvalds 		return -ENOMEM;
2031da177e4SLinus Torvalds 
2043d1ab40fSAl Viro 	ret = elevator_attach(q, eq);
2053d1ab40fSAl Viro 	if (ret)
2063d1ab40fSAl Viro 		kobject_put(&eq->kobj);
2071da177e4SLinus Torvalds 
2081da177e4SLinus Torvalds 	return ret;
2091da177e4SLinus Torvalds }
2101da177e4SLinus Torvalds 
/*
 * Shut down the elevator: run its exit hook and release the reference
 * that keeps it alive.
 */
void elevator_exit(elevator_t *e)
{
	/* serialize against sysfs show/store, which test e->ops */
	mutex_lock(&e->sysfs_lock);
	if (e->ops->elevator_exit_fn)
		e->ops->elevator_exit_fn(e);
	/* mark the elevator dead for any remaining sysfs users */
	e->ops = NULL;
	mutex_unlock(&e->sysfs_lock);

	/* may free e via elevator_release() if this was the last ref */
	kobject_put(&e->kobj);
}
2211da177e4SLinus Torvalds 
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
 * appended to the dispatch queue.  To be used by specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	/* rq leaves the scheduler proper: forget it as a merge candidate */
	if (q->last_merge == rq)
		q->last_merge = NULL;
	q->nr_sorted--;

	boundary = q->end_sector;

	/*
	 * Scan backwards for the insertion point: never pass barriers or
	 * already-started requests, and keep ascending sector order within
	 * each side of the boundary (one-way elevator semantics).
	 */
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
			break;
		if (rq->sector >= boundary) {
			if (pos->sector < boundary)
				continue;
		} else {
			if (pos->sector >= boundary)
				break;
		}
		if (rq->sector >= pos->sector)
			break;
	}

	list_add(&rq->queuelist, entry);
}
2568922e16cSTejun Heo 
2571da177e4SLinus Torvalds int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
2581da177e4SLinus Torvalds {
2591da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
26006b86245STejun Heo 	int ret;
26106b86245STejun Heo 
26206b86245STejun Heo 	if (q->last_merge) {
26306b86245STejun Heo 		ret = elv_try_merge(q->last_merge, bio);
26406b86245STejun Heo 		if (ret != ELEVATOR_NO_MERGE) {
26506b86245STejun Heo 			*req = q->last_merge;
26606b86245STejun Heo 			return ret;
26706b86245STejun Heo 		}
26806b86245STejun Heo 	}
2691da177e4SLinus Torvalds 
2701da177e4SLinus Torvalds 	if (e->ops->elevator_merge_fn)
2711da177e4SLinus Torvalds 		return e->ops->elevator_merge_fn(q, req, bio);
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds 	return ELEVATOR_NO_MERGE;
2741da177e4SLinus Torvalds }
2751da177e4SLinus Torvalds 
/* Notify the scheduler that a bio was merged into rq. */
void elv_merged_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merged_fn)
		e->ops->elevator_merged_fn(q, rq);

	/* rq just grew, so it is the best candidate for the next merge */
	q->last_merge = rq;
}
2851da177e4SLinus Torvalds 
/*
 * Notify the scheduler that next has been merged into rq; next is
 * removed from the scheduler's accounting (nr_sorted).
 */
void elv_merge_requests(request_queue_t *q, struct request *rq,
			     struct request *next)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_merge_req_fn)
		e->ops->elevator_merge_req_fn(q, rq, next);
	/* next no longer exists as a sorted request */
	q->nr_sorted--;

	q->last_merge = rq;
}
2971da177e4SLinus Torvalds 
/*
 * Put a request that the driver gave back onto the front of the queue
 * for re-dispatch.  Queue lock must be held.
 */
void elv_requeue_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		/* tell the scheduler the request is back under its control */
		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
			e->ops->elevator_deactivate_req_fn(q, rq);
	}

	/* clear STARTED so the request can be handed out afresh */
	rq->flags &= ~REQ_STARTED;

	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}
3161da177e4SLinus Torvalds 
31715853af9STejun Heo static void elv_drain_elevator(request_queue_t *q)
31815853af9STejun Heo {
31915853af9STejun Heo 	static int printed;
32015853af9STejun Heo 	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
32115853af9STejun Heo 		;
32215853af9STejun Heo 	if (q->nr_sorted == 0)
32315853af9STejun Heo 		return;
32415853af9STejun Heo 	if (printed++ < 10) {
32515853af9STejun Heo 		printk(KERN_ERR "%s: forced dispatching is broken "
32615853af9STejun Heo 		       "(nr_sorted=%u), please report this\n",
32715853af9STejun Heo 		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
32815853af9STejun Heo 	}
32915853af9STejun Heo }
33015853af9STejun Heo 
/*
 * Core insertion routine: place rq according to the `where` policy.
 * Queue lock must be held.
 */
void elv_insert(request_queue_t *q, struct request *rq, int where)
{
	struct list_head *pos;
	unsigned ordseq;

	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_FRONT:
		/* soft barrier keeps sorted requests from passing rq */
		rq->flags |= REQ_SOFTBARRIER;

		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->flags |= REQ_SOFTBARRIER;
		/* flush the elevator first so rq really is last */
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		blk_remove_plug(q);
		q->request_fn(q);
		break;

	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		rq->flags |= REQ_SORTED;
		q->nr_sorted++;
		if (q->last_merge == NULL && rq_mergeable(rq))
			q->last_merge = rq;
		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->ops->elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_REQUEUE:
		/*
		 * If ordered flush isn't in progress, we do front
		 * insertion; otherwise, requests should be requeued
		 * in ordseq order.
		 */
		rq->flags |= REQ_SOFTBARRIER;

		if (q->ordseq == 0) {
			list_add(&rq->queuelist, &q->queue_head);
			break;
		}

		/* find the first queued request with a later flush seq */
		ordseq = blk_ordered_req_seq(rq);

		list_for_each(pos, &q->queue_head) {
			struct request *pos_rq = list_entry_rq(pos);
			if (ordseq <= blk_ordered_req_seq(pos_rq))
				break;
		}

		list_add_tail(&rq->queuelist, pos);
		break;

	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __FUNCTION__, where);
		BUG();
	}

	if (blk_queue_plugged(q)) {
		/* unplug once enough requests are queued but not in flight */
		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
			- q->in_flight;

		if (nrq >= q->unplug_thresh)
			__generic_unplug_device(q);
	}
}
4151da177e4SLinus Torvalds 
/*
 * Add rq to the queue, applying barrier and ordered-color bookkeeping
 * and adjusting the insertion point before handing off to elv_insert().
 * Queue lock must be held.
 */
void __elv_add_request(request_queue_t *q, struct request *rq, int where,
		       int plug)
{
	if (q->ordcolor)
		rq->flags |= REQ_ORDERED_COLOR;

	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
		/*
		 * toggle ordered color
		 */
		if (blk_barrier_rq(rq))
			q->ordcolor ^= 1;

		/*
		 * barriers implicitly indicate back insertion
		 */
		if (where == ELEVATOR_INSERT_SORT)
			where = ELEVATOR_INSERT_BACK;

		/*
		 * this request is scheduling boundary, update
		 * end_sector
		 */
		if (blk_fs_request(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
		/* requests without elevator-private data cannot be sorted */
		where = ELEVATOR_INSERT_BACK;

	if (plug)
		blk_plug_device(q);

	elv_insert(q, rq, where);
}
45130e9656cSTejun Heo 
/* Locked wrapper around __elv_add_request() for callers without the lock. */
void elv_add_request(request_queue_t *q, struct request *rq, int where,
		     int plug)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where, plug);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
4611da177e4SLinus Torvalds 
/*
 * Return the request at the head of the dispatch list, refilling the
 * list from the elevator when it runs dry.  blk_do_ordered() may veto
 * or substitute the request to enforce barrier ordering.
 */
static inline struct request *__elv_next_request(request_queue_t *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		/* ask the scheduler for more; NULL when it has nothing */
		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}
4771da177e4SLinus Torvalds 
/*
 * Return the next request for the driver to service, or NULL.  Marks
 * the request STARTED, runs the prep function, and completes requests
 * that prep kills.  Queue lock must be held.
 */
struct request *elv_next_request(request_queue_t *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->flags & REQ_STARTED)) {
			elevator_t *e = q->elevator;

			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (blk_sorted_rq(rq) &&
			    e->ops->elevator_activate_req_fn)
				e->ops->elevator_activate_req_fn(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->flags |= REQ_STARTED;
		}

		/* crossed the scheduling boundary: advance end_sector */
		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			int nr_bytes = rq->hard_nr_sectors << 9;

			if (!nr_bytes)
				nr_bytes = rq->data_len;

			/* fail the whole request quietly */
			blkdev_dequeue_request(rq);
			rq->flags |= REQ_QUIET;
			end_that_request_chunk(rq, 0, nr_bytes);
			end_that_request_last(rq, 0);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
								ret);
			break;
		}
	}

	return rq;
}
5431da177e4SLinus Torvalds 
/*
 * Remove rq from the dispatch list and account it as in flight.
 * Queue lock must be held.
 */
void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
	BUG_ON(list_empty(&rq->queuelist));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and to it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq))
		q->in_flight++;
}
5581da177e4SLinus Torvalds 
5591da177e4SLinus Torvalds int elv_queue_empty(request_queue_t *q)
5601da177e4SLinus Torvalds {
5611da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
5621da177e4SLinus Torvalds 
5638922e16cSTejun Heo 	if (!list_empty(&q->queue_head))
5648922e16cSTejun Heo 		return 0;
5658922e16cSTejun Heo 
5661da177e4SLinus Torvalds 	if (e->ops->elevator_queue_empty_fn)
5671da177e4SLinus Torvalds 		return e->ops->elevator_queue_empty_fn(q);
5681da177e4SLinus Torvalds 
5698922e16cSTejun Heo 	return 1;
5701da177e4SLinus Torvalds }
5711da177e4SLinus Torvalds 
5721da177e4SLinus Torvalds struct request *elv_latter_request(request_queue_t *q, struct request *rq)
5731da177e4SLinus Torvalds {
5741da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
5751da177e4SLinus Torvalds 
5761da177e4SLinus Torvalds 	if (e->ops->elevator_latter_req_fn)
5771da177e4SLinus Torvalds 		return e->ops->elevator_latter_req_fn(q, rq);
5781da177e4SLinus Torvalds 	return NULL;
5791da177e4SLinus Torvalds }
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds struct request *elv_former_request(request_queue_t *q, struct request *rq)
5821da177e4SLinus Torvalds {
5831da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
5841da177e4SLinus Torvalds 
5851da177e4SLinus Torvalds 	if (e->ops->elevator_former_req_fn)
5861da177e4SLinus Torvalds 		return e->ops->elevator_former_req_fn(q, rq);
5871da177e4SLinus Torvalds 	return NULL;
5881da177e4SLinus Torvalds }
5891da177e4SLinus Torvalds 
59022e2c507SJens Axboe int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
5918267e268SAl Viro 		    gfp_t gfp_mask)
5921da177e4SLinus Torvalds {
5931da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	if (e->ops->elevator_set_req_fn)
59622e2c507SJens Axboe 		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds 	rq->elevator_private = NULL;
5991da177e4SLinus Torvalds 	return 0;
6001da177e4SLinus Torvalds }
6011da177e4SLinus Torvalds 
/* Release scheduler-private request data attached by elv_set_request(). */
void elv_put_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	if (e->ops->elevator_put_req_fn)
		e->ops->elevator_put_req_fn(q, rq);
}
6091da177e4SLinus Torvalds 
61022e2c507SJens Axboe int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
6111da177e4SLinus Torvalds {
6121da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
6131da177e4SLinus Torvalds 
6141da177e4SLinus Torvalds 	if (e->ops->elevator_may_queue_fn)
61522e2c507SJens Axboe 		return e->ops->elevator_may_queue_fn(q, rw, bio);
6161da177e4SLinus Torvalds 
6171da177e4SLinus Torvalds 	return ELV_MQUEUE_MAY;
6181da177e4SLinus Torvalds }
6191da177e4SLinus Torvalds 
/*
 * Driver has finished with rq: update accounting, notify the scheduler,
 * and advance an in-progress ordered flush sequence if this completion
 * drained the queue.  Queue lock must be held.
 */
void elv_completed_request(request_queue_t *q, struct request *rq)
{
	elevator_t *e = q->elevator;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight--;
		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
			e->ops->elevator_completed_req_fn(q, rq);
	}

	/*
	 * Check if the queue is waiting for fs requests to be
	 * drained for flush sequence.
	 */
	if (unlikely(q->ordseq)) {
		/* NOTE(review): assumes queue_head is non-empty whenever a
		 * flush sequence is in progress — confirm against ll_rw_blk */
		struct request *first_rq = list_entry_rq(q->queue_head.next);
		if (q->in_flight == 0 &&
		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
			q->request_fn(q);
		}
	}
}
6471da177e4SLinus Torvalds 
6483d1ab40fSAl Viro #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
6493d1ab40fSAl Viro 
6503d1ab40fSAl Viro static ssize_t
6513d1ab40fSAl Viro elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
6523d1ab40fSAl Viro {
6533d1ab40fSAl Viro 	elevator_t *e = container_of(kobj, elevator_t, kobj);
6543d1ab40fSAl Viro 	struct elv_fs_entry *entry = to_elv(attr);
6553d1ab40fSAl Viro 	ssize_t error;
6563d1ab40fSAl Viro 
6573d1ab40fSAl Viro 	if (!entry->show)
6583d1ab40fSAl Viro 		return -EIO;
6593d1ab40fSAl Viro 
6603d1ab40fSAl Viro 	mutex_lock(&e->sysfs_lock);
6613d1ab40fSAl Viro 	error = e->ops ? entry->show(e, page) : -ENOENT;
6623d1ab40fSAl Viro 	mutex_unlock(&e->sysfs_lock);
6633d1ab40fSAl Viro 	return error;
6643d1ab40fSAl Viro }
6653d1ab40fSAl Viro 
6663d1ab40fSAl Viro static ssize_t
6673d1ab40fSAl Viro elv_attr_store(struct kobject *kobj, struct attribute *attr,
6683d1ab40fSAl Viro 	       const char *page, size_t length)
6693d1ab40fSAl Viro {
6703d1ab40fSAl Viro 	elevator_t *e = container_of(kobj, elevator_t, kobj);
6713d1ab40fSAl Viro 	struct elv_fs_entry *entry = to_elv(attr);
6723d1ab40fSAl Viro 	ssize_t error;
6733d1ab40fSAl Viro 
6743d1ab40fSAl Viro 	if (!entry->store)
6753d1ab40fSAl Viro 		return -EIO;
6763d1ab40fSAl Viro 
6773d1ab40fSAl Viro 	mutex_lock(&e->sysfs_lock);
6783d1ab40fSAl Viro 	error = e->ops ? entry->store(e, page, length) : -ENOENT;
6793d1ab40fSAl Viro 	mutex_unlock(&e->sysfs_lock);
6803d1ab40fSAl Viro 	return error;
6813d1ab40fSAl Viro }
6823d1ab40fSAl Viro 
6833d1ab40fSAl Viro static struct sysfs_ops elv_sysfs_ops = {
6843d1ab40fSAl Viro 	.show	= elv_attr_show,
6853d1ab40fSAl Viro 	.store	= elv_attr_store,
6863d1ab40fSAl Viro };
6873d1ab40fSAl Viro 
6883d1ab40fSAl Viro static struct kobj_type elv_ktype = {
6893d1ab40fSAl Viro 	.sysfs_ops	= &elv_sysfs_ops,
6903d1ab40fSAl Viro 	.release	= elevator_release,
6913d1ab40fSAl Viro };
6923d1ab40fSAl Viro 
6931da177e4SLinus Torvalds int elv_register_queue(struct request_queue *q)
6941da177e4SLinus Torvalds {
6951da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
6963d1ab40fSAl Viro 	int error;
6971da177e4SLinus Torvalds 
6983d1ab40fSAl Viro 	e->kobj.parent = &q->kobj;
6991da177e4SLinus Torvalds 
7003d1ab40fSAl Viro 	error = kobject_add(&e->kobj);
7013d1ab40fSAl Viro 	if (!error) {
702*e572ec7eSAl Viro 		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
7033d1ab40fSAl Viro 		if (attr) {
704*e572ec7eSAl Viro 			while (attr->attr.name) {
705*e572ec7eSAl Viro 				if (sysfs_create_file(&e->kobj, &attr->attr))
7063d1ab40fSAl Viro 					break;
707*e572ec7eSAl Viro 				attr++;
7083d1ab40fSAl Viro 			}
7093d1ab40fSAl Viro 		}
7103d1ab40fSAl Viro 		kobject_uevent(&e->kobj, KOBJ_ADD);
7113d1ab40fSAl Viro 	}
7123d1ab40fSAl Viro 	return error;
7131da177e4SLinus Torvalds }
7141da177e4SLinus Torvalds 
7151da177e4SLinus Torvalds void elv_unregister_queue(struct request_queue *q)
7161da177e4SLinus Torvalds {
7171da177e4SLinus Torvalds 	if (q) {
7181da177e4SLinus Torvalds 		elevator_t *e = q->elevator;
7193d1ab40fSAl Viro 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
7203d1ab40fSAl Viro 		kobject_del(&e->kobj);
7211da177e4SLinus Torvalds 	}
7221da177e4SLinus Torvalds }
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds int elv_register(struct elevator_type *e)
7251da177e4SLinus Torvalds {
7262824bc93STejun Heo 	spin_lock_irq(&elv_list_lock);
7271da177e4SLinus Torvalds 	if (elevator_find(e->elevator_name))
7281da177e4SLinus Torvalds 		BUG();
7291da177e4SLinus Torvalds 	list_add_tail(&e->list, &elv_list);
7301da177e4SLinus Torvalds 	spin_unlock_irq(&elv_list_lock);
7311da177e4SLinus Torvalds 
7321da177e4SLinus Torvalds 	printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
7335f003976SNate Diller 	if (!strcmp(e->elevator_name, chosen_elevator) ||
7345f003976SNate Diller 			(!*chosen_elevator &&
7355f003976SNate Diller 			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
7361da177e4SLinus Torvalds 				printk(" (default)");
7371da177e4SLinus Torvalds 	printk("\n");
7381da177e4SLinus Torvalds 	return 0;
7391da177e4SLinus Torvalds }
7401da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(elv_register);
7411da177e4SLinus Torvalds 
7421da177e4SLinus Torvalds void elv_unregister(struct elevator_type *e)
7431da177e4SLinus Torvalds {
74483521d3eSChristoph Hellwig 	struct task_struct *g, *p;
74583521d3eSChristoph Hellwig 
74683521d3eSChristoph Hellwig 	/*
74783521d3eSChristoph Hellwig 	 * Iterate every thread in the process to remove the io contexts.
74883521d3eSChristoph Hellwig 	 */
749e17a9489SAl Viro 	if (e->ops.trim) {
75083521d3eSChristoph Hellwig 		read_lock(&tasklist_lock);
75183521d3eSChristoph Hellwig 		do_each_thread(g, p) {
752e17a9489SAl Viro 			task_lock(p);
753e17a9489SAl Viro 			e->ops.trim(p->io_context);
754e17a9489SAl Viro 			task_unlock(p);
75583521d3eSChristoph Hellwig 		} while_each_thread(g, p);
75683521d3eSChristoph Hellwig 		read_unlock(&tasklist_lock);
757e17a9489SAl Viro 	}
75883521d3eSChristoph Hellwig 
7591da177e4SLinus Torvalds 	spin_lock_irq(&elv_list_lock);
7601da177e4SLinus Torvalds 	list_del_init(&e->list);
7611da177e4SLinus Torvalds 	spin_unlock_irq(&elv_list_lock);
7621da177e4SLinus Torvalds }
7631da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(elv_unregister);
7641da177e4SLinus Torvalds 
7651da177e4SLinus Torvalds /*
7661da177e4SLinus Torvalds  * switch to new_e io scheduler. be careful not to introduce deadlocks -
7671da177e4SLinus Torvalds  * we don't free the old io scheduler, before we have allocated what we
7681da177e4SLinus Torvalds  * need for the new one. this way we have a chance of going back to the old
769cb98fc8bSTejun Heo  * one, if the new one fails init for some reason.
7701da177e4SLinus Torvalds  */
7713d1ab40fSAl Viro static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
7721da177e4SLinus Torvalds {
773cb98fc8bSTejun Heo 	elevator_t *old_elevator, *e;
7741da177e4SLinus Torvalds 
775cb98fc8bSTejun Heo 	/*
776cb98fc8bSTejun Heo 	 * Allocate new elevator
777cb98fc8bSTejun Heo 	 */
7783d1ab40fSAl Viro 	e = elevator_alloc(new_e);
7791da177e4SLinus Torvalds 	if (!e)
7803d1ab40fSAl Viro 		return 0;
7811da177e4SLinus Torvalds 
7821da177e4SLinus Torvalds 	/*
783cb98fc8bSTejun Heo 	 * Turn on BYPASS and drain all requests w/ elevator private data
7841da177e4SLinus Torvalds 	 */
785cb98fc8bSTejun Heo 	spin_lock_irq(q->queue_lock);
786cb98fc8bSTejun Heo 
78764521d1aSJens Axboe 	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
788cb98fc8bSTejun Heo 
78915853af9STejun Heo 	elv_drain_elevator(q);
790cb98fc8bSTejun Heo 
791cb98fc8bSTejun Heo 	while (q->rq.elvpriv) {
792407df2aaSTejun Heo 		blk_remove_plug(q);
793407df2aaSTejun Heo 		q->request_fn(q);
794cb98fc8bSTejun Heo 		spin_unlock_irq(q->queue_lock);
79564521d1aSJens Axboe 		msleep(10);
796cb98fc8bSTejun Heo 		spin_lock_irq(q->queue_lock);
79715853af9STejun Heo 		elv_drain_elevator(q);
798cb98fc8bSTejun Heo 	}
799cb98fc8bSTejun Heo 
800cb98fc8bSTejun Heo 	spin_unlock_irq(q->queue_lock);
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	/*
8031da177e4SLinus Torvalds 	 * unregister old elevator data
8041da177e4SLinus Torvalds 	 */
8051da177e4SLinus Torvalds 	elv_unregister_queue(q);
8061da177e4SLinus Torvalds 	old_elevator = q->elevator;
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds 	/*
8091da177e4SLinus Torvalds 	 * attach and start new elevator
8101da177e4SLinus Torvalds 	 */
8113d1ab40fSAl Viro 	if (elevator_attach(q, e))
8121da177e4SLinus Torvalds 		goto fail;
8131da177e4SLinus Torvalds 
8141da177e4SLinus Torvalds 	if (elv_register_queue(q))
8151da177e4SLinus Torvalds 		goto fail_register;
8161da177e4SLinus Torvalds 
8171da177e4SLinus Torvalds 	/*
818cb98fc8bSTejun Heo 	 * finally exit old elevator and turn off BYPASS.
8191da177e4SLinus Torvalds 	 */
8201da177e4SLinus Torvalds 	elevator_exit(old_elevator);
82164521d1aSJens Axboe 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
8223d1ab40fSAl Viro 	return 1;
8231da177e4SLinus Torvalds 
8241da177e4SLinus Torvalds fail_register:
8251da177e4SLinus Torvalds 	/*
8261da177e4SLinus Torvalds 	 * switch failed, exit the new io scheduler and reattach the old
8271da177e4SLinus Torvalds 	 * one again (along with re-adding the sysfs dir)
8281da177e4SLinus Torvalds 	 */
8291da177e4SLinus Torvalds 	elevator_exit(e);
830cb98fc8bSTejun Heo 	e = NULL;
8311da177e4SLinus Torvalds fail:
8321da177e4SLinus Torvalds 	q->elevator = old_elevator;
8331da177e4SLinus Torvalds 	elv_register_queue(q);
83464521d1aSJens Axboe 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
8353d1ab40fSAl Viro 	if (e)
8363d1ab40fSAl Viro 		kobject_put(&e->kobj);
8373d1ab40fSAl Viro 	return 0;
8381da177e4SLinus Torvalds }
8391da177e4SLinus Torvalds 
8401da177e4SLinus Torvalds ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
8411da177e4SLinus Torvalds {
8421da177e4SLinus Torvalds 	char elevator_name[ELV_NAME_MAX];
843be561235STejun Heo 	size_t len;
8441da177e4SLinus Torvalds 	struct elevator_type *e;
8451da177e4SLinus Torvalds 
846be561235STejun Heo 	elevator_name[sizeof(elevator_name) - 1] = '\0';
847be561235STejun Heo 	strncpy(elevator_name, name, sizeof(elevator_name) - 1);
848be561235STejun Heo 	len = strlen(elevator_name);
8491da177e4SLinus Torvalds 
850be561235STejun Heo 	if (len && elevator_name[len - 1] == '\n')
851be561235STejun Heo 		elevator_name[len - 1] = '\0';
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds 	e = elevator_get(elevator_name);
8541da177e4SLinus Torvalds 	if (!e) {
8551da177e4SLinus Torvalds 		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
8561da177e4SLinus Torvalds 		return -EINVAL;
8571da177e4SLinus Torvalds 	}
8581da177e4SLinus Torvalds 
8592ca7d93bSNate Diller 	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
8602ca7d93bSNate Diller 		elevator_put(e);
8611da177e4SLinus Torvalds 		return count;
8622ca7d93bSNate Diller 	}
8631da177e4SLinus Torvalds 
8643d1ab40fSAl Viro 	if (!elevator_switch(q, e))
8653d1ab40fSAl Viro 		printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
8661da177e4SLinus Torvalds 	return count;
8671da177e4SLinus Torvalds }
8681da177e4SLinus Torvalds 
8691da177e4SLinus Torvalds ssize_t elv_iosched_show(request_queue_t *q, char *name)
8701da177e4SLinus Torvalds {
8711da177e4SLinus Torvalds 	elevator_t *e = q->elevator;
8721da177e4SLinus Torvalds 	struct elevator_type *elv = e->elevator_type;
8731da177e4SLinus Torvalds 	struct list_head *entry;
8741da177e4SLinus Torvalds 	int len = 0;
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds 	spin_lock_irq(q->queue_lock);
8771da177e4SLinus Torvalds 	list_for_each(entry, &elv_list) {
8781da177e4SLinus Torvalds 		struct elevator_type *__e;
8791da177e4SLinus Torvalds 
8801da177e4SLinus Torvalds 		__e = list_entry(entry, struct elevator_type, list);
8811da177e4SLinus Torvalds 		if (!strcmp(elv->elevator_name, __e->elevator_name))
8821da177e4SLinus Torvalds 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
8831da177e4SLinus Torvalds 		else
8841da177e4SLinus Torvalds 			len += sprintf(name+len, "%s ", __e->elevator_name);
8851da177e4SLinus Torvalds 	}
8861da177e4SLinus Torvalds 	spin_unlock_irq(q->queue_lock);
8871da177e4SLinus Torvalds 
8881da177e4SLinus Torvalds 	len += sprintf(len+name, "\n");
8891da177e4SLinus Torvalds 	return len;
8901da177e4SLinus Torvalds }
8911da177e4SLinus Torvalds 
8921b47f531SJens Axboe EXPORT_SYMBOL(elv_dispatch_sort);
8931da177e4SLinus Torvalds EXPORT_SYMBOL(elv_add_request);
8941da177e4SLinus Torvalds EXPORT_SYMBOL(__elv_add_request);
8951da177e4SLinus Torvalds EXPORT_SYMBOL(elv_requeue_request);
8961da177e4SLinus Torvalds EXPORT_SYMBOL(elv_next_request);
8978922e16cSTejun Heo EXPORT_SYMBOL(elv_dequeue_request);
8981da177e4SLinus Torvalds EXPORT_SYMBOL(elv_queue_empty);
8991da177e4SLinus Torvalds EXPORT_SYMBOL(elv_completed_request);
9001da177e4SLinus Torvalds EXPORT_SYMBOL(elevator_exit);
9011da177e4SLinus Torvalds EXPORT_SYMBOL(elevator_init);
902