Lines Matching +full:wait +full:- +full:queue
1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
10 #include <linux/backing-dev.h>
13 #include <linux/blk-integrity.h>
30 #include <linux/blk-crypto.h>
35 #include <linux/t10-pi.h>
37 #include "blk-mq.h"
38 #include "blk-mq-debugfs.h"
39 #include "blk-pm.h"
40 #include "blk-stat.h"
41 #include "blk-mq-sched.h"
42 #include "blk-rq-qos.h"
57 * have pending work in this hardware queue.
61 return !list_empty_careful(&hctx->dispatch) || in blk_mq_hctx_has_pending()
62 sbitmap_any_bit_set(&hctx->ctx_map) || in blk_mq_hctx_has_pending()
67 * Mark this ctx as having pending work in this hardware queue
72 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_mark_pending()
74 if (!sbitmap_test_bit(&hctx->ctx_map, bit)) in blk_mq_hctx_mark_pending()
75 sbitmap_set_bit(&hctx->ctx_map, bit); in blk_mq_hctx_mark_pending()
81 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_clear_pending()
83 sbitmap_clear_bit(&hctx->ctx_map, bit); in blk_mq_hctx_clear_pending()
95 if (rq->part && blk_do_io_stat(rq) && in blk_mq_check_inflight()
96 (!mi->part->bd_partno || rq->part == mi->part) && in blk_mq_check_inflight()
98 mi->inflight[rq_data_dir(rq)]++; in blk_mq_check_inflight()
125 mutex_lock(&q->mq_freeze_lock); in blk_freeze_queue_start()
126 if (++q->mq_freeze_depth == 1) { in blk_freeze_queue_start()
127 percpu_ref_kill(&q->q_usage_counter); in blk_freeze_queue_start()
128 mutex_unlock(&q->mq_freeze_lock); in blk_freeze_queue_start()
132 mutex_unlock(&q->mq_freeze_lock); in blk_freeze_queue_start()
139 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); in blk_mq_freeze_queue_wait()
146 return wait_event_timeout(q->mq_freeze_wq, in blk_mq_freeze_queue_wait_timeout()
147 percpu_ref_is_zero(&q->q_usage_counter), in blk_mq_freeze_queue_wait_timeout()
154 * the queue afterward.
181 mutex_lock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
183 q->q_usage_counter.data->force_atomic = true; in __blk_mq_unfreeze_queue()
184 q->mq_freeze_depth--; in __blk_mq_unfreeze_queue()
185 WARN_ON_ONCE(q->mq_freeze_depth < 0); in __blk_mq_unfreeze_queue()
186 if (!q->mq_freeze_depth) { in __blk_mq_unfreeze_queue()
187 percpu_ref_resurrect(&q->q_usage_counter); in __blk_mq_unfreeze_queue()
188 wake_up_all(&q->mq_freeze_wq); in __blk_mq_unfreeze_queue()
190 mutex_unlock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
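The freeze path above drains a queue completely: blk_freeze_queue_start() kills q_usage_counter, blk_mq_freeze_queue_wait() sleeps on mq_freeze_wq until the counter hits zero, and the unfreeze side resurrects the counter and wakes the waiters. A minimal sketch of the usual caller pattern, using only the exported blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers; the my_dev structure and field are hypothetical:

#include <linux/blk-mq.h>

struct my_dev {
	unsigned int queue_depth;	/* hypothetical driver state */
};

/* Change configuration while no requests can be in flight. */
static void my_dev_set_depth(struct request_queue *q, struct my_dev *dev,
			     unsigned int depth)
{
	blk_mq_freeze_queue(q);		/* waits for q_usage_counter to drop to zero */
	dev->queue_depth = depth;	/* safe: no new requests can enter the queue */
	blk_mq_unfreeze_queue(q);	/* resurrects the counter, wakes mq_freeze_wq */
}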
207 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
208 if (!q->quiesce_depth++) in blk_mq_quiesce_queue_nowait()
210 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
215 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
216 * @set: tag_set to wait on
225 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_wait_quiesce_done()
226 synchronize_srcu(set->srcu); in blk_mq_wait_quiesce_done()
233 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
234 * @q: request queue.
238 * sure no dispatch can happen until the queue is unquiesced via
244 /* nothing to wait for on non-mq queues */ in blk_mq_quiesce_queue()
246 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_quiesce_queue()
251 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
252 * @q: request queue.
254 * This function restores the queue to the state before quiescing
262 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
263 if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { in blk_mq_unquiesce_queue()
265 } else if (!--q->quiesce_depth) { in blk_mq_unquiesce_queue()
269 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
281 mutex_lock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
282 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_quiesce_tagset()
287 mutex_unlock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
295 mutex_lock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
296 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_unquiesce_tagset()
300 mutex_unlock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
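Quiescing is weaker than freezing: once blk_mq_wait_quiesce_done() returns, no ->queue_rq() call is in progress, but already-allocated requests are not drained. A hedged sketch of how a driver typically brackets an error-recovery or reconfiguration step with the queue-level helpers shown above (the tagset variants apply the same thing to every queue sharing the tag set):

#include <linux/blk-mq.h>

/* Illustrative only: stop dispatch, do driver-private work, resume dispatch. */
static void my_dev_recover(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* waits for in-flight ->queue_rq() calls */

	/* ...reset controller state; no new dispatch can run here... */

	blk_mq_unquiesce_queue(q);	/* drops quiesce_depth, reruns the hw queues */
}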
311 blk_mq_tag_wakeup_all(hctx->tags, true); in blk_mq_wake_waiters()
318 INIT_LIST_HEAD(&rq->queuelist); in blk_rq_init()
319 rq->q = q; in blk_rq_init()
320 rq->__sector = (sector_t) -1; in blk_rq_init()
321 INIT_HLIST_NODE(&rq->hash); in blk_rq_init()
322 RB_CLEAR_NODE(&rq->rb_node); in blk_rq_init()
323 rq->tag = BLK_MQ_NO_TAG; in blk_rq_init()
324 rq->internal_tag = BLK_MQ_NO_TAG; in blk_rq_init()
325 rq->start_time_ns = ktime_get_ns(); in blk_rq_init()
326 rq->part = NULL; in blk_rq_init()
335 rq->start_time_ns = ktime_get_ns(); in blk_mq_rq_time_init()
337 rq->start_time_ns = 0; in blk_mq_rq_time_init()
340 if (blk_queue_rq_alloc_time(rq->q)) in blk_mq_rq_time_init()
341 rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns; in blk_mq_rq_time_init()
343 rq->alloc_time_ns = 0; in blk_mq_rq_time_init()
350 struct blk_mq_ctx *ctx = data->ctx; in blk_mq_rq_ctx_init()
351 struct blk_mq_hw_ctx *hctx = data->hctx; in blk_mq_rq_ctx_init()
352 struct request_queue *q = data->q; in blk_mq_rq_ctx_init()
353 struct request *rq = tags->static_rqs[tag]; in blk_mq_rq_ctx_init()
355 rq->q = q; in blk_mq_rq_ctx_init()
356 rq->mq_ctx = ctx; in blk_mq_rq_ctx_init()
357 rq->mq_hctx = hctx; in blk_mq_rq_ctx_init()
358 rq->cmd_flags = data->cmd_flags; in blk_mq_rq_ctx_init()
360 if (data->flags & BLK_MQ_REQ_PM) in blk_mq_rq_ctx_init()
361 data->rq_flags |= RQF_PM; in blk_mq_rq_ctx_init()
363 data->rq_flags |= RQF_IO_STAT; in blk_mq_rq_ctx_init()
364 rq->rq_flags = data->rq_flags; in blk_mq_rq_ctx_init()
366 if (data->rq_flags & RQF_SCHED_TAGS) { in blk_mq_rq_ctx_init()
367 rq->tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
368 rq->internal_tag = tag; in blk_mq_rq_ctx_init()
370 rq->tag = tag; in blk_mq_rq_ctx_init()
371 rq->internal_tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
373 rq->timeout = 0; in blk_mq_rq_ctx_init()
375 rq->part = NULL; in blk_mq_rq_ctx_init()
376 rq->io_start_time_ns = 0; in blk_mq_rq_ctx_init()
377 rq->stats_sectors = 0; in blk_mq_rq_ctx_init()
378 rq->nr_phys_segments = 0; in blk_mq_rq_ctx_init()
380 rq->nr_integrity_segments = 0; in blk_mq_rq_ctx_init()
382 rq->end_io = NULL; in blk_mq_rq_ctx_init()
383 rq->end_io_data = NULL; in blk_mq_rq_ctx_init()
386 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_rq_ctx_init()
388 WRITE_ONCE(rq->deadline, 0); in blk_mq_rq_ctx_init()
391 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_rq_ctx_init()
392 struct elevator_queue *e = data->q->elevator; in blk_mq_rq_ctx_init()
394 INIT_HLIST_NODE(&rq->hash); in blk_mq_rq_ctx_init()
395 RB_CLEAR_NODE(&rq->rb_node); in blk_mq_rq_ctx_init()
397 if (e->type->ops.prepare_request) in blk_mq_rq_ctx_init()
398 e->type->ops.prepare_request(rq); in blk_mq_rq_ctx_init()
413 tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); in __blk_mq_alloc_requests_batch()
422 prefetch(tags->static_rqs[tag]); in __blk_mq_alloc_requests_batch()
425 rq_list_add(data->cached_rq, rq); in __blk_mq_alloc_requests_batch()
428 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests_batch()
429 blk_mq_add_active_requests(data->hctx, nr); in __blk_mq_alloc_requests_batch()
431 percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); in __blk_mq_alloc_requests_batch()
432 data->nr_tags -= nr; in __blk_mq_alloc_requests_batch()
434 return rq_list_pop(data->cached_rq); in __blk_mq_alloc_requests_batch()
439 struct request_queue *q = data->q; in __blk_mq_alloc_requests()
448 if (data->cmd_flags & REQ_NOWAIT) in __blk_mq_alloc_requests()
449 data->flags |= BLK_MQ_REQ_NOWAIT; in __blk_mq_alloc_requests()
451 if (q->elevator) { in __blk_mq_alloc_requests()
454 * enabled for the queue. in __blk_mq_alloc_requests()
456 data->rq_flags |= RQF_SCHED_TAGS; in __blk_mq_alloc_requests()
462 if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH && in __blk_mq_alloc_requests()
463 !blk_op_is_passthrough(data->cmd_flags)) { in __blk_mq_alloc_requests()
464 struct elevator_mq_ops *ops = &q->elevator->type->ops; in __blk_mq_alloc_requests()
466 WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED); in __blk_mq_alloc_requests()
468 data->rq_flags |= RQF_USE_SCHED; in __blk_mq_alloc_requests()
469 if (ops->limit_depth) in __blk_mq_alloc_requests()
470 ops->limit_depth(data->cmd_flags, data); in __blk_mq_alloc_requests()
475 data->ctx = blk_mq_get_ctx(q); in __blk_mq_alloc_requests()
476 data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); in __blk_mq_alloc_requests()
477 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
478 blk_mq_tag_busy(data->hctx); in __blk_mq_alloc_requests()
480 if (data->flags & BLK_MQ_REQ_RESERVED) in __blk_mq_alloc_requests()
481 data->rq_flags |= RQF_RESV; in __blk_mq_alloc_requests()
486 if (data->nr_tags > 1) { in __blk_mq_alloc_requests()
492 data->nr_tags = 1; in __blk_mq_alloc_requests()
502 if (data->flags & BLK_MQ_REQ_NOWAIT) in __blk_mq_alloc_requests()
514 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
515 blk_mq_inc_active_requests(data->hctx); in __blk_mq_alloc_requests()
530 .nr_tags = plug->nr_ios, in blk_mq_rq_cache_fill()
531 .cached_rq = &plug->cached_rq, in blk_mq_rq_cache_fill()
538 plug->nr_ios = 1; in blk_mq_rq_cache_fill()
550 struct blk_plug *plug = current->plug; in blk_mq_alloc_cached_request()
556 if (rq_list_empty(plug->cached_rq)) { in blk_mq_alloc_cached_request()
557 if (plug->nr_ios == 1) in blk_mq_alloc_cached_request()
563 rq = rq_list_peek(&plug->cached_rq); in blk_mq_alloc_cached_request()
564 if (!rq || rq->q != q) in blk_mq_alloc_cached_request()
567 if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) in blk_mq_alloc_cached_request()
569 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_alloc_cached_request()
572 plug->cached_rq = rq_list_next(rq); in blk_mq_alloc_cached_request()
576 rq->cmd_flags = opf; in blk_mq_alloc_cached_request()
577 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_alloc_cached_request()
604 rq->__data_len = 0; in blk_mq_alloc_request()
605 rq->__sector = (sector_t) -1; in blk_mq_alloc_request()
606 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request()
610 return ERR_PTR(-EWOULDBLOCK); in blk_mq_alloc_request()
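The ERR_PTR(-EWOULDBLOCK) return above is what a caller sees when a BLK_MQ_REQ_NOWAIT allocation cannot get a tag without sleeping. A small sketch of handling it; the retry policy and the REQ_OP_DRV_IN choice are illustrative, not mandated by this code:

#include <linux/blk-mq.h>
#include <linux/err.h>

static int my_try_alloc_nowait(struct request_queue *q)
{
	struct request *rq;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);	/* typically -EWOULDBLOCK when tags are exhausted */

	/* ...fill in and issue the request... */
	blk_mq_free_request(rq);
	return 0;
}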
637 * a specific queue. in blk_mq_alloc_request_hctx()
641 return ERR_PTR(-EINVAL); in blk_mq_alloc_request_hctx()
643 if (hctx_idx >= q->nr_hw_queues) in blk_mq_alloc_request_hctx()
644 return ERR_PTR(-EIO); in blk_mq_alloc_request_hctx()
652 * If not, tell the caller that it should skip this queue. in blk_mq_alloc_request_hctx()
654 ret = -EXDEV; in blk_mq_alloc_request_hctx()
655 data.hctx = xa_load(&q->hctx_table, hctx_idx); in blk_mq_alloc_request_hctx()
658 cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); in blk_mq_alloc_request_hctx()
663 if (q->elevator) in blk_mq_alloc_request_hctx()
671 ret = -EWOULDBLOCK; in blk_mq_alloc_request_hctx()
679 rq->__data_len = 0; in blk_mq_alloc_request_hctx()
680 rq->__sector = (sector_t) -1; in blk_mq_alloc_request_hctx()
681 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request_hctx()
692 struct request_queue *q = rq->q; in blk_mq_finish_request()
694 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_finish_request()
695 q->elevator->type->ops.finish_request(rq); in blk_mq_finish_request()
701 rq->rq_flags &= ~RQF_USE_SCHED; in blk_mq_finish_request()
707 struct request_queue *q = rq->q; in __blk_mq_free_request()
708 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_free_request()
709 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in __blk_mq_free_request()
710 const int sched_tag = rq->internal_tag; in __blk_mq_free_request()
714 rq->mq_hctx = NULL; in __blk_mq_free_request()
716 if (rq->tag != BLK_MQ_NO_TAG) { in __blk_mq_free_request()
718 blk_mq_put_tag(hctx->tags, ctx, rq->tag); in __blk_mq_free_request()
721 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); in __blk_mq_free_request()
728 struct request_queue *q = rq->q; in blk_mq_free_request()
733 laptop_io_completion(q->disk->bdi); in blk_mq_free_request()
737 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_free_request()
747 while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) in blk_mq_free_plug_rqs()
754 rq->q->disk ? rq->q->disk->disk_name : "?", in blk_dump_rq_flags()
755 (__force unsigned long long) rq->cmd_flags); in blk_dump_rq_flags()
761 rq->bio, rq->biotail, blk_rq_bytes(rq)); in blk_dump_rq_flags()
769 bio->bi_status = error; in req_bio_endio()
778 if (bio->bi_iter.bi_size != nbytes) { in req_bio_endio()
779 bio->bi_status = BLK_STS_IOERR; in req_bio_endio()
780 nbytes = bio->bi_iter.bi_size; in req_bio_endio()
782 bio->bi_iter.bi_sector = rq->__sector; in req_bio_endio()
788 if (unlikely(rq->rq_flags & RQF_QUIET)) in req_bio_endio()
791 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) in req_bio_endio()
797 if (req->part && blk_do_io_stat(req)) { in blk_account_io_completion()
801 part_stat_add(req->part, sectors[sgrp], bytes >> 9); in blk_account_io_completion()
812 req->q->disk ? req->q->disk->disk_name : "?", in blk_print_req_error()
815 (__force u32)(req->cmd_flags & ~REQ_OP_MASK), in blk_print_req_error()
816 req->nr_phys_segments, in blk_print_req_error()
817 IOPRIO_PRIO_CLASS(req->ioprio)); in blk_print_req_error()
826 const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; in blk_complete_request()
828 struct bio *bio = req->bio; in blk_complete_request()
837 req->q->integrity.profile->complete_fn(req, total_bytes); in blk_complete_request()
849 struct bio *next = bio->bi_next; in blk_complete_request()
855 bio->bi_iter.bi_sector = req->__sector; in blk_complete_request()
867 if (!req->end_io) { in blk_complete_request()
868 req->bio = NULL; in blk_complete_request()
869 req->__data_len = 0; in blk_complete_request()
874 * blk_update_request - Complete multiple bytes without completing the request
892 * %false - this request doesn't have any more data
893 * %true - this request has more data
902 if (!req->bio) in blk_update_request()
908 req->q->integrity.profile->complete_fn(req, nr_bytes); in blk_update_request()
919 !(req->rq_flags & RQF_QUIET)) && in blk_update_request()
920 !test_bit(GD_DEAD, &req->q->disk->state)) { in blk_update_request()
928 while (req->bio) { in blk_update_request()
929 struct bio *bio = req->bio; in blk_update_request()
930 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); in blk_update_request()
932 if (bio_bytes == bio->bi_iter.bi_size) in blk_update_request()
933 req->bio = bio->bi_next; in blk_update_request()
940 nr_bytes -= bio_bytes; in blk_update_request()
949 if (!req->bio) { in blk_update_request()
955 req->__data_len = 0; in blk_update_request()
959 req->__data_len -= total_bytes; in blk_update_request()
963 req->__sector += total_bytes >> 9; in blk_update_request()
966 if (req->rq_flags & RQF_MIXED_MERGE) { in blk_update_request()
967 req->cmd_flags &= ~REQ_FAILFAST_MASK; in blk_update_request()
968 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; in blk_update_request()
971 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { in blk_update_request()
978 req->__data_len = blk_rq_cur_bytes(req); in blk_update_request()
982 req->nr_phys_segments = blk_recalc_rq_segments(req); in blk_update_request()
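blk_update_request() is the partial-completion primitive: it retires nr_bytes of bios from the front of the request and returns true while data remains. A hedged sketch of the classic caller pattern for drivers that complete a request in chunks; blk_mq_end_request() is essentially this plus __blk_mq_end_request() for the common all-at-once case:

#include <linux/blk-mq.h>

/* Illustrative completion helper: retire 'done' bytes, finish when empty. */
static void my_complete_bytes(struct request *rq, unsigned int done)
{
	if (blk_update_request(rq, BLK_STS_OK, done))
		return;				/* more data pending, wait for the next chunk */

	__blk_mq_end_request(rq, BLK_STS_OK);	/* every bio retired, end the request */
}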
998 if (blk_do_io_stat(req) && req->part && in blk_account_io_done()
999 !(req->rq_flags & RQF_FLUSH_SEQ)) { in blk_account_io_done()
1003 update_io_ticks(req->part, jiffies, true); in blk_account_io_done()
1004 part_stat_inc(req->part, ios[sgrp]); in blk_account_io_done()
1005 part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); in blk_account_io_done()
1016 * All non-passthrough requests are created from a bio with one in blk_account_io_start()
1018 * generated by the state machine in blk-flush.c is cloned onto the in blk_account_io_start()
1019 * lower device by dm-multipath we can get here without a bio. in blk_account_io_start()
1021 if (req->bio) in blk_account_io_start()
1022 req->part = req->bio->bi_bdev; in blk_account_io_start()
1024 req->part = req->q->disk->part0; in blk_account_io_start()
1027 update_io_ticks(req->part, jiffies, false); in blk_account_io_start()
1034 if (rq->rq_flags & RQF_STATS) in __blk_mq_end_request_acct()
1048 if (rq->end_io) { in __blk_mq_end_request()
1049 rq_qos_done(rq->q, rq); in __blk_mq_end_request()
1050 if (rq->end_io(rq, error) == RQ_END_IO_FREE) in __blk_mq_end_request()
1071 struct request_queue *q = hctx->queue; in blk_mq_flush_tag_batch()
1075 blk_mq_put_tags(hctx->tags, tag_array, nr_tags); in blk_mq_flush_tag_batch()
1076 percpu_ref_put_many(&q->q_usage_counter, nr_tags); in blk_mq_flush_tag_batch()
1086 if (iob->need_ts) in blk_mq_end_request_batch()
1089 while ((rq = rq_list_pop(&iob->req_list)) != NULL) { in blk_mq_end_request_batch()
1090 prefetch(rq->bio); in blk_mq_end_request_batch()
1091 prefetch(rq->rq_next); in blk_mq_end_request_batch()
1094 if (iob->need_ts) in blk_mq_end_request_batch()
1099 rq_qos_done(rq->q, rq); in blk_mq_end_request_batch()
1105 if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) in blk_mq_end_request_batch()
1108 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_end_request_batch()
1115 if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { in blk_mq_end_request_batch()
1119 cur_hctx = rq->mq_hctx; in blk_mq_end_request_batch()
1121 tags[nr_tags++] = rq->tag; in blk_mq_end_request_batch()
1135 rq->q->mq_ops->complete(rq); in blk_complete_reqs()
1159 !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) in blk_mq_complete_need_ipi()
1171 if (cpu == rq->mq_ctx->cpu || in blk_mq_complete_need_ipi()
1172 (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && in blk_mq_complete_need_ipi()
1173 cpus_share_cache(cpu, rq->mq_ctx->cpu))) in blk_mq_complete_need_ipi()
1177 return cpu_online(rq->mq_ctx->cpu); in blk_mq_complete_need_ipi()
1184 cpu = rq->mq_ctx->cpu; in blk_mq_complete_send_ipi()
1185 if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) in blk_mq_complete_send_ipi()
1195 if (llist_add(&rq->ipi_list, list)) in blk_mq_raise_softirq()
1202 WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); in blk_mq_complete_request_remote()
1209 if ((rq->mq_hctx->nr_ctx == 1 && in blk_mq_complete_request_remote()
1210 rq->mq_ctx->cpu == raw_smp_processor_id()) || in blk_mq_complete_request_remote()
1211 rq->cmd_flags & REQ_POLLED) in blk_mq_complete_request_remote()
1219 if (rq->q->nr_hw_queues == 1) { in blk_mq_complete_request_remote()
1228 * blk_mq_complete_request - end I/O on a request
1232 * Complete a request by scheduling the ->complete_rq operation.
1237 rq->q->mq_ops->complete(rq); in blk_mq_complete_request()
1242 * blk_mq_start_request - Start processing a request
1251 struct request_queue *q = rq->q; in blk_mq_start_request()
1255 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && in blk_mq_start_request()
1257 rq->io_start_time_ns = ktime_get_ns(); in blk_mq_start_request()
1258 rq->stats_sectors = blk_rq_sectors(rq); in blk_mq_start_request()
1259 rq->rq_flags |= RQF_STATS; in blk_mq_start_request()
1266 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); in blk_mq_start_request()
1267 rq->mq_hctx->tags->rqs[rq->tag] = rq; in blk_mq_start_request()
1271 q->integrity.profile->prepare_fn(rq); in blk_mq_start_request()
1273 if (rq->bio && rq->bio->bi_opf & REQ_POLLED) in blk_mq_start_request()
1274 WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); in blk_mq_start_request()
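blk_mq_start_request() and blk_mq_complete_request() bound the driver's side of a request's life: the first runs in ->queue_rq() before the command reaches hardware (arming the timeout and stats above), the second runs from the driver's interrupt path and schedules the ->complete() callback, possibly on another CPU. A minimal, hedged sketch of that flow for an imaginary driver:

#include <linux/blk-mq.h>
#include <linux/interrupt.h>

/* Imaginary driver: issue from ->queue_rq(), finish from hard-IRQ context. */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);	/* MQ_RQ_IN_FLIGHT, timeout armed */
	/* ...program the hardware with the command for rq... */
	return BLK_STS_OK;
}

static irqreturn_t my_irq(int irq, void *data)
{
	struct request *rq = data;	/* however the driver maps completions to rqs */

	blk_mq_complete_request(rq);	/* ends up in ->complete(), maybe via IPI */
	return IRQ_HANDLED;
}

static void my_complete(struct request *rq)	/* wired up as blk_mq_ops.complete */
{
	blk_mq_end_request(rq, BLK_STS_OK);	/* accounting + __blk_mq_end_request() */
}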
1279 * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
1285 if (plug->multiple_queues) in blk_plug_max_rq_count()
1292 struct request *last = rq_list_peek(&plug->mq_list); in blk_add_rq_to_plug()
1294 if (!plug->rq_count) { in blk_add_rq_to_plug()
1295 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1296 } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || in blk_add_rq_to_plug()
1297 (!blk_queue_nomerges(rq->q) && in blk_add_rq_to_plug()
1301 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1304 if (!plug->multiple_queues && last && last->q != rq->q) in blk_add_rq_to_plug()
1305 plug->multiple_queues = true; in blk_add_rq_to_plug()
1308 * ->queue_rqs() directly in blk_add_rq_to_plug()
1310 if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) in blk_add_rq_to_plug()
1311 plug->has_elevator = true; in blk_add_rq_to_plug()
1312 rq->rq_next = NULL; in blk_add_rq_to_plug()
1313 rq_list_add(&plug->mq_list, rq); in blk_add_rq_to_plug()
1314 plug->rq_count++; in blk_add_rq_to_plug()
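The plug list that blk_add_rq_to_plug() appends to is owned by the submitting task and drained by blk_mq_flush_plug_list() further down. The submitter side is plain kernel API usage, sketched here for context rather than taken from this file:

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Batch several submissions so they unplug as one dispatch pass. */
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* current->plug now collects requests */
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);	/* requests pile up on plug->mq_list */
	blk_finish_plug(&plug);		/* flushes the plug list toward the driver */
}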
1318 * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
1320 * @at_head: insert request at head or tail of queue
1323 * Insert a fully prepared request at the back of the I/O scheduler queue
1324 * for execution. Don't wait for completion.
1327 * This function will invoke @done directly if the queue is dead.
1331 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq_nowait()
1343 if (current->plug && !at_head) { in blk_execute_rq_nowait()
1344 blk_add_rq_to_plug(current->plug, rq); in blk_execute_rq_nowait()
1349 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_execute_rq_nowait()
1360 struct blk_rq_wait *wait = rq->end_io_data; in blk_end_sync_rq() local
1362 wait->ret = ret; in blk_end_sync_rq()
1363 complete(&wait->done); in blk_end_sync_rq()
1369 if (!rq->mq_hctx) in blk_rq_is_poll()
1371 if (rq->mq_hctx->type != HCTX_TYPE_POLL) in blk_rq_is_poll()
1377 static void blk_rq_poll_completion(struct request *rq, struct completion *wait) in blk_rq_poll_completion() argument
1380 blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); in blk_rq_poll_completion()
1382 } while (!completion_done(wait)); in blk_rq_poll_completion()
1386 * blk_execute_rq - insert a request into queue for execution
1388 * @at_head: insert request at head or tail of queue
1391 * Insert a fully prepared request at the back of the I/O scheduler queue
1392 * for execution and wait for completion.
1397 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq()
1398 struct blk_rq_wait wait = { in blk_execute_rq() local
1399 .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), in blk_execute_rq()
1405 rq->end_io_data = &wait; in blk_execute_rq()
1406 rq->end_io = blk_end_sync_rq; in blk_execute_rq()
1413 blk_rq_poll_completion(rq, &wait.done); in blk_execute_rq()
1422 while (!wait_for_completion_io_timeout(&wait.done, in blk_execute_rq()
1426 wait_for_completion_io(&wait.done); in blk_execute_rq()
1429 return wait.ret; in blk_execute_rq()
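blk_execute_rq() is the synchronous wrapper around blk_execute_rq_nowait(): it installs blk_end_sync_rq() as end_io, inserts the request, then either polls or sleeps on the on-stack completion shown above. A hedged sketch of a typical passthrough caller on a recent kernel; filling the driver-private command is omitted:

#include <linux/blk-mq.h>
#include <linux/err.h>

static blk_status_t my_sync_passthrough(struct request_queue *q)
{
	struct request *rq;
	blk_status_t ret;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
	if (IS_ERR(rq))
		return BLK_STS_RESOURCE;

	/* ...set up the command via blk_mq_rq_to_pdu(rq)... */

	ret = blk_execute_rq(rq, false);	/* insert at tail, wait for completion */
	blk_mq_free_request(rq);
	return ret;
}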
1435 struct request_queue *q = rq->q; in __blk_mq_requeue_request()
1443 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in __blk_mq_requeue_request()
1444 rq->rq_flags &= ~RQF_TIMED_OUT; in __blk_mq_requeue_request()
1450 struct request_queue *q = rq->q; in blk_mq_requeue_request()
1455 /* this request will be re-inserted to io scheduler queue */ in blk_mq_requeue_request()
1458 spin_lock_irqsave(&q->requeue_lock, flags); in blk_mq_requeue_request()
1459 list_add_tail(&rq->queuelist, &q->requeue_list); in blk_mq_requeue_request()
1460 spin_unlock_irqrestore(&q->requeue_lock, flags); in blk_mq_requeue_request()
1475 spin_lock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1476 list_splice_init(&q->requeue_list, &rq_list); in blk_mq_requeue_work()
1477 list_splice_init(&q->flush_list, &flush_list); in blk_mq_requeue_work()
1478 spin_unlock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1484 * driver already and might have driver-specific data allocated in blk_mq_requeue_work()
1488 if (rq->rq_flags & RQF_DONTPREP) { in blk_mq_requeue_work()
1489 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1492 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1499 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1508 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); in blk_mq_kick_requeue_list()
1515 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, in blk_mq_delay_kick_requeue_list()
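Requeueing hands a started request back to the block layer: blk_mq_requeue_request() parks it on q->requeue_list, and the kick helpers schedule the requeue work that blk_mq_requeue_work() runs above. A small hedged sketch of the usual driver-side use for a transient resource shortage:

#include <linux/blk-mq.h>

/* Transient failure path: give the request back and retry a bit later. */
static void my_handle_busy(struct request *rq)
{
	blk_mq_requeue_request(rq, false);		/* park it on q->requeue_list */
	blk_mq_delay_kick_requeue_list(rq->q, 100);	/* re-run the list in ~100 ms */
}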
1522 return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); in blk_is_flush_data_rq()
1528 * If we find a request that isn't idle we know the queue is busy in blk_mq_rq_inflight()
1532 * In case of queue quiesce, if one flush data request is completed, in blk_mq_rq_inflight()
1537 if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && in blk_mq_rq_inflight()
1560 req->rq_flags |= RQF_TIMED_OUT; in blk_mq_rq_timed_out()
1561 if (req->q->mq_ops->timeout) { in blk_mq_rq_timed_out()
1564 ret = req->q->mq_ops->timeout(req); in blk_mq_rq_timed_out()
1585 if (rq->rq_flags & RQF_TIMED_OUT) in blk_mq_req_expired()
1588 deadline = READ_ONCE(rq->deadline); in blk_mq_req_expired()
1589 if (time_after_eq(expired->timeout_start, deadline)) in blk_mq_req_expired()
1592 if (expired->next == 0) in blk_mq_req_expired()
1593 expired->next = deadline; in blk_mq_req_expired()
1594 else if (time_after(expired->next, deadline)) in blk_mq_req_expired()
1595 expired->next = deadline; in blk_mq_req_expired()
1602 if (rq->end_io(rq, 0) == RQ_END_IO_FREE) in blk_mq_put_rq_ref()
1621 expired->has_timedout_rq = true; in blk_mq_check_expired()
1647 * timeout at the same time a queue freeze is waiting in blk_mq_timeout_work()
1649 * acquire the queue reference here. in blk_mq_timeout_work()
1653 * obtain a reference even in the short window between the queue in blk_mq_timeout_work()
1659 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_mq_timeout_work()
1662 /* check if there is any timed-out request */ in blk_mq_timeout_work()
1668 * uses srcu or rcu, wait for a synchronization point to in blk_mq_timeout_work()
1671 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_timeout_work()
1678 mod_timer(&q->timeout, expired.next); in blk_mq_timeout_work()
1703 struct blk_mq_hw_ctx *hctx = flush_data->hctx; in flush_busy_ctx()
1704 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in flush_busy_ctx()
1705 enum hctx_type type = hctx->type; in flush_busy_ctx()
1707 spin_lock(&ctx->lock); in flush_busy_ctx()
1708 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); in flush_busy_ctx()
1710 spin_unlock(&ctx->lock); in flush_busy_ctx()
1716 * to the for-dispatch
1725 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); in blk_mq_flush_busy_ctxs()
1738 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; in dispatch_rq_from_ctx()
1739 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in dispatch_rq_from_ctx()
1740 enum hctx_type type = hctx->type; in dispatch_rq_from_ctx()
1742 spin_lock(&ctx->lock); in dispatch_rq_from_ctx()
1743 if (!list_empty(&ctx->rq_lists[type])) { in dispatch_rq_from_ctx()
1744 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); in dispatch_rq_from_ctx()
1745 list_del_init(&dispatch_data->rq->queuelist); in dispatch_rq_from_ctx()
1746 if (list_empty(&ctx->rq_lists[type])) in dispatch_rq_from_ctx()
1749 spin_unlock(&ctx->lock); in dispatch_rq_from_ctx()
1751 return !dispatch_data->rq; in dispatch_rq_from_ctx()
1757 unsigned off = start ? start->index_hw[hctx->type] : 0; in blk_mq_dequeue_from_ctx()
1763 __sbitmap_for_each_set(&hctx->ctx_map, off, in blk_mq_dequeue_from_ctx()
1771 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; in __blk_mq_alloc_driver_tag()
1772 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; in __blk_mq_alloc_driver_tag()
1775 blk_mq_tag_busy(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1777 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { in __blk_mq_alloc_driver_tag()
1778 bt = &rq->mq_hctx->tags->breserved_tags; in __blk_mq_alloc_driver_tag()
1781 if (!hctx_may_queue(rq->mq_hctx, bt)) in __blk_mq_alloc_driver_tag()
1789 rq->tag = tag + tag_offset; in __blk_mq_alloc_driver_tag()
1790 blk_mq_inc_active_requests(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1794 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, in blk_mq_dispatch_wake() argument
1799 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); in blk_mq_dispatch_wake()
1801 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1802 if (!list_empty(&wait->entry)) { in blk_mq_dispatch_wake()
1805 list_del_init(&wait->entry); in blk_mq_dispatch_wake()
1806 sbq = &hctx->tags->bitmap_tags; in blk_mq_dispatch_wake()
1807 atomic_dec(&sbq->ws_active); in blk_mq_dispatch_wake()
1809 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1817 * the tag wakeups. For non-shared tags, we can simply mark us needing a
1826 wait_queue_entry_t *wait; in blk_mq_mark_tag_wait() local
1829 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && in blk_mq_mark_tag_wait()
1830 !(blk_mq_is_shared_tags(hctx->flags))) { in blk_mq_mark_tag_wait()
1835 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1836 * queue. in blk_mq_mark_tag_wait()
1839 * At most this will cost an extra queue run. in blk_mq_mark_tag_wait()
1844 wait = &hctx->dispatch_wait; in blk_mq_mark_tag_wait()
1845 if (!list_empty_careful(&wait->entry)) in blk_mq_mark_tag_wait()
1848 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) in blk_mq_mark_tag_wait()
1849 sbq = &hctx->tags->breserved_tags; in blk_mq_mark_tag_wait()
1851 sbq = &hctx->tags->bitmap_tags; in blk_mq_mark_tag_wait()
1852 wq = &bt_wait_ptr(sbq, hctx)->wait; in blk_mq_mark_tag_wait()
1854 spin_lock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1855 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1856 if (!list_empty(&wait->entry)) { in blk_mq_mark_tag_wait()
1857 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1858 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1862 atomic_inc(&sbq->ws_active); in blk_mq_mark_tag_wait()
1863 wait->flags &= ~WQ_FLAG_EXCLUSIVE; in blk_mq_mark_tag_wait()
1864 __add_wait_queue(wq, wait); in blk_mq_mark_tag_wait()
1870 * Order adding us to wait queue and allocating driver tag. in blk_mq_mark_tag_wait()
1876 * Otherwise, re-order of adding wait queue and getting driver tag in blk_mq_mark_tag_wait()
1878 * the waitqueue_active() may not observe us in wait queue. in blk_mq_mark_tag_wait()
1884 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1885 * queue. in blk_mq_mark_tag_wait()
1889 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1890 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1895 * We got a tag, remove ourselves from the wait queue to ensure in blk_mq_mark_tag_wait()
1898 list_del_init(&wait->entry); in blk_mq_mark_tag_wait()
1899 atomic_dec(&sbq->ws_active); in blk_mq_mark_tag_wait()
1900 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1901 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1910 * - EWMA is one simple way to compute a running average value
1911 * - a 7/8 and 1/8 weighting is applied so that the value decays exponentially
1912 * - a factor of 4 is used to keep the result from rounding down to 0, and this
1919 ewma = hctx->dispatch_busy; in blk_mq_update_dispatch_busy()
1924 ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; in blk_mq_update_dispatch_busy()
1929 hctx->dispatch_busy = ewma; in blk_mq_update_dispatch_busy()
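Assuming the mainline constants of weight 8 and factor 4, the update above is the integer recurrence ewma' = (7 * ewma + (busy ? 16 : 0)) / 8: sustained busy dispatches push the value up, idle dispatches decay it back to 0 thanks to the flooring division. A tiny standalone rendering of the same arithmetic:

#include <linux/types.h>

/* Same arithmetic as above, assuming WEIGHT == 8 and FACTOR == 4 upstream. */
static unsigned int ewma_step(unsigned int ewma, bool busy)
{
	ewma *= 8 - 1;			/* keep 7/8 of the old value */
	if (busy)
		ewma += 1 << 4;		/* add 16 for a busy dispatch */
	return ewma / 8;		/* integer division: idle decays to 0 */
}

/* e.g. from 0: three busy steps give 2, 3, 4; three idle steps then give 3, 2, 1. */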
1937 list_add(&rq->queuelist, list); in blk_mq_handle_dev_resource()
1946 * specific zone due to LLD level zone-write locking or other zone in blk_mq_handle_zone_resource()
1950 list_add(&rq->queuelist, zone_list); in blk_mq_handle_zone_resource()
1963 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_prep_dispatch_rq()
1964 int budget_token = -1; in blk_mq_prep_dispatch_rq()
1967 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_prep_dispatch_rq()
1978 * rerun the hardware queue when a tag is freed. The in blk_mq_prep_dispatch_rq()
1979 * waitqueue takes care of that. If the queue is run in blk_mq_prep_dispatch_rq()
1981 * we'll re-run it below. in blk_mq_prep_dispatch_rq()
1989 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_prep_dispatch_rq()
2012 * blk_mq_commit_rqs will notify driver using bd->last that there is no
2016 * 1) did not queue everything initially scheduled to queue
2017 * 2) the last attempt to queue a request failed
2022 if (hctx->queue->mq_ops->commit_rqs && queued) { in blk_mq_commit_rqs()
2023 trace_block_unplug(hctx->queue, queued, !from_schedule); in blk_mq_commit_rqs()
2024 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_commit_rqs()
2035 struct request_queue *q = hctx->queue; in blk_mq_dispatch_rq_list()
2054 WARN_ON_ONCE(hctx != rq->mq_hctx); in blk_mq_dispatch_rq_list()
2059 list_del_init(&rq->queuelist); in blk_mq_dispatch_rq_list()
2069 nr_budgets--; in blk_mq_dispatch_rq_list()
2070 ret = q->mq_ops->queue_rq(hctx, &bd); in blk_mq_dispatch_rq_list()
2105 * Any items that need requeuing? Stuff them into hctx->dispatch, in blk_mq_dispatch_rq_list()
2106 * that is where we will continue on next queue run. in blk_mq_dispatch_rq_list()
2110 /* For non-shared tags, the RESTART check will suffice */ in blk_mq_dispatch_rq_list()
2112 ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || in blk_mq_dispatch_rq_list()
2113 blk_mq_is_shared_tags(hctx->flags)); in blk_mq_dispatch_rq_list()
2118 spin_lock(&hctx->lock); in blk_mq_dispatch_rq_list()
2119 list_splice_tail_init(list, &hctx->dispatch); in blk_mq_dispatch_rq_list()
2120 spin_unlock(&hctx->lock); in blk_mq_dispatch_rq_list()
2123 * Order adding requests to hctx->dispatch and checking in blk_mq_dispatch_rq_list()
2126 * miss the new added requests to hctx->dispatch, meantime in blk_mq_dispatch_rq_list()
2134 * thread and hence that a queue rerun is needed. in blk_mq_dispatch_rq_list()
2138 * waitqueue is no longer active, ensure that we run the queue in blk_mq_dispatch_rq_list()
2142 * the hardware queue got stopped and restarted before requests in blk_mq_dispatch_rq_list()
2143 * were pushed back onto the dispatch list. Rerun the queue to in blk_mq_dispatch_rq_list()
2145 * - blk_mq_run_hw_queue() checks whether or not a queue has in blk_mq_dispatch_rq_list()
2146 * been stopped before rerunning a queue. in blk_mq_dispatch_rq_list()
2147 * - Some but not all block drivers stop a queue before in blk_mq_dispatch_rq_list()
2148 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq in blk_mq_dispatch_rq_list()
2149 * and dm-rq. in blk_mq_dispatch_rq_list()
2152 * bit is set, run queue after a delay to avoid IO stalls in blk_mq_dispatch_rq_list()
2153 * that could otherwise occur if the queue is idle. We'll do in blk_mq_dispatch_rq_list()
2161 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) in blk_mq_dispatch_rq_list()
2176 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); in blk_mq_first_mapped_cpu()
2179 cpu = cpumask_first(hctx->cpumask); in blk_mq_first_mapped_cpu()
2186 * For now we just round-robin here, switching for every
2192 int next_cpu = hctx->next_cpu; in blk_mq_hctx_next_cpu()
2194 if (hctx->queue->nr_hw_queues == 1) in blk_mq_hctx_next_cpu()
2197 if (--hctx->next_cpu_batch <= 0) { in blk_mq_hctx_next_cpu()
2199 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, in blk_mq_hctx_next_cpu()
2203 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_hctx_next_cpu()
2217 * Make sure to re-select CPU next time once after CPUs in blk_mq_hctx_next_cpu()
2218 * in hctx->cpumask become online again. in blk_mq_hctx_next_cpu()
2220 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2221 hctx->next_cpu_batch = 1; in blk_mq_hctx_next_cpu()
2225 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2230 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
2231 * @hctx: Pointer to the hardware queue to run.
2232 * @msecs: Milliseconds of delay to wait before running the queue.
2234 * Run a hardware queue asynchronously with a delay of @msecs.
2240 kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, in blk_mq_delay_run_hw_queue()
2246 * blk_mq_run_hw_queue - Start to run a hardware queue.
2247 * @hctx: Pointer to the hardware queue to run.
2248 * @async: If we want to run the queue asynchronously.
2250 * Check if the request queue is not in a quiesced state and if there are
2251 * pending requests to be sent. If this is true, run the queue to send requests
2259 * We can't run the queue inline with interrupts disabled. in blk_mq_run_hw_queue()
2263 might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_run_hw_queue()
2266 * When queue is quiesced, we may be switching io scheduler, or in blk_mq_run_hw_queue()
2267 * updating nr_hw_queues, or other things, and we can't run queue in blk_mq_run_hw_queue()
2270 * And queue will be rerun in blk_mq_unquiesce_queue() if it is in blk_mq_run_hw_queue()
2273 __blk_mq_run_dispatch_ops(hctx->queue, false, in blk_mq_run_hw_queue()
2274 need_run = !blk_queue_quiesced(hctx->queue) && in blk_mq_run_hw_queue()
2280 if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { in blk_mq_run_hw_queue()
2285 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_hw_queue()
2291 * Return preferred queue to dispatch from (if any) for non-mq aware IO
2304 struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; in blk_mq_get_sq_hctx()
2312 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
2313 * @q: Pointer to the request queue to run.
2314 * @async: If we want to run the queue asynchronously.
2333 !list_empty_careful(&hctx->dispatch)) in blk_mq_run_hw_queues()
2340 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
2341 * @q: Pointer to the request queue to run.
2342 * @msecs: Milliseconds of delay to wait before running the queues.
2358 * if another hctx is re-delaying the other's work in blk_mq_delay_run_hw_queues()
2361 if (delayed_work_pending(&hctx->run_work)) in blk_mq_delay_run_hw_queues()
2369 !list_empty_careful(&hctx->dispatch)) in blk_mq_delay_run_hw_queues()
2386 cancel_delayed_work(&hctx->run_work); in blk_mq_stop_hw_queue()
2388 set_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_stop_hw_queue()
2413 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_hw_queue()
2415 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_start_hw_queue()
2434 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_stopped_hw_queue()
2446 (hctx->flags & BLK_MQ_F_BLOCKING)); in blk_mq_start_stopped_hw_queues()
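The stop/start/run helpers in this region let a driver throttle dispatch itself: blk_mq_stop_hw_queues() sets BLK_MQ_S_STOPPED so ->queue_rq() is no longer invoked, and blk_mq_start_stopped_hw_queues() (or blk_mq_run_hw_queues()) gets dispatch going again once resources return. A hedged sketch of the common pairing:

#include <linux/blk-mq.h>

/* Out of hardware slots: stop dispatch now, restart it from the
 * completion/interrupt path once slots free up again. */
static void my_pause_dispatch(struct request_queue *q)
{
	blk_mq_stop_hw_queues(q);			/* sets BLK_MQ_S_STOPPED on each hctx */
}

static void my_resume_dispatch(struct request_queue *q)
{
	blk_mq_start_stopped_hw_queues(q, true);	/* clears STOPPED and runs async */
}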
2455 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_work_fn()
2460 * blk_mq_request_bypass_insert - Insert a request into the dispatch list.
2469 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_bypass_insert()
2471 spin_lock(&hctx->lock); in blk_mq_request_bypass_insert()
2473 list_add(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2475 list_add_tail(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2476 spin_unlock(&hctx->lock); in blk_mq_request_bypass_insert()
2484 enum hctx_type type = hctx->type; in blk_mq_insert_requests()
2487 * Try to issue requests directly if the hw queue isn't busy to save an in blk_mq_insert_requests()
2488 * extra enqueue & dequeue to the sw queue. in blk_mq_insert_requests()
2490 if (!hctx->dispatch_busy && !run_queue_async) { in blk_mq_insert_requests()
2491 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_insert_requests()
2498 * preemption doesn't flush plug list, so it's possible ctx->cpu is in blk_mq_insert_requests()
2502 BUG_ON(rq->mq_ctx != ctx); in blk_mq_insert_requests()
2504 if (rq->cmd_flags & REQ_NOWAIT) in blk_mq_insert_requests()
2508 spin_lock(&ctx->lock); in blk_mq_insert_requests()
2509 list_splice_tail_init(list, &ctx->rq_lists[type]); in blk_mq_insert_requests()
2511 spin_unlock(&ctx->lock); in blk_mq_insert_requests()
2518 struct request_queue *q = rq->q; in blk_mq_insert_request()
2519 struct blk_mq_ctx *ctx = rq->mq_ctx; in blk_mq_insert_request()
2520 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_insert_request()
2524 * Passthrough request have to be added to hctx->dispatch in blk_mq_insert_request()
2527 * them, which gets them added to hctx->dispatch. in blk_mq_insert_request()
2530 * and it is added to the scheduler queue, there is no chance to in blk_mq_insert_request()
2531 * dispatch it given we prioritize requests in hctx->dispatch. in blk_mq_insert_request()
2536 * Firstly normal IO request is inserted to scheduler queue or in blk_mq_insert_request()
2537 * sw queue, meantime we add flush request to dispatch queue( in blk_mq_insert_request()
2538 * hctx->dispatch) directly and there is at most one in-flight in blk_mq_insert_request()
2539 * flush request for each hw queue, so it doesn't matter to add in blk_mq_insert_request()
2540 * flush request to tail or front of the dispatch queue. in blk_mq_insert_request()
2542 * Secondly in case of NCQ, flush request belongs to non-NCQ in blk_mq_insert_request()
2544 * in-flight normal IO request(NCQ command). When adding flush in blk_mq_insert_request()
2545 * rq to the front of hctx->dispatch, it is easier to introduce in blk_mq_insert_request()
2547 * compared with adding to the tail of dispatch queue, then in blk_mq_insert_request()
2551 * drive when adding flush rq to the front of hctx->dispatch. in blk_mq_insert_request()
2553 * Simply queue flush rq to the front of hctx->dispatch so that in blk_mq_insert_request()
2557 } else if (q->elevator) { in blk_mq_insert_request()
2560 WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); in blk_mq_insert_request()
2562 list_add(&rq->queuelist, &list); in blk_mq_insert_request()
2563 q->elevator->type->ops.insert_requests(hctx, &list, flags); in blk_mq_insert_request()
2567 spin_lock(&ctx->lock); in blk_mq_insert_request()
2569 list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2571 list_add_tail(&rq->queuelist, in blk_mq_insert_request()
2572 &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2574 spin_unlock(&ctx->lock); in blk_mq_insert_request()
2583 if (bio->bi_opf & REQ_RAHEAD) in blk_mq_bio_to_request()
2584 rq->cmd_flags |= REQ_FAILFAST_MASK; in blk_mq_bio_to_request()
2586 rq->__sector = bio->bi_iter.bi_sector; in blk_mq_bio_to_request()
2599 struct request_queue *q = rq->q; in __blk_mq_issue_directly()
2607 * For OK queue, we are done. For error, caller may kill it. in __blk_mq_issue_directly()
2611 ret = q->mq_ops->queue_rq(hctx, &bd); in __blk_mq_issue_directly()
2633 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_get_budget_and_tag()
2638 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_get_budget_and_tag()
2645 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2646 * @hctx: Pointer of the associated hardware queue.
2650 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2652 * queue have higher priority.
2659 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_try_issue_directly()
2664 if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { in blk_mq_try_issue_directly()
2666 blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); in blk_mq_try_issue_directly()
2687 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_issue_directly()
2689 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_request_issue_directly()
2706 while ((rq = rq_list_pop(&plug->mq_list))) { in blk_mq_plug_issue_direct()
2707 bool last = rq_list_empty(plug->mq_list); in blk_mq_plug_issue_direct()
2709 if (hctx != rq->mq_hctx) { in blk_mq_plug_issue_direct()
2714 hctx = rq->mq_hctx; in blk_mq_plug_issue_direct()
2743 q->mq_ops->queue_rqs(&plug->mq_list); in __blk_mq_flush_plug_list()
2757 struct request *rq = rq_list_pop(&plug->mq_list); in blk_mq_dispatch_plug_list()
2760 this_hctx = rq->mq_hctx; in blk_mq_dispatch_plug_list()
2761 this_ctx = rq->mq_ctx; in blk_mq_dispatch_plug_list()
2763 } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || in blk_mq_dispatch_plug_list()
2768 list_add(&rq->queuelist, &list); in blk_mq_dispatch_plug_list()
2770 } while (!rq_list_empty(plug->mq_list)); in blk_mq_dispatch_plug_list()
2772 plug->mq_list = requeue_list; in blk_mq_dispatch_plug_list()
2773 trace_block_unplug(this_hctx->queue, depth, !from_sched); in blk_mq_dispatch_plug_list()
2775 percpu_ref_get(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2778 spin_lock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2779 list_splice_tail_init(&list, &this_hctx->dispatch); in blk_mq_dispatch_plug_list()
2780 spin_unlock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2782 } else if (this_hctx->queue->elevator) { in blk_mq_dispatch_plug_list()
2783 this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, in blk_mq_dispatch_plug_list()
2789 percpu_ref_put(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2798 * plug->mq_list via a schedule() in the driver's queue_rq() callback. in blk_mq_flush_plug_list()
2803 if (plug->rq_count == 0) in blk_mq_flush_plug_list()
2805 plug->rq_count = 0; in blk_mq_flush_plug_list()
2807 if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { in blk_mq_flush_plug_list()
2810 rq = rq_list_peek(&plug->mq_list); in blk_mq_flush_plug_list()
2811 q = rq->q; in blk_mq_flush_plug_list()
2814 * Peek first request and see if we have a ->queue_rqs() hook. in blk_mq_flush_plug_list()
2817 * same queue, caller must ensure that's the case. in blk_mq_flush_plug_list()
2819 if (q->mq_ops->queue_rqs) { in blk_mq_flush_plug_list()
2822 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2828 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2834 } while (!rq_list_empty(plug->mq_list)); in blk_mq_flush_plug_list()
2847 list_del_init(&rq->queuelist); in blk_mq_try_issue_list_directly()
2890 .cmd_flags = bio->bi_opf, in blk_mq_get_new_requests()
2900 data.nr_tags = plug->nr_ios; in blk_mq_get_new_requests()
2901 plug->nr_ios = 1; in blk_mq_get_new_requests()
2902 data.cached_rq = &plug->cached_rq; in blk_mq_get_new_requests()
2909 if (bio->bi_opf & REQ_NOWAIT) in blk_mq_get_new_requests()
2921 enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); in blk_mq_use_cached_rq()
2922 enum hctx_type hctx_type = rq->mq_hctx->type; in blk_mq_use_cached_rq()
2924 WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); in blk_mq_use_cached_rq()
2929 if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) in blk_mq_use_cached_rq()
2933 * If any qos ->throttle() end up blocking, we will have flushed the in blk_mq_use_cached_rq()
2937 plug->cached_rq = rq_list_next(rq); in blk_mq_use_cached_rq()
2938 rq_qos_throttle(rq->q, bio); in blk_mq_use_cached_rq()
2941 rq->cmd_flags = bio->bi_opf; in blk_mq_use_cached_rq()
2942 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_use_cached_rq()
2947 * blk_mq_submit_bio - Create and send a request to block device.
2953 * * We want to place request at plug queue for possible future merging
2954 * * There is an IO scheduler active at this queue
2956 * It will not queue the request if there is an error with the bio, or at the
2961 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in blk_mq_submit_bio()
2963 const int is_sync = op_is_sync(bio->bi_opf); in blk_mq_submit_bio()
2972 rq = rq_list_peek(&plug->cached_rq); in blk_mq_submit_bio()
2973 if (rq && rq->q != q) in blk_mq_submit_bio()
2977 if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { in blk_mq_submit_bio()
2978 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
2988 percpu_ref_get(&q->q_usage_counter); in blk_mq_submit_bio()
2992 if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { in blk_mq_submit_bio()
2993 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
3017 bio->bi_status = ret; in blk_mq_submit_bio()
3023 if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) in blk_mq_submit_bio()
3031 hctx = rq->mq_hctx; in blk_mq_submit_bio()
3032 if ((rq->rq_flags & RQF_USE_SCHED) || in blk_mq_submit_bio()
3033 (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { in blk_mq_submit_bio()
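blk_mq_submit_bio() is reached through submit_bio(); everything a caller builds ends up in the plugging, merging and insertion logic above. A hedged caller-side sketch of a single-sector read into a page, using the current bio allocation helpers; all names are hypothetical:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Illustrative caller: everything below funnels into blk_mq_submit_bio(). */
static void my_read_sector(struct block_device *bdev, struct page *page,
			   sector_t sector, bio_end_io_t *done)
{
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_NOIO);

	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, SECTOR_SIZE, 0);
	bio->bi_end_io = done;
	submit_bio(bio);		/* plugged, merged or queued as described above */
}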
3043 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
3048 struct request_queue *q = rq->q; in blk_insert_cloned_request()
3057 * a non-read/write command (discard, write same, etc.) the in blk_insert_cloned_request()
3058 * low-level device driver will set the relevant queue limit to in blk_insert_cloned_request()
3059 * 0 to prevent blk-lib from issuing more of the offending in blk_insert_cloned_request()
3060 * operations. Commands queued prior to the queue limit being in blk_insert_cloned_request()
3073 * The queue settings related to segment counting may differ from the in blk_insert_cloned_request()
3074 * original queue. in blk_insert_cloned_request()
3076 rq->nr_phys_segments = blk_recalc_rq_segments(rq); in blk_insert_cloned_request()
3077 if (rq->nr_phys_segments > max_segments) { in blk_insert_cloned_request()
3079 __func__, rq->nr_phys_segments, max_segments); in blk_insert_cloned_request()
3083 if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) in blk_insert_cloned_request()
3106 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
3116 while ((bio = rq->bio) != NULL) { in blk_rq_unprep_clone()
3117 rq->bio = bio->bi_next; in blk_rq_unprep_clone()
3125 * blk_rq_prep_clone - Helper function to setup clone request
3152 bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, in blk_rq_prep_clone()
3160 if (rq->bio) { in blk_rq_prep_clone()
3161 rq->biotail->bi_next = bio; in blk_rq_prep_clone()
3162 rq->biotail = bio; in blk_rq_prep_clone()
3164 rq->bio = rq->biotail = bio; in blk_rq_prep_clone()
3170 rq->__sector = blk_rq_pos(rq_src); in blk_rq_prep_clone()
3171 rq->__data_len = blk_rq_bytes(rq_src); in blk_rq_prep_clone()
3172 if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { in blk_rq_prep_clone()
3173 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; in blk_rq_prep_clone()
3174 rq->special_vec = rq_src->special_vec; in blk_rq_prep_clone()
3176 rq->nr_phys_segments = rq_src->nr_phys_segments; in blk_rq_prep_clone()
3177 rq->ioprio = rq_src->ioprio; in blk_rq_prep_clone()
3179 if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) in blk_rq_prep_clone()
3189 return -ENOMEM; in blk_rq_prep_clone()
3200 if (rq->bio) { in blk_steal_bios()
3201 if (list->tail) in blk_steal_bios()
3202 list->tail->bi_next = rq->bio; in blk_steal_bios()
3204 list->head = rq->bio; in blk_steal_bios()
3205 list->tail = rq->biotail; in blk_steal_bios()
3207 rq->bio = NULL; in blk_steal_bios()
3208 rq->biotail = NULL; in blk_steal_bios()
3211 rq->__data_len = 0; in blk_steal_bios()
3234 list_for_each_entry(page, &tags->page_list, lru) { in blk_mq_clear_rq_mapping()
3236 unsigned long end = start + order_to_size(page->private); in blk_mq_clear_rq_mapping()
3239 for (i = 0; i < drv_tags->nr_tags; i++) { in blk_mq_clear_rq_mapping()
3240 struct request *rq = drv_tags->rqs[i]; in blk_mq_clear_rq_mapping()
3245 cmpxchg(&drv_tags->rqs[i], rq, NULL); in blk_mq_clear_rq_mapping()
3251 * Wait until all pending iterations are done. in blk_mq_clear_rq_mapping()
3254 * after the ->lock is released. in blk_mq_clear_rq_mapping()
3256 spin_lock_irqsave(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3257 spin_unlock_irqrestore(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3266 if (list_empty(&tags->page_list)) in blk_mq_free_rqs()
3269 if (blk_mq_is_shared_tags(set->flags)) in blk_mq_free_rqs()
3270 drv_tags = set->shared_tags; in blk_mq_free_rqs()
3272 drv_tags = set->tags[hctx_idx]; in blk_mq_free_rqs()
3274 if (tags->static_rqs && set->ops->exit_request) { in blk_mq_free_rqs()
3277 for (i = 0; i < tags->nr_tags; i++) { in blk_mq_free_rqs()
3278 struct request *rq = tags->static_rqs[i]; in blk_mq_free_rqs()
3282 set->ops->exit_request(set, rq, hctx_idx); in blk_mq_free_rqs()
3283 tags->static_rqs[i] = NULL; in blk_mq_free_rqs()
3289 while (!list_empty(&tags->page_list)) { in blk_mq_free_rqs()
3290 page = list_first_entry(&tags->page_list, struct page, lru); in blk_mq_free_rqs()
3291 list_del_init(&page->lru); in blk_mq_free_rqs()
3297 __free_pages(page, page->private); in blk_mq_free_rqs()
3303 kfree(tags->rqs); in blk_mq_free_rq_map()
3304 tags->rqs = NULL; in blk_mq_free_rq_map()
3305 kfree(tags->static_rqs); in blk_mq_free_rq_map()
3306 tags->static_rqs = NULL; in blk_mq_free_rq_map()
3316 for (i = 0; i < set->nr_maps; i++) { in hctx_idx_to_type()
3317 unsigned int start = set->map[i].queue_offset; in hctx_idx_to_type()
3318 unsigned int end = start + set->map[i].nr_queues; in hctx_idx_to_type()
3324 if (i >= set->nr_maps) in hctx_idx_to_type()
3335 return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); in blk_mq_get_hctx_node()
3347 node = set->numa_node; in blk_mq_alloc_rq_map()
3350 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); in blk_mq_alloc_rq_map()
3354 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3357 if (!tags->rqs) in blk_mq_alloc_rq_map()
3360 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3363 if (!tags->static_rqs) in blk_mq_alloc_rq_map()
3369 kfree(tags->rqs); in blk_mq_alloc_rq_map()
3380 if (set->ops->init_request) { in blk_mq_init_request()
3381 ret = set->ops->init_request(set, rq, hctx_idx, node); in blk_mq_init_request()
3386 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_init_request()
3399 node = set->numa_node; in blk_mq_alloc_rqs()
3401 INIT_LIST_HEAD(&tags->page_list); in blk_mq_alloc_rqs()
3407 rq_size = round_up(sizeof(struct request) + set->cmd_size, in blk_mq_alloc_rqs()
3417 while (this_order && left < order_to_size(this_order - 1)) in blk_mq_alloc_rqs()
3418 this_order--; in blk_mq_alloc_rqs()
3426 if (!this_order--) in blk_mq_alloc_rqs()
3435 page->private = this_order; in blk_mq_alloc_rqs()
3436 list_add_tail(&page->lru, &tags->page_list); in blk_mq_alloc_rqs()
3441 * to additional allocations like via ops->init_request(). in blk_mq_alloc_rqs()
3445 to_do = min(entries_per_page, depth - i); in blk_mq_alloc_rqs()
3446 left -= to_do * rq_size; in blk_mq_alloc_rqs()
3450 tags->static_rqs[i] = rq; in blk_mq_alloc_rqs()
3452 tags->static_rqs[i] = NULL; in blk_mq_alloc_rqs()
3464 return -ENOMEM; in blk_mq_alloc_rqs()
3476 if (rq->mq_hctx != iter_data->hctx) in blk_mq_has_request()
3478 iter_data->has_rq = true; in blk_mq_has_request()
3484 struct blk_mq_tags *tags = hctx->sched_tags ? in blk_mq_hctx_has_requests()
3485 hctx->sched_tags : hctx->tags; in blk_mq_hctx_has_requests()
3497 if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) in blk_mq_last_cpu_in_hctx()
3499 if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) in blk_mq_last_cpu_in_hctx()
3509 if (!cpumask_test_cpu(cpu, hctx->cpumask) || in blk_mq_hctx_notify_offline()
3520 set_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_offline()
3524 * Try to grab a reference to the queue and wait for any outstanding in blk_mq_hctx_notify_offline()
3525 * requests. If we could not grab a reference the queue has been in blk_mq_hctx_notify_offline()
3528 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { in blk_mq_hctx_notify_offline()
3531 percpu_ref_put(&hctx->queue->q_usage_counter); in blk_mq_hctx_notify_offline()
3542 if (cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_online()
3543 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_online()
3549 * software queue to the hw queue dispatch list, and ensure that it
3560 if (!cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_dead()
3563 ctx = __blk_mq_get_ctx(hctx->queue, cpu); in blk_mq_hctx_notify_dead()
3564 type = hctx->type; in blk_mq_hctx_notify_dead()
3566 spin_lock(&ctx->lock); in blk_mq_hctx_notify_dead()
3567 if (!list_empty(&ctx->rq_lists[type])) { in blk_mq_hctx_notify_dead()
3568 list_splice_init(&ctx->rq_lists[type], &tmp); in blk_mq_hctx_notify_dead()
3571 spin_unlock(&ctx->lock); in blk_mq_hctx_notify_dead()
3576 spin_lock(&hctx->lock); in blk_mq_hctx_notify_dead()
3577 list_splice_tail_init(&tmp, &hctx->dispatch); in blk_mq_hctx_notify_dead()
3578 spin_unlock(&hctx->lock); in blk_mq_hctx_notify_dead()
3586 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_remove_cpuhp()
3588 &hctx->cpuhp_online); in blk_mq_remove_cpuhp()
3590 &hctx->cpuhp_dead); in blk_mq_remove_cpuhp()
3594 * Before freeing the hw queue, clear the flush request reference in
3595 * tags->rqs[] to avoid a potential UAF.
3603 /* The hw queue may not be mapped yet */ in blk_mq_clear_flush_rq_mapping()
3610 cmpxchg(&tags->rqs[i], flush_rq, NULL); in blk_mq_clear_flush_rq_mapping()
3613 * Wait until all pending iterations are done. in blk_mq_clear_flush_rq_mapping()
3616 * after the ->lock is released. in blk_mq_clear_flush_rq_mapping()
3618 spin_lock_irqsave(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3619 spin_unlock_irqrestore(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3622 /* hctx->ctxs will be freed in queue's release handler */
3627 struct request *flush_rq = hctx->fq->flush_rq; in blk_mq_exit_hctx()
3633 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], in blk_mq_exit_hctx()
3634 set->queue_depth, flush_rq); in blk_mq_exit_hctx()
3635 if (set->ops->exit_request) in blk_mq_exit_hctx()
3636 set->ops->exit_request(set, flush_rq, hctx_idx); in blk_mq_exit_hctx()
3638 if (set->ops->exit_hctx) in blk_mq_exit_hctx()
3639 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_exit_hctx()
3643 xa_erase(&q->hctx_table, hctx_idx); in blk_mq_exit_hctx()
3645 spin_lock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3646 list_add(&hctx->hctx_list, &q->unused_hctx_list); in blk_mq_exit_hctx()
3647 spin_unlock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3667 hctx->queue_num = hctx_idx; in blk_mq_init_hctx()
3669 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_init_hctx()
3671 &hctx->cpuhp_online); in blk_mq_init_hctx()
3672 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); in blk_mq_init_hctx()
3674 hctx->tags = set->tags[hctx_idx]; in blk_mq_init_hctx()
3676 if (set->ops->init_hctx && in blk_mq_init_hctx()
3677 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) in blk_mq_init_hctx()
3680 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, in blk_mq_init_hctx()
3681 hctx->numa_node)) in blk_mq_init_hctx()
3684 if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) in blk_mq_init_hctx()
3690 if (set->ops->exit_request) in blk_mq_init_hctx()
3691 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); in blk_mq_init_hctx()
3693 if (set->ops->exit_hctx) in blk_mq_init_hctx()
3694 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_init_hctx()
3697 return -1; in blk_mq_init_hctx()
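A hedged sketch of the driver side of the ->init_hctx()/->exit_hctx() pair invoked above; struct foo_device and its hw_queues[] array are hypothetical, and driver_data is whatever the driver stored in set->driver_data:

static int foo_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
			 unsigned int hctx_idx)
{
	struct foo_device *foo = driver_data;

	/* Remember the driver's per-hardware-queue state on the hctx. */
	hctx->driver_data = &foo->hw_queues[hctx_idx];
	return 0;
}

static void foo_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	hctx->driver_data = NULL;
}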
3711 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) in blk_mq_alloc_hctx()
3714 atomic_set(&hctx->nr_active, 0); in blk_mq_alloc_hctx()
3716 node = set->numa_node; in blk_mq_alloc_hctx()
3717 hctx->numa_node = node; in blk_mq_alloc_hctx()
3719 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); in blk_mq_alloc_hctx()
3720 spin_lock_init(&hctx->lock); in blk_mq_alloc_hctx()
3721 INIT_LIST_HEAD(&hctx->dispatch); in blk_mq_alloc_hctx()
3722 hctx->queue = q; in blk_mq_alloc_hctx()
3723 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_alloc_hctx()
3725 INIT_LIST_HEAD(&hctx->hctx_list); in blk_mq_alloc_hctx()
3731 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), in blk_mq_alloc_hctx()
3733 if (!hctx->ctxs) in blk_mq_alloc_hctx()
3736 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), in blk_mq_alloc_hctx()
3739 hctx->nr_ctx = 0; in blk_mq_alloc_hctx()
3741 spin_lock_init(&hctx->dispatch_wait_lock); in blk_mq_alloc_hctx()
3742 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); in blk_mq_alloc_hctx()
3743 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); in blk_mq_alloc_hctx()
3745 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); in blk_mq_alloc_hctx()
3746 if (!hctx->fq) in blk_mq_alloc_hctx()
3754 sbitmap_free(&hctx->ctx_map); in blk_mq_alloc_hctx()
3756 kfree(hctx->ctxs); in blk_mq_alloc_hctx()
3758 free_cpumask_var(hctx->cpumask); in blk_mq_alloc_hctx()
3768 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_init_cpu_queues()
3772 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_init_cpu_queues()
3776 __ctx->cpu = i; in blk_mq_init_cpu_queues()
3777 spin_lock_init(&__ctx->lock); in blk_mq_init_cpu_queues()
3779 INIT_LIST_HEAD(&__ctx->rq_lists[k]); in blk_mq_init_cpu_queues()
3781 __ctx->queue = q; in blk_mq_init_cpu_queues()
3784 	 * Set the local node, IFF we have more than one hw queue. If not, we remain on the home node of the device. in blk_mq_init_cpu_queues()
3787 for (j = 0; j < set->nr_maps; j++) { in blk_mq_init_cpu_queues()
3789 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) in blk_mq_init_cpu_queues()
3790 hctx->numa_node = cpu_to_node(i); in blk_mq_init_cpu_queues()
3802 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); in blk_mq_alloc_map_and_rqs()
3818 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_map_and_rqs()
3819 set->tags[hctx_idx] = set->shared_tags; in __blk_mq_alloc_map_and_rqs()
3824 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, in __blk_mq_alloc_map_and_rqs()
3825 set->queue_depth); in __blk_mq_alloc_map_and_rqs()
3827 return set->tags[hctx_idx]; in __blk_mq_alloc_map_and_rqs()
3843 if (!blk_mq_is_shared_tags(set->flags)) in __blk_mq_free_map_and_rqs()
3844 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); in __blk_mq_free_map_and_rqs()
3846 set->tags[hctx_idx] = NULL; in __blk_mq_free_map_and_rqs()
3855 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_map_swqueue()
3858 cpumask_clear(hctx->cpumask); in blk_mq_map_swqueue()
3859 hctx->nr_ctx = 0; in blk_mq_map_swqueue()
3860 hctx->dispatch_from = NULL; in blk_mq_map_swqueue()
3870 ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_map_swqueue()
3871 for (j = 0; j < set->nr_maps; j++) { in blk_mq_map_swqueue()
3872 if (!set->map[j].nr_queues) { in blk_mq_map_swqueue()
3873 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3877 hctx_idx = set->map[j].mq_map[i]; in blk_mq_map_swqueue()
3878 	 /* an unmapped hw queue can be remapped after the CPU topology changes */ in blk_mq_map_swqueue()
3879 if (!set->tags[hctx_idx] && in blk_mq_map_swqueue()
3887 set->map[j].mq_map[i] = 0; in blk_mq_map_swqueue()
3891 ctx->hctxs[j] = hctx; in blk_mq_map_swqueue()
3895 	 * If the CPU is already set in the mask, it has been mapped already; this can happen when devices share queues across queue maps. in blk_mq_map_swqueue()
3897 if (cpumask_test_cpu(i, hctx->cpumask)) in blk_mq_map_swqueue()
3900 cpumask_set_cpu(i, hctx->cpumask); in blk_mq_map_swqueue()
3901 hctx->type = j; in blk_mq_map_swqueue()
3902 ctx->index_hw[hctx->type] = hctx->nr_ctx; in blk_mq_map_swqueue()
3903 hctx->ctxs[hctx->nr_ctx++] = ctx; in blk_mq_map_swqueue()
3909 BUG_ON(!hctx->nr_ctx); in blk_mq_map_swqueue()
3913 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3919 	 * If no software queues are mapped to this hardware queue, disable it and free its request entries. in blk_mq_map_swqueue()
3922 if (!hctx->nr_ctx) { in blk_mq_map_swqueue()
3923 	 /* Never unmap queue 0. We need it as a fallback in case a queue map fails. in blk_mq_map_swqueue()
3930 hctx->tags = NULL; in blk_mq_map_swqueue()
3934 hctx->tags = set->tags[i]; in blk_mq_map_swqueue()
3935 WARN_ON(!hctx->tags); in blk_mq_map_swqueue()
3942 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); in blk_mq_map_swqueue()
3947 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); in blk_mq_map_swqueue()
3948 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_map_swqueue()
3954  * Caller needs to ensure we are either frozen/quiesced, or that the queue isn't live yet.
3963 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3966 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3976 lockdep_assert_held(&set->tag_list_lock); in blk_mq_update_tag_set_shared()
3978 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_update_tag_set_shared()
3987 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_del_queue_tag_set()
3989 mutex_lock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3990 list_del(&q->tag_set_list); in blk_mq_del_queue_tag_set()
3991 if (list_is_singular(&set->tag_list)) { in blk_mq_del_queue_tag_set()
3993 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_del_queue_tag_set()
3994 /* update existing queue */ in blk_mq_del_queue_tag_set()
3997 mutex_unlock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3998 INIT_LIST_HEAD(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4004 mutex_lock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4009 if (!list_empty(&set->tag_list) && in blk_mq_add_queue_tag_set()
4010 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_add_queue_tag_set()
4011 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_add_queue_tag_set()
4012 /* update existing queue */ in blk_mq_add_queue_tag_set()
4015 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_add_queue_tag_set()
4017 list_add_tail(&q->tag_set_list, &set->tag_list); in blk_mq_add_queue_tag_set()
4019 mutex_unlock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4022 /* All allocations will be freed in release handler of q->mq_kobj */
4030 return -ENOMEM; in blk_mq_alloc_ctxs()
4032 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); in blk_mq_alloc_ctxs()
4033 if (!ctxs->queue_ctx) in blk_mq_alloc_ctxs()
4037 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); in blk_mq_alloc_ctxs()
4038 ctx->ctxs = ctxs; in blk_mq_alloc_ctxs()
4041 q->mq_kobj = &ctxs->kobj; in blk_mq_alloc_ctxs()
4042 q->queue_ctx = ctxs->queue_ctx; in blk_mq_alloc_ctxs()
4047 return -ENOMEM; in blk_mq_alloc_ctxs()
4052  * The actual mq release work is done from the request queue's release
4053  * handler to avoid a use-after-free on q->mq_kobj, which shares its lifetime with the queue.
4062 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); in blk_mq_release()
4065 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { in blk_mq_release()
4066 list_del_init(&hctx->hctx_list); in blk_mq_release()
4067 kobject_put(&hctx->kobj); in blk_mq_release()
4070 xa_destroy(&q->hctx_table); in blk_mq_release()
4073 	 * release .mq_kobj and the sw queues' kobjects now, because in blk_mq_release()
4074 	 * both share their lifetime with the request queue. in blk_mq_release()
4085 q = blk_alloc_queue(set->numa_node); in blk_mq_init_queue_data()
4087 return ERR_PTR(-ENOMEM); in blk_mq_init_queue_data()
4088 q->queuedata = queuedata; in blk_mq_init_queue_data()
4104 * blk_mq_destroy_queue - shutdown a request queue
4105 * @q: request queue to shutdown
4107 * This shuts down a request queue allocated by blk_mq_init_queue(). All future
4108  * requests will be failed with -ENODEV. The caller is responsible for dropping the reference acquired by blk_mq_init_queue(), usually via blk_put_queue().
4140 disk = __alloc_disk_node(q, set->numa_node, lkclass); in __blk_mq_alloc_disk()
4144 return ERR_PTR(-ENOMEM); in __blk_mq_alloc_disk()
4146 set_bit(GD_OWNS_QUEUE, &disk->state); in __blk_mq_alloc_disk()
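A sketch of how a driver's probe path typically consumes this: in the kernel generation this listing appears to come from, the blk_mq_alloc_disk() wrapper takes just the tag set and a queuedata pointer (newer kernels add a queue_limits argument, so verify). dev, foo_block_ops and nr_sectors are hypothetical:

	struct gendisk *disk;
	int err;

	disk = blk_mq_alloc_disk(&dev->tag_set, dev);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	disk->fops = &foo_block_ops;
	disk->private_data = dev;
	snprintf(disk->disk_name, DISK_NAME_LEN, "foo0");
	set_capacity(disk, dev->nr_sectors);

	err = add_disk(disk);
	if (err)
		put_disk(disk);		/* drops the disk and its request_queue */
	return err;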
4172 spin_lock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4173 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { in blk_mq_alloc_and_init_hctx()
4174 if (tmp->numa_node == node) { in blk_mq_alloc_and_init_hctx()
4180 list_del_init(&hctx->hctx_list); in blk_mq_alloc_and_init_hctx()
4181 spin_unlock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4194 kobject_put(&hctx->kobj); in blk_mq_alloc_and_init_hctx()
4206 mutex_lock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4207 for (i = 0; i < set->nr_hw_queues; i++) { in blk_mq_realloc_hw_ctxs()
4210 struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); in blk_mq_realloc_hw_ctxs()
4213 old_node = old_hctx->numa_node; in blk_mq_realloc_hw_ctxs()
4228 	 * Increasing nr_hw_queues failed: free the newly allocated hctxs and keep the previous q->nr_hw_queues. in blk_mq_realloc_hw_ctxs()
4230 if (i != set->nr_hw_queues) { in blk_mq_realloc_hw_ctxs()
4231 j = q->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4234 q->nr_hw_queues = set->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4237 xa_for_each_start(&q->hctx_table, j, hctx, j) in blk_mq_realloc_hw_ctxs()
4239 mutex_unlock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4244 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_poll_flag()
4246 if (set->nr_maps > HCTX_TYPE_POLL && in blk_mq_update_poll_flag()
4247 set->map[HCTX_TYPE_POLL].nr_queues) in blk_mq_update_poll_flag()
4256 /* mark the queue as mq asap */ in blk_mq_init_allocated_queue()
4257 q->mq_ops = set->ops; in blk_mq_init_allocated_queue()
4262 /* init q->mq_kobj and sw queues' kobjects */ in blk_mq_init_allocated_queue()
4265 INIT_LIST_HEAD(&q->unused_hctx_list); in blk_mq_init_allocated_queue()
4266 spin_lock_init(&q->unused_hctx_lock); in blk_mq_init_allocated_queue()
4268 xa_init(&q->hctx_table); in blk_mq_init_allocated_queue()
4271 if (!q->nr_hw_queues) in blk_mq_init_allocated_queue()
4274 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); in blk_mq_init_allocated_queue()
4275 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); in blk_mq_init_allocated_queue()
4277 q->tag_set = set; in blk_mq_init_allocated_queue()
4279 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; in blk_mq_init_allocated_queue()
4282 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); in blk_mq_init_allocated_queue()
4283 INIT_LIST_HEAD(&q->flush_list); in blk_mq_init_allocated_queue()
4284 INIT_LIST_HEAD(&q->requeue_list); in blk_mq_init_allocated_queue()
4285 spin_lock_init(&q->requeue_lock); in blk_mq_init_allocated_queue()
4287 q->nr_requests = set->queue_depth; in blk_mq_init_allocated_queue()
4289 blk_mq_init_cpu_queues(q, set->nr_hw_queues); in blk_mq_init_allocated_queue()
4297 q->mq_ops = NULL; in blk_mq_init_allocated_queue()
4298 return -ENOMEM; in blk_mq_init_allocated_queue()
4305 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_exit_queue()
4307 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ in blk_mq_exit_queue()
4308 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); in blk_mq_exit_queue()
4309 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ in blk_mq_exit_queue()
4317 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4318 set->shared_tags = blk_mq_alloc_map_and_rqs(set, in __blk_mq_alloc_rq_maps()
4320 set->queue_depth); in __blk_mq_alloc_rq_maps()
4321 if (!set->shared_tags) in __blk_mq_alloc_rq_maps()
4322 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4325 for (i = 0; i < set->nr_hw_queues; i++) { in __blk_mq_alloc_rq_maps()
4334 while (--i >= 0) in __blk_mq_alloc_rq_maps()
4337 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4338 blk_mq_free_map_and_rqs(set, set->shared_tags, in __blk_mq_alloc_rq_maps()
4342 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4347  * Note that this may reduce the depth asked for if memory is tight; set->queue_depth is updated to reflect the depth actually allocated.
4355 depth = set->queue_depth; in blk_mq_alloc_set_map_and_rqs()
4361 set->queue_depth >>= 1; in blk_mq_alloc_set_map_and_rqs()
4362 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { in blk_mq_alloc_set_map_and_rqs()
4363 err = -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4366 } while (set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4368 if (!set->queue_depth || err) { in blk_mq_alloc_set_map_and_rqs()
4369 pr_err("blk-mq: failed to allocate request map\n"); in blk_mq_alloc_set_map_and_rqs()
4370 return -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4373 if (depth != set->queue_depth) in blk_mq_alloc_set_map_and_rqs()
4374 pr_info("blk-mq: reduced tag depth (%u -> %u)\n", in blk_mq_alloc_set_map_and_rqs()
4375 depth, set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4384 	 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the number of hardware queues. in blk_mq_update_queue_map()
4387 if (set->nr_maps == 1) in blk_mq_update_queue_map()
4388 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; in blk_mq_update_queue_map()
4390 if (set->ops->map_queues && !is_kdump_kernel()) { in blk_mq_update_queue_map()
4397 	 * for (queue = 0; queue < set->nr_hw_queues; queue++) { in blk_mq_update_queue_map()
4398 	 * 	mask = get_cpu_mask(queue) in blk_mq_update_queue_map()
4400 	 * 	for_each_cpu(cpu, mask) set->map[x].mq_map[cpu] = queue; in blk_mq_update_queue_map()
4405 	 * When remapping, the table must be cleared first, since a CPU may no longer map to any hw queue. in blk_mq_update_queue_map()
4407 for (i = 0; i < set->nr_maps; i++) in blk_mq_update_queue_map()
4408 blk_mq_clear_mq_map(&set->map[i]); in blk_mq_update_queue_map()
4410 set->ops->map_queues(set); in blk_mq_update_queue_map()
4412 BUG_ON(set->nr_maps > 1); in blk_mq_update_queue_map()
4413 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in blk_mq_update_queue_map()
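Most drivers either rely on the blk_mq_map_queues() fallback shown above or, for PCI MSI-X affinity, use helpers such as blk_mq_pci_map_queues(). A hand-rolled ->map_queues() matching the pseudocode in the comment might look like this sketch (the round-robin policy is illustrative only):

static void foo_map_queues(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
	unsigned int cpu;

	/* Every possible CPU must end up mapped to some hardware queue. */
	for_each_possible_cpu(cpu)
		qmap->mq_map[cpu] = qmap->queue_offset +
				    cpu % qmap->nr_queues;
}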
4423 if (set->nr_hw_queues >= new_nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4427 GFP_KERNEL, set->numa_node); in blk_mq_realloc_tag_set_tags()
4429 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4431 if (set->tags) in blk_mq_realloc_tag_set_tags()
4432 memcpy(new_tags, set->tags, set->nr_hw_queues * in blk_mq_realloc_tag_set_tags()
4433 sizeof(*set->tags)); in blk_mq_realloc_tag_set_tags()
4434 kfree(set->tags); in blk_mq_realloc_tag_set_tags()
4435 set->tags = new_tags; in blk_mq_realloc_tag_set_tags()
4437 for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { in blk_mq_realloc_tag_set_tags()
4439 while (--i >= set->nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4441 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4447 set->nr_hw_queues = new_nr_hw_queues; in blk_mq_realloc_tag_set_tags()
4455  * May adjust the requested depth down if it is too large; in that case the adjusted value will be stored in set->queue_depth.
4463 if (!set->nr_hw_queues) in blk_mq_alloc_tag_set()
4464 return -EINVAL; in blk_mq_alloc_tag_set()
4465 if (!set->queue_depth) in blk_mq_alloc_tag_set()
4466 return -EINVAL; in blk_mq_alloc_tag_set()
4467 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) in blk_mq_alloc_tag_set()
4468 return -EINVAL; in blk_mq_alloc_tag_set()
4470 if (!set->ops->queue_rq) in blk_mq_alloc_tag_set()
4471 return -EINVAL; in blk_mq_alloc_tag_set()
4473 if (!set->ops->get_budget ^ !set->ops->put_budget) in blk_mq_alloc_tag_set()
4474 return -EINVAL; in blk_mq_alloc_tag_set()
4476 if (set->queue_depth > BLK_MQ_MAX_DEPTH) { in blk_mq_alloc_tag_set()
4477 pr_info("blk-mq: reduced tag depth to %u\n", in blk_mq_alloc_tag_set()
4479 set->queue_depth = BLK_MQ_MAX_DEPTH; in blk_mq_alloc_tag_set()
4482 if (!set->nr_maps) in blk_mq_alloc_tag_set()
4483 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4484 else if (set->nr_maps > HCTX_MAX_TYPES) in blk_mq_alloc_tag_set()
4485 return -EINVAL; in blk_mq_alloc_tag_set()
4489 	 * memory constrained environment. Limit us to 1 queue and 64 tags to prevent using too much memory. in blk_mq_alloc_tag_set()
4493 set->nr_hw_queues = 1; in blk_mq_alloc_tag_set()
4494 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4495 set->queue_depth = min(64U, set->queue_depth); in blk_mq_alloc_tag_set()
4501 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) in blk_mq_alloc_tag_set()
4502 set->nr_hw_queues = nr_cpu_ids; in blk_mq_alloc_tag_set()
4504 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_alloc_tag_set()
4505 set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); in blk_mq_alloc_tag_set()
4506 if (!set->srcu) in blk_mq_alloc_tag_set()
4507 return -ENOMEM; in blk_mq_alloc_tag_set()
4508 ret = init_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4513 ret = -ENOMEM; in blk_mq_alloc_tag_set()
4514 set->tags = kcalloc_node(set->nr_hw_queues, in blk_mq_alloc_tag_set()
4516 set->numa_node); in blk_mq_alloc_tag_set()
4517 if (!set->tags) in blk_mq_alloc_tag_set()
4520 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4521 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, in blk_mq_alloc_tag_set()
4522 sizeof(set->map[i].mq_map[0]), in blk_mq_alloc_tag_set()
4523 GFP_KERNEL, set->numa_node); in blk_mq_alloc_tag_set()
4524 if (!set->map[i].mq_map) in blk_mq_alloc_tag_set()
4526 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues; in blk_mq_alloc_tag_set()
4535 mutex_init(&set->tag_list_lock); in blk_mq_alloc_tag_set()
4536 INIT_LIST_HEAD(&set->tag_list); in blk_mq_alloc_tag_set()
4541 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4542 kfree(set->map[i].mq_map); in blk_mq_alloc_tag_set()
4543 set->map[i].mq_map = NULL; in blk_mq_alloc_tag_set()
4545 kfree(set->tags); in blk_mq_alloc_tag_set()
4546 set->tags = NULL; in blk_mq_alloc_tag_set()
4548 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4549 cleanup_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4551 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4552 kfree(set->srcu); in blk_mq_alloc_tag_set()
4557 /* allocate and initialize a tagset for a simple single-queue device */
4563 set->ops = ops; in blk_mq_alloc_sq_tag_set()
4564 set->nr_hw_queues = 1; in blk_mq_alloc_sq_tag_set()
4565 set->nr_maps = 1; in blk_mq_alloc_sq_tag_set()
4566 set->queue_depth = queue_depth; in blk_mq_alloc_sq_tag_set()
4567 set->numa_node = NUMA_NO_NODE; in blk_mq_alloc_sq_tag_set()
4568 set->flags = set_flags; in blk_mq_alloc_sq_tag_set()
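Putting the two allocation paths together, a hedged sketch of a driver filling in a tag set; foo_device, foo_is_simple() and the queue depths are hypothetical, and struct foo_cmd refers back to the init_request sketch earlier:

static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);
	/* ... kick the hardware; the completion path later ends the request
	 * with blk_mq_end_request() ... */
	return BLK_STS_OK;
}

static const struct blk_mq_ops foo_mq_ops = {
	.queue_rq	= foo_queue_rq,		/* required, as checked above */
	.init_request	= foo_init_request,
	.exit_request	= foo_exit_request,
};

static int foo_setup_tag_set(struct foo_device *dev)
{
	struct blk_mq_tag_set *set = &dev->tag_set;

	/* Simple devices can use the single-queue convenience wrapper. */
	if (foo_is_simple(dev))
		return blk_mq_alloc_sq_tag_set(set, &foo_mq_ops, 64, 0);

	memset(set, 0, sizeof(*set));
	set->ops = &foo_mq_ops;
	set->nr_hw_queues = dev->nr_hw_queues;
	set->queue_depth = 128;
	set->cmd_size = sizeof(struct foo_cmd);
	set->numa_node = NUMA_NO_NODE;
	set->driver_data = dev;
	return blk_mq_alloc_tag_set(set);
}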
4577 for (i = 0; i < set->nr_hw_queues; i++) in blk_mq_free_tag_set()
4580 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_free_tag_set()
4581 blk_mq_free_map_and_rqs(set, set->shared_tags, in blk_mq_free_tag_set()
4585 for (j = 0; j < set->nr_maps; j++) { in blk_mq_free_tag_set()
4586 kfree(set->map[j].mq_map); in blk_mq_free_tag_set()
4587 set->map[j].mq_map = NULL; in blk_mq_free_tag_set()
4590 kfree(set->tags); in blk_mq_free_tag_set()
4591 set->tags = NULL; in blk_mq_free_tag_set()
4592 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_free_tag_set()
4593 cleanup_srcu_struct(set->srcu); in blk_mq_free_tag_set()
4594 kfree(set->srcu); in blk_mq_free_tag_set()
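Teardown on the driver side mirrors setup; a sketch, assuming the disk was created with blk_mq_alloc_disk() as in the earlier probe sketch:

	del_gendisk(dev->disk);			/* stop accepting new I/O */
	put_disk(dev->disk);			/* drops the disk and its queue */
	blk_mq_free_tag_set(&dev->tag_set);	/* only after all queues are gone */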
4601 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_nr_requests()
4607 return -EINVAL; in blk_mq_update_nr_requests()
4609 if (q->nr_requests == nr) in blk_mq_update_nr_requests()
4617 if (!hctx->tags) in blk_mq_update_nr_requests()
4621 	 * If an MQ scheduler is in use, just update the scheduler queue depth. This is similar to what the old code would do. in blk_mq_update_nr_requests()
4623 if (hctx->sched_tags) { in blk_mq_update_nr_requests()
4624 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, in blk_mq_update_nr_requests()
4627 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, in blk_mq_update_nr_requests()
4632 if (q->elevator && q->elevator->type->ops.depth_updated) in blk_mq_update_nr_requests()
4633 q->elevator->type->ops.depth_updated(hctx); in blk_mq_update_nr_requests()
4636 q->nr_requests = nr; in blk_mq_update_nr_requests()
4637 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_update_nr_requests()
4638 if (q->elevator) in blk_mq_update_nr_requests()
4675 /* q->elevator needs protection from ->sysfs_lock */ in blk_mq_elv_switch_none()
4676 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4679 if (!q->elevator) { in blk_mq_elv_switch_none()
4684 INIT_LIST_HEAD(&qe->node); in blk_mq_elv_switch_none()
4685 qe->q = q; in blk_mq_elv_switch_none()
4686 qe->type = q->elevator->type; in blk_mq_elv_switch_none()
4688 __elevator_get(qe->type); in blk_mq_elv_switch_none()
4689 list_add(&qe->node, head); in blk_mq_elv_switch_none()
4692 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4703 if (qe->q == q) in blk_lookup_qe_pair()
4718 t = qe->type; in blk_mq_elv_switch_back()
4719 list_del(&qe->node); in blk_mq_elv_switch_back()
4722 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4726 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4734 int prev_nr_hw_queues = set->nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4737 lockdep_assert_held(&set->tag_list_lock); in __blk_mq_update_nr_hw_queues()
4739 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) in __blk_mq_update_nr_hw_queues()
4743 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) in __blk_mq_update_nr_hw_queues()
4746 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4751 	 * Switch the IO scheduler to 'none' while updating the new sw to hw queue mappings; it is switched back once the update is done. in __blk_mq_update_nr_hw_queues()
4753 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4757 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4767 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4770 if (q->nr_hw_queues != set->nr_hw_queues) { in __blk_mq_update_nr_hw_queues()
4775 for (; i < set->nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
4778 set->nr_hw_queues = prev_nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4785 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4791 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4794 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4798 for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
4804 mutex_lock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
4806 mutex_unlock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
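A typical caller is a controller-reset path that comes back with a different queue count; a sketch (dev and online_queues are hypothetical):

	/* Reshape the shared tag set and every request queue attached to it. */
	blk_mq_update_nr_hw_queues(&dev->tag_set, dev->online_queues);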
4817 ret = q->mq_ops->poll(hctx, iob); in blk_hctx_poll()
4840 struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); in blk_mq_poll()
4848 struct request_queue *q = rq->q; in blk_rq_poll()
4853 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_rq_poll()
4856 ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); in blk_rq_poll()
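blk_rq_poll() is the building block for synchronously waiting on a polled passthrough request; a sketch of such a wait loop, assuming rq sits on a poll-capable hctx and its end_io handler completes 'done':

	while (!completion_done(&done)) {
		if (!blk_rq_poll(rq, NULL, 0))
			cond_resched();	/* nothing reaped this round */
	}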
4865 return rq->mq_ctx->cpu; in blk_mq_rq_cpu()
4874 cancel_delayed_work_sync(&q->requeue_work); in blk_mq_cancel_work_sync()
4877 cancel_delayed_work_sync(&hctx->run_work); in blk_mq_cancel_work_sync()