Lines Matching +full:wait +full:- +full:queue
1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2013-2014 Jens Axboe
6 * Copyright (C) 2013-2014 Christoph Hellwig
10 #include <linux/backing-dev.h>
13 #include <linux/blk-integrity.h>
30 #include <linux/blk-crypto.h>
35 #include <linux/t10-pi.h>
37 #include "blk-mq.h"
38 #include "blk-mq-debugfs.h"
39 #include "blk-pm.h"
40 #include "blk-stat.h"
41 #include "blk-mq-sched.h"
42 #include "blk-rq-qos.h"
57 * have pending work in this hardware queue.
61 return !list_empty_careful(&hctx->dispatch) || in blk_mq_hctx_has_pending()
62 sbitmap_any_bit_set(&hctx->ctx_map) || in blk_mq_hctx_has_pending()
67 * Mark this ctx as having pending work in this hardware queue
72 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_mark_pending()
74 if (!sbitmap_test_bit(&hctx->ctx_map, bit)) in blk_mq_hctx_mark_pending()
75 sbitmap_set_bit(&hctx->ctx_map, bit); in blk_mq_hctx_mark_pending()
81 const int bit = ctx->index_hw[hctx->type]; in blk_mq_hctx_clear_pending()
83 sbitmap_clear_bit(&hctx->ctx_map, bit); in blk_mq_hctx_clear_pending()
95 if (rq->part && blk_do_io_stat(rq) && in blk_mq_check_inflight()
96 (!mi->part->bd_partno || rq->part == mi->part) && in blk_mq_check_inflight()
98 mi->inflight[rq_data_dir(rq)]++; in blk_mq_check_inflight()
125 mutex_lock(&q->mq_freeze_lock); in blk_freeze_queue_start()
126 if (++q->mq_freeze_depth == 1) { in blk_freeze_queue_start()
127 percpu_ref_kill(&q->q_usage_counter); in blk_freeze_queue_start()
128 mutex_unlock(&q->mq_freeze_lock); in blk_freeze_queue_start()
132 mutex_unlock(&q->mq_freeze_lock); in blk_freeze_queue_start()
139 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); in blk_mq_freeze_queue_wait()
146 return wait_event_timeout(q->mq_freeze_wq, in blk_mq_freeze_queue_wait_timeout()
147 percpu_ref_is_zero(&q->q_usage_counter), in blk_mq_freeze_queue_wait_timeout()
154 * the queue afterward.
181 mutex_lock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
183 q->q_usage_counter.data->force_atomic = true; in __blk_mq_unfreeze_queue()
184 q->mq_freeze_depth--; in __blk_mq_unfreeze_queue()
185 WARN_ON_ONCE(q->mq_freeze_depth < 0); in __blk_mq_unfreeze_queue()
186 if (!q->mq_freeze_depth) { in __blk_mq_unfreeze_queue()
187 percpu_ref_resurrect(&q->q_usage_counter); in __blk_mq_unfreeze_queue()
188 wake_up_all(&q->mq_freeze_wq); in __blk_mq_unfreeze_queue()
190 mutex_unlock(&q->mq_freeze_lock); in __blk_mq_unfreeze_queue()
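The freeze path above drains a queue completely: blk_freeze_queue_start() kills q_usage_counter, blk_mq_freeze_queue_wait() sleeps on mq_freeze_wq until the counter hits zero, and the unfreeze side resurrects the counter and wakes the waiters. A minimal sketch of the usual caller pattern, using only the exported blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers; the my_dev structure and field are hypothetical:

#include <linux/blk-mq.h>

struct my_dev {
	unsigned int queue_depth;	/* hypothetical driver state */
};

/* Change configuration while no requests can be in flight. */
static void my_dev_set_depth(struct request_queue *q, struct my_dev *dev,
			     unsigned int depth)
{
	blk_mq_freeze_queue(q);		/* waits for q_usage_counter to drop to zero */
	dev->queue_depth = depth;	/* safe: no new requests can enter the queue */
	blk_mq_unfreeze_queue(q);	/* resurrects the counter, wakes mq_freeze_wq */
}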
207 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
208 if (!q->quiesce_depth++) in blk_mq_quiesce_queue_nowait()
210 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_quiesce_queue_nowait()
215 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
216 * @set: tag_set to wait on
225 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_wait_quiesce_done()
226 synchronize_srcu(set->srcu); in blk_mq_wait_quiesce_done()
233 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
234 * @q: request queue.
238 * sure no dispatch can happen until the queue is unquiesced via
244 /* nothing to wait for on non-mq queues */ in blk_mq_quiesce_queue()
246 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_quiesce_queue()
251 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
252 * @q: request queue.
254 * This function restores the queue to the state before quiescing
262 spin_lock_irqsave(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
263 if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { in blk_mq_unquiesce_queue()
265 } else if (!--q->quiesce_depth) { in blk_mq_unquiesce_queue()
269 spin_unlock_irqrestore(&q->queue_lock, flags); in blk_mq_unquiesce_queue()
281 mutex_lock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
282 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_quiesce_tagset()
287 mutex_unlock(&set->tag_list_lock); in blk_mq_quiesce_tagset()
295 mutex_lock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
296 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_unquiesce_tagset()
300 mutex_unlock(&set->tag_list_lock); in blk_mq_unquiesce_tagset()
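Quiescing is weaker than freezing: once blk_mq_wait_quiesce_done() returns, no ->queue_rq() call is in progress, but already-allocated requests are not drained. A hedged sketch of how a driver typically brackets an error-recovery or reconfiguration step with the queue-level helpers shown above (the tagset variants apply the same thing to every queue sharing the tag set):

#include <linux/blk-mq.h>

/* Illustrative only: stop dispatch, do driver-private work, resume dispatch. */
static void my_dev_recover(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* waits for in-flight ->queue_rq() calls */

	/* ...reset controller state; no new dispatch can run here... */

	blk_mq_unquiesce_queue(q);	/* drops quiesce_depth, reruns the hw queues */
}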
311 blk_mq_tag_wakeup_all(hctx->tags, true); in blk_mq_wake_waiters()
318 INIT_LIST_HEAD(&rq->queuelist); in blk_rq_init()
319 rq->q = q; in blk_rq_init()
320 rq->__sector = (sector_t) -1; in blk_rq_init()
321 INIT_HLIST_NODE(&rq->hash); in blk_rq_init()
322 RB_CLEAR_NODE(&rq->rb_node); in blk_rq_init()
323 rq->tag = BLK_MQ_NO_TAG; in blk_rq_init()
324 rq->internal_tag = BLK_MQ_NO_TAG; in blk_rq_init()
325 rq->start_time_ns = ktime_get_ns(); in blk_rq_init()
326 rq->part = NULL; in blk_rq_init()
335 rq->start_time_ns = ktime_get_ns(); in blk_mq_rq_time_init()
337 rq->start_time_ns = 0; in blk_mq_rq_time_init()
340 if (blk_queue_rq_alloc_time(rq->q)) in blk_mq_rq_time_init()
341 rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns; in blk_mq_rq_time_init()
343 rq->alloc_time_ns = 0; in blk_mq_rq_time_init()
350 struct blk_mq_ctx *ctx = data->ctx; in blk_mq_rq_ctx_init()
351 struct blk_mq_hw_ctx *hctx = data->hctx; in blk_mq_rq_ctx_init()
352 struct request_queue *q = data->q; in blk_mq_rq_ctx_init()
353 struct request *rq = tags->static_rqs[tag]; in blk_mq_rq_ctx_init()
355 rq->q = q; in blk_mq_rq_ctx_init()
356 rq->mq_ctx = ctx; in blk_mq_rq_ctx_init()
357 rq->mq_hctx = hctx; in blk_mq_rq_ctx_init()
358 rq->cmd_flags = data->cmd_flags; in blk_mq_rq_ctx_init()
360 if (data->flags & BLK_MQ_REQ_PM) in blk_mq_rq_ctx_init()
361 data->rq_flags |= RQF_PM; in blk_mq_rq_ctx_init()
363 data->rq_flags |= RQF_IO_STAT; in blk_mq_rq_ctx_init()
364 rq->rq_flags = data->rq_flags; in blk_mq_rq_ctx_init()
366 if (data->rq_flags & RQF_SCHED_TAGS) { in blk_mq_rq_ctx_init()
367 rq->tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
368 rq->internal_tag = tag; in blk_mq_rq_ctx_init()
370 rq->tag = tag; in blk_mq_rq_ctx_init()
371 rq->internal_tag = BLK_MQ_NO_TAG; in blk_mq_rq_ctx_init()
373 rq->timeout = 0; in blk_mq_rq_ctx_init()
375 rq->part = NULL; in blk_mq_rq_ctx_init()
376 rq->io_start_time_ns = 0; in blk_mq_rq_ctx_init()
377 rq->stats_sectors = 0; in blk_mq_rq_ctx_init()
378 rq->nr_phys_segments = 0; in blk_mq_rq_ctx_init()
380 rq->nr_integrity_segments = 0; in blk_mq_rq_ctx_init()
382 rq->end_io = NULL; in blk_mq_rq_ctx_init()
383 rq->end_io_data = NULL; in blk_mq_rq_ctx_init()
386 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_rq_ctx_init()
388 WRITE_ONCE(rq->deadline, 0); in blk_mq_rq_ctx_init()
391 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_rq_ctx_init()
392 struct elevator_queue *e = data->q->elevator; in blk_mq_rq_ctx_init()
394 INIT_HLIST_NODE(&rq->hash); in blk_mq_rq_ctx_init()
395 RB_CLEAR_NODE(&rq->rb_node); in blk_mq_rq_ctx_init()
397 if (e->type->ops.prepare_request) in blk_mq_rq_ctx_init()
398 e->type->ops.prepare_request(rq); in blk_mq_rq_ctx_init()
413 tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); in __blk_mq_alloc_requests_batch()
422 prefetch(tags->static_rqs[tag]); in __blk_mq_alloc_requests_batch()
425 rq_list_add(data->cached_rq, rq); in __blk_mq_alloc_requests_batch()
428 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests_batch()
429 blk_mq_add_active_requests(data->hctx, nr); in __blk_mq_alloc_requests_batch()
431 percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); in __blk_mq_alloc_requests_batch()
432 data->nr_tags -= nr; in __blk_mq_alloc_requests_batch()
434 return rq_list_pop(data->cached_rq); in __blk_mq_alloc_requests_batch()
439 struct request_queue *q = data->q; in __blk_mq_alloc_requests()
448 if (data->cmd_flags & REQ_NOWAIT) in __blk_mq_alloc_requests()
449 data->flags |= BLK_MQ_REQ_NOWAIT; in __blk_mq_alloc_requests()
451 if (q->elevator) { in __blk_mq_alloc_requests()
454 * enabled for the queue. in __blk_mq_alloc_requests()
456 data->rq_flags |= RQF_SCHED_TAGS; in __blk_mq_alloc_requests()
462 if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH && in __blk_mq_alloc_requests()
463 !blk_op_is_passthrough(data->cmd_flags)) { in __blk_mq_alloc_requests()
464 struct elevator_mq_ops *ops = &q->elevator->type->ops; in __blk_mq_alloc_requests()
466 WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED); in __blk_mq_alloc_requests()
468 data->rq_flags |= RQF_USE_SCHED; in __blk_mq_alloc_requests()
469 if (ops->limit_depth) in __blk_mq_alloc_requests()
470 ops->limit_depth(data->cmd_flags, data); in __blk_mq_alloc_requests()
475 data->ctx = blk_mq_get_ctx(q); in __blk_mq_alloc_requests()
476 data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); in __blk_mq_alloc_requests()
477 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
478 blk_mq_tag_busy(data->hctx); in __blk_mq_alloc_requests()
480 if (data->flags & BLK_MQ_REQ_RESERVED) in __blk_mq_alloc_requests()
481 data->rq_flags |= RQF_RESV; in __blk_mq_alloc_requests()
486 if (data->nr_tags > 1) { in __blk_mq_alloc_requests()
492 data->nr_tags = 1; in __blk_mq_alloc_requests()
502 if (data->flags & BLK_MQ_REQ_NOWAIT) in __blk_mq_alloc_requests()
514 if (!(data->rq_flags & RQF_SCHED_TAGS)) in __blk_mq_alloc_requests()
515 blk_mq_inc_active_requests(data->hctx); in __blk_mq_alloc_requests()
530 .nr_tags = plug->nr_ios, in blk_mq_rq_cache_fill()
531 .cached_rq = &plug->cached_rq, in blk_mq_rq_cache_fill()
538 plug->nr_ios = 1; in blk_mq_rq_cache_fill()
550 struct blk_plug *plug = current->plug; in blk_mq_alloc_cached_request()
556 if (rq_list_empty(plug->cached_rq)) { in blk_mq_alloc_cached_request()
557 if (plug->nr_ios == 1) in blk_mq_alloc_cached_request()
563 rq = rq_list_peek(&plug->cached_rq); in blk_mq_alloc_cached_request()
564 if (!rq || rq->q != q) in blk_mq_alloc_cached_request()
567 if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) in blk_mq_alloc_cached_request()
569 if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) in blk_mq_alloc_cached_request()
572 plug->cached_rq = rq_list_next(rq); in blk_mq_alloc_cached_request()
576 rq->cmd_flags = opf; in blk_mq_alloc_cached_request()
577 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_alloc_cached_request()
604 rq->__data_len = 0; in blk_mq_alloc_request()
605 rq->__sector = (sector_t) -1; in blk_mq_alloc_request()
606 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request()
610 return ERR_PTR(-EWOULDBLOCK); in blk_mq_alloc_request()
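The ERR_PTR(-EWOULDBLOCK) return above is what a caller sees when a BLK_MQ_REQ_NOWAIT allocation cannot get a tag without sleeping. A small sketch of handling it; the retry policy and the REQ_OP_DRV_IN choice are illustrative, not mandated by this code:

#include <linux/blk-mq.h>
#include <linux/err.h>

static int my_try_alloc_nowait(struct request_queue *q)
{
	struct request *rq;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);	/* typically -EWOULDBLOCK when tags are exhausted */

	/* ...fill in and issue the request... */
	blk_mq_free_request(rq);
	return 0;
}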
637 * a specific queue. in blk_mq_alloc_request_hctx()
641 return ERR_PTR(-EINVAL); in blk_mq_alloc_request_hctx()
643 if (hctx_idx >= q->nr_hw_queues) in blk_mq_alloc_request_hctx()
644 return ERR_PTR(-EIO); in blk_mq_alloc_request_hctx()
652 * If not, tell the caller that it should skip this queue. in blk_mq_alloc_request_hctx()
654 ret = -EXDEV; in blk_mq_alloc_request_hctx()
655 data.hctx = xa_load(&q->hctx_table, hctx_idx); in blk_mq_alloc_request_hctx()
658 cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); in blk_mq_alloc_request_hctx()
663 if (q->elevator) in blk_mq_alloc_request_hctx()
671 ret = -EWOULDBLOCK; in blk_mq_alloc_request_hctx()
679 rq->__data_len = 0; in blk_mq_alloc_request_hctx()
680 rq->__sector = (sector_t) -1; in blk_mq_alloc_request_hctx()
681 rq->bio = rq->biotail = NULL; in blk_mq_alloc_request_hctx()
692 struct request_queue *q = rq->q; in blk_mq_finish_request()
694 if (rq->rq_flags & RQF_USE_SCHED) { in blk_mq_finish_request()
695 q->elevator->type->ops.finish_request(rq); in blk_mq_finish_request()
701 rq->rq_flags &= ~RQF_USE_SCHED; in blk_mq_finish_request()
707 struct request_queue *q = rq->q; in __blk_mq_free_request()
708 struct blk_mq_ctx *ctx = rq->mq_ctx; in __blk_mq_free_request()
709 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in __blk_mq_free_request()
710 const int sched_tag = rq->internal_tag; in __blk_mq_free_request()
714 rq->mq_hctx = NULL; in __blk_mq_free_request()
716 if (rq->tag != BLK_MQ_NO_TAG) { in __blk_mq_free_request()
718 blk_mq_put_tag(hctx->tags, ctx, rq->tag); in __blk_mq_free_request()
721 blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); in __blk_mq_free_request()
728 struct request_queue *q = rq->q; in blk_mq_free_request()
733 laptop_io_completion(q->disk->bdi); in blk_mq_free_request()
737 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_free_request()
747 while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) in blk_mq_free_plug_rqs()
754 rq->q->disk ? rq->q->disk->disk_name : "?", in blk_dump_rq_flags()
755 (__force unsigned long long) rq->cmd_flags); in blk_dump_rq_flags()
761 rq->bio, rq->biotail, blk_rq_bytes(rq)); in blk_dump_rq_flags()
769 bio->bi_status = error; in req_bio_endio()
778 if (bio->bi_iter.bi_size != nbytes) { in req_bio_endio()
779 bio->bi_status = BLK_STS_IOERR; in req_bio_endio()
780 nbytes = bio->bi_iter.bi_size; in req_bio_endio()
782 bio->bi_iter.bi_sector = rq->__sector; in req_bio_endio()
788 if (unlikely(rq->rq_flags & RQF_QUIET)) in req_bio_endio()
791 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) in req_bio_endio()
797 if (req->part && blk_do_io_stat(req)) { in blk_account_io_completion()
801 part_stat_add(req->part, sectors[sgrp], bytes >> 9); in blk_account_io_completion()
812 req->q->disk ? req->q->disk->disk_name : "?", in blk_print_req_error()
815 (__force u32)(req->cmd_flags & ~REQ_OP_MASK), in blk_print_req_error()
816 req->nr_phys_segments, in blk_print_req_error()
817 IOPRIO_PRIO_CLASS(req->ioprio)); in blk_print_req_error()
826 const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; in blk_complete_request()
828 struct bio *bio = req->bio; in blk_complete_request()
837 req->q->integrity.profile->complete_fn(req, total_bytes); in blk_complete_request()
849 struct bio *next = bio->bi_next; in blk_complete_request()
855 bio->bi_iter.bi_sector = req->__sector; in blk_complete_request()
867 if (!req->end_io) { in blk_complete_request()
868 req->bio = NULL; in blk_complete_request()
869 req->__data_len = 0; in blk_complete_request()
874 * blk_update_request - Complete multiple bytes without completing the request
892 * %false - this request doesn't have any more data
893 * %true - this request has more data
902 if (!req->bio) in blk_update_request()
908 req->q->integrity.profile->complete_fn(req, nr_bytes); in blk_update_request()
919 !(req->rq_flags & RQF_QUIET)) && in blk_update_request()
920 !test_bit(GD_DEAD, &req->q->disk->state)) { in blk_update_request()
928 while (req->bio) { in blk_update_request()
929 struct bio *bio = req->bio; in blk_update_request()
930 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); in blk_update_request()
932 if (bio_bytes == bio->bi_iter.bi_size) in blk_update_request()
933 req->bio = bio->bi_next; in blk_update_request()
940 nr_bytes -= bio_bytes; in blk_update_request()
949 if (!req->bio) { in blk_update_request()
955 req->__data_len = 0; in blk_update_request()
959 req->__data_len -= total_bytes; in blk_update_request()
963 req->__sector += total_bytes >> 9; in blk_update_request()
966 if (req->rq_flags & RQF_MIXED_MERGE) { in blk_update_request()
967 req->cmd_flags &= ~REQ_FAILFAST_MASK; in blk_update_request()
968 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; in blk_update_request()
971 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { in blk_update_request()
978 req->__data_len = blk_rq_cur_bytes(req); in blk_update_request()
982 req->nr_phys_segments = blk_recalc_rq_segments(req); in blk_update_request()
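blk_update_request() is the partial-completion primitive: it retires nr_bytes of bios from the front of the request and returns true while data remains. A hedged sketch of the classic caller pattern for drivers that complete a request in chunks; blk_mq_end_request() is essentially this plus __blk_mq_end_request() for the common all-at-once case:

#include <linux/blk-mq.h>

/* Illustrative completion helper: retire 'done' bytes, finish when empty. */
static void my_complete_bytes(struct request *rq, unsigned int done)
{
	if (blk_update_request(rq, BLK_STS_OK, done))
		return;				/* more data pending, wait for the next chunk */

	__blk_mq_end_request(rq, BLK_STS_OK);	/* every bio retired, end the request */
}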
998 if (blk_do_io_stat(req) && req->part && in blk_account_io_done()
999 !(req->rq_flags & RQF_FLUSH_SEQ)) { in blk_account_io_done()
1003 update_io_ticks(req->part, jiffies, true); in blk_account_io_done()
1004 part_stat_inc(req->part, ios[sgrp]); in blk_account_io_done()
1005 part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); in blk_account_io_done()
1016 * All non-passthrough requests are created from a bio with one in blk_account_io_start()
1018 * generated by the state machine in blk-flush.c is cloned onto the in blk_account_io_start()
1019 * lower device by dm-multipath we can get here without a bio. in blk_account_io_start()
1021 if (req->bio) in blk_account_io_start()
1022 req->part = req->bio->bi_bdev; in blk_account_io_start()
1024 req->part = req->q->disk->part0; in blk_account_io_start()
1027 update_io_ticks(req->part, jiffies, false); in blk_account_io_start()
1034 if (rq->rq_flags & RQF_STATS) in __blk_mq_end_request_acct()
1048 if (rq->end_io) { in __blk_mq_end_request()
1049 rq_qos_done(rq->q, rq); in __blk_mq_end_request()
1050 if (rq->end_io(rq, error) == RQ_END_IO_FREE) in __blk_mq_end_request()
1071 struct request_queue *q = hctx->queue; in blk_mq_flush_tag_batch()
1075 blk_mq_put_tags(hctx->tags, tag_array, nr_tags); in blk_mq_flush_tag_batch()
1076 percpu_ref_put_many(&q->q_usage_counter, nr_tags); in blk_mq_flush_tag_batch()
1086 if (iob->need_ts) in blk_mq_end_request_batch()
1089 while ((rq = rq_list_pop(&iob->req_list)) != NULL) { in blk_mq_end_request_batch()
1090 prefetch(rq->bio); in blk_mq_end_request_batch()
1091 prefetch(rq->rq_next); in blk_mq_end_request_batch()
1094 if (iob->need_ts) in blk_mq_end_request_batch()
1099 rq_qos_done(rq->q, rq); in blk_mq_end_request_batch()
1105 if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) in blk_mq_end_request_batch()
1108 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_end_request_batch()
1115 if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { in blk_mq_end_request_batch()
1119 cur_hctx = rq->mq_hctx; in blk_mq_end_request_batch()
1121 tags[nr_tags++] = rq->tag; in blk_mq_end_request_batch()
1135 rq->q->mq_ops->complete(rq); in blk_complete_reqs()
1159 !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) in blk_mq_complete_need_ipi()
1171 if (cpu == rq->mq_ctx->cpu || in blk_mq_complete_need_ipi()
1172 (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && in blk_mq_complete_need_ipi()
1173 cpus_share_cache(cpu, rq->mq_ctx->cpu))) in blk_mq_complete_need_ipi()
1177 return cpu_online(rq->mq_ctx->cpu); in blk_mq_complete_need_ipi()
1184 cpu = rq->mq_ctx->cpu; in blk_mq_complete_send_ipi()
1185 if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) in blk_mq_complete_send_ipi()
1195 if (llist_add(&rq->ipi_list, list)) in blk_mq_raise_softirq()
1202 WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); in blk_mq_complete_request_remote()
1209 if ((rq->mq_hctx->nr_ctx == 1 && in blk_mq_complete_request_remote()
1210 rq->mq_ctx->cpu == raw_smp_processor_id()) || in blk_mq_complete_request_remote()
1211 rq->cmd_flags & REQ_POLLED) in blk_mq_complete_request_remote()
1219 if (rq->q->nr_hw_queues == 1) { in blk_mq_complete_request_remote()
1228 * blk_mq_complete_request - end I/O on a request
1232 * Complete a request by scheduling the ->complete_rq operation.
1237 rq->q->mq_ops->complete(rq); in blk_mq_complete_request()
1242 * blk_mq_start_request - Start processing a request
1251 struct request_queue *q = rq->q; in blk_mq_start_request()
1255 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && in blk_mq_start_request()
1257 rq->io_start_time_ns = ktime_get_ns(); in blk_mq_start_request()
1258 rq->stats_sectors = blk_rq_sectors(rq); in blk_mq_start_request()
1259 rq->rq_flags |= RQF_STATS; in blk_mq_start_request()
1266 WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); in blk_mq_start_request()
1267 rq->mq_hctx->tags->rqs[rq->tag] = rq; in blk_mq_start_request()
1271 q->integrity.profile->prepare_fn(rq); in blk_mq_start_request()
1273 if (rq->bio && rq->bio->bi_opf & REQ_POLLED) in blk_mq_start_request()
1274 WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); in blk_mq_start_request()
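blk_mq_start_request() and blk_mq_complete_request() bound the driver's side of a request's life: the first runs in ->queue_rq() before the command reaches hardware (arming the timeout and stats above), the second runs from the driver's interrupt path and schedules the ->complete() callback, possibly on another CPU. A minimal, hedged sketch of that flow for an imaginary driver:

#include <linux/blk-mq.h>
#include <linux/interrupt.h>

/* Imaginary driver: issue from ->queue_rq(), finish from hard-IRQ context. */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);	/* MQ_RQ_IN_FLIGHT, timeout armed */
	/* ...program the hardware with the command for rq... */
	return BLK_STS_OK;
}

static irqreturn_t my_irq(int irq, void *data)
{
	struct request *rq = data;	/* however the driver maps completions to rqs */

	blk_mq_complete_request(rq);	/* ends up in ->complete(), maybe via IPI */
	return IRQ_HANDLED;
}

static void my_complete(struct request *rq)	/* wired up as blk_mq_ops.complete */
{
	blk_mq_end_request(rq, BLK_STS_OK);	/* accounting + __blk_mq_end_request() */
}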
1279 * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
1285 if (plug->multiple_queues) in blk_plug_max_rq_count()
1292 struct request *last = rq_list_peek(&plug->mq_list); in blk_add_rq_to_plug()
1294 if (!plug->rq_count) { in blk_add_rq_to_plug()
1295 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1296 } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || in blk_add_rq_to_plug()
1297 (!blk_queue_nomerges(rq->q) && in blk_add_rq_to_plug()
1301 trace_block_plug(rq->q); in blk_add_rq_to_plug()
1304 if (!plug->multiple_queues && last && last->q != rq->q) in blk_add_rq_to_plug()
1305 plug->multiple_queues = true; in blk_add_rq_to_plug()
1308 * ->queue_rqs() directly in blk_add_rq_to_plug()
1310 if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) in blk_add_rq_to_plug()
1311 plug->has_elevator = true; in blk_add_rq_to_plug()
1312 rq->rq_next = NULL; in blk_add_rq_to_plug()
1313 rq_list_add(&plug->mq_list, rq); in blk_add_rq_to_plug()
1314 plug->rq_count++; in blk_add_rq_to_plug()
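The plug list that blk_add_rq_to_plug() appends to is owned by the submitting task and drained by blk_mq_flush_plug_list() further down. The submitter side is plain kernel API usage, sketched here for context rather than taken from this file:

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Batch several submissions so they unplug as one dispatch pass. */
static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* current->plug now collects requests */
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);	/* requests pile up on plug->mq_list */
	blk_finish_plug(&plug);		/* flushes the plug list toward the driver */
}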
1318 * blk_execute_rq_nowait - insert a request to I/O scheduler for execution
1320 * @at_head: insert request at head or tail of queue
1323 * Insert a fully prepared request at the back of the I/O scheduler queue
1324 * for execution. Don't wait for completion.
1327 * This function will invoke @done directly if the queue is dead.
1331 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq_nowait()
1343 if (current->plug && !at_head) { in blk_execute_rq_nowait()
1344 blk_add_rq_to_plug(current->plug, rq); in blk_execute_rq_nowait()
1349 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_execute_rq_nowait()
1360 struct blk_rq_wait *wait = rq->end_io_data; in blk_end_sync_rq() local
1362 wait->ret = ret; in blk_end_sync_rq()
1363 complete(&wait->done); in blk_end_sync_rq()
1369 if (!rq->mq_hctx) in blk_rq_is_poll()
1371 if (rq->mq_hctx->type != HCTX_TYPE_POLL) in blk_rq_is_poll()
1377 static void blk_rq_poll_completion(struct request *rq, struct completion *wait) in blk_rq_poll_completion() argument
1380 blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); in blk_rq_poll_completion()
1382 } while (!completion_done(wait)); in blk_rq_poll_completion()
1386 * blk_execute_rq - insert a request into queue for execution
1388 * @at_head: insert request at head or tail of queue
1391 * Insert a fully prepared request at the back of the I/O scheduler queue
1392 * for execution and wait for completion.
1397 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_execute_rq()
1398 struct blk_rq_wait wait = { in blk_execute_rq() local
1399 .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), in blk_execute_rq()
1405 rq->end_io_data = &wait; in blk_execute_rq()
1406 rq->end_io = blk_end_sync_rq; in blk_execute_rq()
1413 blk_rq_poll_completion(rq, &wait.done); in blk_execute_rq()
1422 while (!wait_for_completion_io_timeout(&wait.done, in blk_execute_rq()
1426 wait_for_completion_io(&wait.done); in blk_execute_rq()
1429 return wait.ret; in blk_execute_rq()
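blk_execute_rq() is the synchronous wrapper around blk_execute_rq_nowait(): it installs blk_end_sync_rq() as end_io, inserts the request, then either polls or sleeps on the on-stack completion shown above. A hedged sketch of a typical passthrough caller on a recent kernel; filling the driver-private command is omitted:

#include <linux/blk-mq.h>
#include <linux/err.h>

static blk_status_t my_sync_passthrough(struct request_queue *q)
{
	struct request *rq;
	blk_status_t ret;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
	if (IS_ERR(rq))
		return BLK_STS_RESOURCE;

	/* ...set up the command via blk_mq_rq_to_pdu(rq)... */

	ret = blk_execute_rq(rq, false);	/* insert at tail, wait for completion */
	blk_mq_free_request(rq);
	return ret;
}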
1435 struct request_queue *q = rq->q; in __blk_mq_requeue_request()
1443 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in __blk_mq_requeue_request()
1444 rq->rq_flags &= ~RQF_TIMED_OUT; in __blk_mq_requeue_request()
1450 struct request_queue *q = rq->q; in blk_mq_requeue_request()
1455 /* this request will be re-inserted to io scheduler queue */ in blk_mq_requeue_request()
1458 spin_lock_irqsave(&q->requeue_lock, flags); in blk_mq_requeue_request()
1459 list_add_tail(&rq->queuelist, &q->requeue_list); in blk_mq_requeue_request()
1460 spin_unlock_irqrestore(&q->requeue_lock, flags); in blk_mq_requeue_request()
1475 spin_lock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1476 list_splice_init(&q->requeue_list, &rq_list); in blk_mq_requeue_work()
1477 list_splice_init(&q->flush_list, &flush_list); in blk_mq_requeue_work()
1478 spin_unlock_irq(&q->requeue_lock); in blk_mq_requeue_work()
1484 * driver already and might have driver-specific data allocated in blk_mq_requeue_work()
1488 if (rq->rq_flags & RQF_DONTPREP) { in blk_mq_requeue_work()
1489 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1492 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1499 list_del_init(&rq->queuelist); in blk_mq_requeue_work()
1508 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); in blk_mq_kick_requeue_list()
1515 kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, in blk_mq_delay_kick_requeue_list()
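Requeueing hands a started request back to the block layer: blk_mq_requeue_request() parks it on q->requeue_list, and the kick helpers schedule the requeue work that blk_mq_requeue_work() runs above. A small hedged sketch of the usual driver-side use for a transient resource shortage:

#include <linux/blk-mq.h>

/* Transient failure path: give the request back and retry a bit later. */
static void my_handle_busy(struct request *rq)
{
	blk_mq_requeue_request(rq, false);		/* park it on q->requeue_list */
	blk_mq_delay_kick_requeue_list(rq->q, 100);	/* re-run the list in ~100 ms */
}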
1522 return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); in blk_is_flush_data_rq()
1528 * If we find a request that isn't idle we know the queue is busy in blk_mq_rq_inflight()
1532 * In case of queue quiesce, if one flush data request is completed, in blk_mq_rq_inflight()
1537 if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && in blk_mq_rq_inflight()
1560 req->rq_flags |= RQF_TIMED_OUT; in blk_mq_rq_timed_out()
1561 if (req->q->mq_ops->timeout) { in blk_mq_rq_timed_out()
1564 ret = req->q->mq_ops->timeout(req); in blk_mq_rq_timed_out()
1585 if (rq->rq_flags & RQF_TIMED_OUT) in blk_mq_req_expired()
1588 deadline = READ_ONCE(rq->deadline); in blk_mq_req_expired()
1589 if (time_after_eq(expired->timeout_start, deadline)) in blk_mq_req_expired()
1592 if (expired->next == 0) in blk_mq_req_expired()
1593 expired->next = deadline; in blk_mq_req_expired()
1594 else if (time_after(expired->next, deadline)) in blk_mq_req_expired()
1595 expired->next = deadline; in blk_mq_req_expired()
1602 if (rq->end_io(rq, 0) == RQ_END_IO_FREE) in blk_mq_put_rq_ref()
1621 expired->has_timedout_rq = true; in blk_mq_check_expired()
1647 * timeout at the same time a queue freeze is waiting in blk_mq_timeout_work()
1649 * acquire the queue reference here. in blk_mq_timeout_work()
1653 * obtain a reference even in the short window between the queue in blk_mq_timeout_work()
1659 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_mq_timeout_work()
1662 /* check if there is any timed-out request */ in blk_mq_timeout_work()
1668 * uses srcu or rcu, wait for a synchronization point to in blk_mq_timeout_work()
1671 blk_mq_wait_quiesce_done(q->tag_set); in blk_mq_timeout_work()
1678 mod_timer(&q->timeout, expired.next); in blk_mq_timeout_work()
1703 struct blk_mq_hw_ctx *hctx = flush_data->hctx; in flush_busy_ctx()
1704 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in flush_busy_ctx()
1705 enum hctx_type type = hctx->type; in flush_busy_ctx()
1707 spin_lock(&ctx->lock); in flush_busy_ctx()
1708 list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); in flush_busy_ctx()
1710 spin_unlock(&ctx->lock); in flush_busy_ctx()
1716 * to the for-dispatch
1725 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); in blk_mq_flush_busy_ctxs()
1738 struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; in dispatch_rq_from_ctx()
1739 struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; in dispatch_rq_from_ctx()
1740 enum hctx_type type = hctx->type; in dispatch_rq_from_ctx()
1742 spin_lock(&ctx->lock); in dispatch_rq_from_ctx()
1743 if (!list_empty(&ctx->rq_lists[type])) { in dispatch_rq_from_ctx()
1744 dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); in dispatch_rq_from_ctx()
1745 list_del_init(&dispatch_data->rq->queuelist); in dispatch_rq_from_ctx()
1746 if (list_empty(&ctx->rq_lists[type])) in dispatch_rq_from_ctx()
1749 spin_unlock(&ctx->lock); in dispatch_rq_from_ctx()
1751 return !dispatch_data->rq; in dispatch_rq_from_ctx()
1757 unsigned off = start ? start->index_hw[hctx->type] : 0; in blk_mq_dequeue_from_ctx()
1763 __sbitmap_for_each_set(&hctx->ctx_map, off, in blk_mq_dequeue_from_ctx()
1771 struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; in __blk_mq_alloc_driver_tag()
1772 unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; in __blk_mq_alloc_driver_tag()
1775 blk_mq_tag_busy(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1777 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { in __blk_mq_alloc_driver_tag()
1778 bt = &rq->mq_hctx->tags->breserved_tags; in __blk_mq_alloc_driver_tag()
1781 if (!hctx_may_queue(rq->mq_hctx, bt)) in __blk_mq_alloc_driver_tag()
1789 rq->tag = tag + tag_offset; in __blk_mq_alloc_driver_tag()
1790 blk_mq_inc_active_requests(rq->mq_hctx); in __blk_mq_alloc_driver_tag()
1794 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, in blk_mq_dispatch_wake() argument
1799 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); in blk_mq_dispatch_wake()
1801 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1802 if (!list_empty(&wait->entry)) { in blk_mq_dispatch_wake()
1805 list_del_init(&wait->entry); in blk_mq_dispatch_wake()
1806 sbq = &hctx->tags->bitmap_tags; in blk_mq_dispatch_wake()
1807 atomic_dec(&sbq->ws_active); in blk_mq_dispatch_wake()
1809 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_dispatch_wake()
1817 * the tag wakeups. For non-shared tags, we can simply mark us needing a
1826 wait_queue_entry_t *wait; in blk_mq_mark_tag_wait() local
1829 if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && in blk_mq_mark_tag_wait()
1830 !(blk_mq_is_shared_tags(hctx->flags))) { in blk_mq_mark_tag_wait()
1835 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1836 * queue. in blk_mq_mark_tag_wait()
1839 * At most this will cost an extra queue run. in blk_mq_mark_tag_wait()
1844 wait = &hctx->dispatch_wait; in blk_mq_mark_tag_wait()
1845 if (!list_empty_careful(&wait->entry)) in blk_mq_mark_tag_wait()
1848 if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) in blk_mq_mark_tag_wait()
1849 sbq = &hctx->tags->breserved_tags; in blk_mq_mark_tag_wait()
1851 sbq = &hctx->tags->bitmap_tags; in blk_mq_mark_tag_wait()
1852 wq = &bt_wait_ptr(sbq, hctx)->wait; in blk_mq_mark_tag_wait()
1854 spin_lock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1855 spin_lock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1856 if (!list_empty(&wait->entry)) { in blk_mq_mark_tag_wait()
1857 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1858 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1862 atomic_inc(&sbq->ws_active); in blk_mq_mark_tag_wait()
1863 wait->flags &= ~WQ_FLAG_EXCLUSIVE; in blk_mq_mark_tag_wait()
1864 __add_wait_queue(wq, wait); in blk_mq_mark_tag_wait()
1870 * Order adding us to wait queue and allocating driver tag. in blk_mq_mark_tag_wait()
1876 * Otherwise, re-order of adding wait queue and getting driver tag in blk_mq_mark_tag_wait()
1878 * the waitqueue_active() may not observe us in wait queue. in blk_mq_mark_tag_wait()
1884 * allocation failure and adding the hardware queue to the wait in blk_mq_mark_tag_wait()
1885 * queue. in blk_mq_mark_tag_wait()
1889 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1890 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1895 * We got a tag, remove ourselves from the wait queue to ensure in blk_mq_mark_tag_wait()
1898 list_del_init(&wait->entry); in blk_mq_mark_tag_wait()
1899 atomic_dec(&sbq->ws_active); in blk_mq_mark_tag_wait()
1900 spin_unlock(&hctx->dispatch_wait_lock); in blk_mq_mark_tag_wait()
1901 spin_unlock_irq(&wq->lock); in blk_mq_mark_tag_wait()
1910 * - EWMA is one simple way to compute a running average value
1911 * - a 7/8 and 1/8 weighting is applied so that the value decays exponentially
1912 * - a factor of 4 is used to keep the result from rounding down to 0, and this
1919 ewma = hctx->dispatch_busy; in blk_mq_update_dispatch_busy()
1924 ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; in blk_mq_update_dispatch_busy()
1929 hctx->dispatch_busy = ewma; in blk_mq_update_dispatch_busy()
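Assuming the mainline constants of weight 8 and factor 4, the update above is the integer recurrence ewma' = (7 * ewma + (busy ? 16 : 0)) / 8: sustained busy dispatches push the value up, idle dispatches decay it back to 0 thanks to the flooring division. A tiny standalone rendering of the same arithmetic:

#include <linux/types.h>

/* Same arithmetic as above, assuming WEIGHT == 8 and FACTOR == 4 upstream. */
static unsigned int ewma_step(unsigned int ewma, bool busy)
{
	ewma *= 8 - 1;			/* keep 7/8 of the old value */
	if (busy)
		ewma += 1 << 4;		/* add 16 for a busy dispatch */
	return ewma / 8;		/* integer division: idle decays to 0 */
}

/* e.g. from 0: three busy steps give 2, 3, 4; three idle steps then give 3, 2, 1. */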
1937 list_add(&rq->queuelist, list); in blk_mq_handle_dev_resource()
1946 * specific zone due to LLD level zone-write locking or other zone in blk_mq_handle_zone_resource()
1950 list_add(&rq->queuelist, zone_list); in blk_mq_handle_zone_resource()
1963 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_prep_dispatch_rq()
1964 int budget_token = -1; in blk_mq_prep_dispatch_rq()
1967 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_prep_dispatch_rq()
1978 * rerun the hardware queue when a tag is freed. The in blk_mq_prep_dispatch_rq()
1979 * waitqueue takes care of that. If the queue is run in blk_mq_prep_dispatch_rq()
1981 * we'll re-run it below. in blk_mq_prep_dispatch_rq()
1989 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_prep_dispatch_rq()
2012 * blk_mq_commit_rqs will notify driver using bd->last that there is no
2016 * 1) did not queue everything initially scheduled to queue
2017 * 2) the last attempt to queue a request failed
2022 if (hctx->queue->mq_ops->commit_rqs && queued) { in blk_mq_commit_rqs()
2023 trace_block_unplug(hctx->queue, queued, !from_schedule); in blk_mq_commit_rqs()
2024 hctx->queue->mq_ops->commit_rqs(hctx); in blk_mq_commit_rqs()
2035 struct request_queue *q = hctx->queue; in blk_mq_dispatch_rq_list()
2054 WARN_ON_ONCE(hctx != rq->mq_hctx); in blk_mq_dispatch_rq_list()
2059 list_del_init(&rq->queuelist); in blk_mq_dispatch_rq_list()
2069 nr_budgets--; in blk_mq_dispatch_rq_list()
2070 ret = q->mq_ops->queue_rq(hctx, &bd); in blk_mq_dispatch_rq_list()
2105 * Any items that need requeuing? Stuff them into hctx->dispatch, in blk_mq_dispatch_rq_list()
2106 * that is where we will continue on next queue run. in blk_mq_dispatch_rq_list()
2110 /* For non-shared tags, the RESTART check will suffice */ in blk_mq_dispatch_rq_list()
2112 ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || in blk_mq_dispatch_rq_list()
2113 blk_mq_is_shared_tags(hctx->flags)); in blk_mq_dispatch_rq_list()
2118 spin_lock(&hctx->lock); in blk_mq_dispatch_rq_list()
2119 list_splice_tail_init(list, &hctx->dispatch); in blk_mq_dispatch_rq_list()
2120 spin_unlock(&hctx->lock); in blk_mq_dispatch_rq_list()
2123 * Order adding requests to hctx->dispatch and checking in blk_mq_dispatch_rq_list()
2126 * miss the new added requests to hctx->dispatch, meantime in blk_mq_dispatch_rq_list()
2134 * thread and hence that a queue rerun is needed. in blk_mq_dispatch_rq_list()
2138 * waitqueue is no longer active, ensure that we run the queue in blk_mq_dispatch_rq_list()
2142 * the hardware queue got stopped and restarted before requests in blk_mq_dispatch_rq_list()
2143 * were pushed back onto the dispatch list. Rerun the queue to in blk_mq_dispatch_rq_list()
2145 * - blk_mq_run_hw_queue() checks whether or not a queue has in blk_mq_dispatch_rq_list()
2146 * been stopped before rerunning a queue. in blk_mq_dispatch_rq_list()
2147 * - Some but not all block drivers stop a queue before in blk_mq_dispatch_rq_list()
2148 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq in blk_mq_dispatch_rq_list()
2149 * and dm-rq. in blk_mq_dispatch_rq_list()
2152 * bit is set, run queue after a delay to avoid IO stalls in blk_mq_dispatch_rq_list()
2153 * that could otherwise occur if the queue is idle. We'll do in blk_mq_dispatch_rq_list()
2161 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) in blk_mq_dispatch_rq_list()
2176 int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); in blk_mq_first_mapped_cpu()
2179 cpu = cpumask_first(hctx->cpumask); in blk_mq_first_mapped_cpu()
2186 * For now we just round-robin here, switching for every
2192 int next_cpu = hctx->next_cpu; in blk_mq_hctx_next_cpu()
2194 if (hctx->queue->nr_hw_queues == 1) in blk_mq_hctx_next_cpu()
2197 if (--hctx->next_cpu_batch <= 0) { in blk_mq_hctx_next_cpu()
2199 next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, in blk_mq_hctx_next_cpu()
2203 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_hctx_next_cpu()
2217 * Make sure to re-select CPU next time once after CPUs in blk_mq_hctx_next_cpu()
2218 * in hctx->cpumask become online again. in blk_mq_hctx_next_cpu()
2220 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2221 hctx->next_cpu_batch = 1; in blk_mq_hctx_next_cpu()
2225 hctx->next_cpu = next_cpu; in blk_mq_hctx_next_cpu()
2230 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
2231 * @hctx: Pointer to the hardware queue to run.
2232 * @msecs: Milliseconds of delay to wait before running the queue.
2234 * Run a hardware queue asynchronously with a delay of @msecs.
2240 kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, in blk_mq_delay_run_hw_queue()
2246 * blk_mq_run_hw_queue - Start to run a hardware queue.
2247 * @hctx: Pointer to the hardware queue to run.
2248 * @async: If we want to run the queue asynchronously.
2250 * Check if the request queue is not in a quiesced state and if there are
2251 * pending requests to be sent. If this is true, run the queue to send requests
2259 * We can't run the queue inline with interrupts disabled. in blk_mq_run_hw_queue()
2263 might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_run_hw_queue()
2266 * When queue is quiesced, we may be switching io scheduler, or in blk_mq_run_hw_queue()
2267 * updating nr_hw_queues, or other things, and we can't run queue in blk_mq_run_hw_queue()
2270 * And queue will be rerun in blk_mq_unquiesce_queue() if it is in blk_mq_run_hw_queue()
2273 __blk_mq_run_dispatch_ops(hctx->queue, false, in blk_mq_run_hw_queue()
2274 need_run = !blk_queue_quiesced(hctx->queue) && in blk_mq_run_hw_queue()
2280 if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { in blk_mq_run_hw_queue()
2285 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_hw_queue()
2291 * Return preferred queue to dispatch from (if any) for non-mq aware IO
2304 struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; in blk_mq_get_sq_hctx()
2312 * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
2313 * @q: Pointer to the request queue to run.
2314 * @async: If we want to run the queue asynchronously.
2333 !list_empty_careful(&hctx->dispatch)) in blk_mq_run_hw_queues()
2340 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
2341 * @q: Pointer to the request queue to run.
2342 * @msecs: Milliseconds of delay to wait before running the queues.
2358 * if another hctx is re-delaying the other's work in blk_mq_delay_run_hw_queues()
2361 if (delayed_work_pending(&hctx->run_work)) in blk_mq_delay_run_hw_queues()
2369 !list_empty_careful(&hctx->dispatch)) in blk_mq_delay_run_hw_queues()
2386 cancel_delayed_work(&hctx->run_work); in blk_mq_stop_hw_queue()
2388 set_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_stop_hw_queue()
2413 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_hw_queue()
2415 blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); in blk_mq_start_hw_queue()
2434 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); in blk_mq_start_stopped_hw_queue()
2446 (hctx->flags & BLK_MQ_F_BLOCKING)); in blk_mq_start_stopped_hw_queues()
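The stop/start/run helpers in this region let a driver throttle dispatch itself: blk_mq_stop_hw_queues() sets BLK_MQ_S_STOPPED so ->queue_rq() is no longer invoked, and blk_mq_start_stopped_hw_queues() (or blk_mq_run_hw_queues()) gets dispatch going again once resources return. A hedged sketch of the common pairing:

#include <linux/blk-mq.h>

/* Out of hardware slots: stop dispatch now, restart it from the
 * completion/interrupt path once slots free up again. */
static void my_pause_dispatch(struct request_queue *q)
{
	blk_mq_stop_hw_queues(q);			/* sets BLK_MQ_S_STOPPED on each hctx */
}

static void my_resume_dispatch(struct request_queue *q)
{
	blk_mq_start_stopped_hw_queues(q, true);	/* clears STOPPED and runs async */
}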
2455 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_run_work_fn()
2460 * blk_mq_request_bypass_insert - Insert a request into the dispatch list.
2469 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_bypass_insert()
2471 spin_lock(&hctx->lock); in blk_mq_request_bypass_insert()
2473 list_add(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2475 list_add_tail(&rq->queuelist, &hctx->dispatch); in blk_mq_request_bypass_insert()
2476 spin_unlock(&hctx->lock); in blk_mq_request_bypass_insert()
2484 enum hctx_type type = hctx->type; in blk_mq_insert_requests()
2487 * Try to issue requests directly if the hw queue isn't busy to save an in blk_mq_insert_requests()
2488 * extra enqueue & dequeue to the sw queue. in blk_mq_insert_requests()
2490 if (!hctx->dispatch_busy && !run_queue_async) { in blk_mq_insert_requests()
2491 blk_mq_run_dispatch_ops(hctx->queue, in blk_mq_insert_requests()
2498 * preemption doesn't flush plug list, so it's possible ctx->cpu is in blk_mq_insert_requests()
2502 BUG_ON(rq->mq_ctx != ctx); in blk_mq_insert_requests()
2504 if (rq->cmd_flags & REQ_NOWAIT) in blk_mq_insert_requests()
2508 spin_lock(&ctx->lock); in blk_mq_insert_requests()
2509 list_splice_tail_init(list, &ctx->rq_lists[type]); in blk_mq_insert_requests()
2511 spin_unlock(&ctx->lock); in blk_mq_insert_requests()
2518 struct request_queue *q = rq->q; in blk_mq_insert_request()
2519 struct blk_mq_ctx *ctx = rq->mq_ctx; in blk_mq_insert_request()
2520 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_insert_request()
2524 * Passthrough request have to be added to hctx->dispatch in blk_mq_insert_request()
2527 * them, which gets them added to hctx->dispatch. in blk_mq_insert_request()
2530 * and it is added to the scheduler queue, there is no chance to in blk_mq_insert_request()
2531 * dispatch it given we prioritize requests in hctx->dispatch. in blk_mq_insert_request()
2536 * Firstly normal IO request is inserted to scheduler queue or in blk_mq_insert_request()
2537 * sw queue, meantime we add flush request to dispatch queue( in blk_mq_insert_request()
2538 * hctx->dispatch) directly and there is at most one in-flight in blk_mq_insert_request()
2539 * flush request for each hw queue, so it doesn't matter to add in blk_mq_insert_request()
2540 * flush request to tail or front of the dispatch queue. in blk_mq_insert_request()
2542 * Secondly in case of NCQ, flush request belongs to non-NCQ in blk_mq_insert_request()
2544 * in-flight normal IO request(NCQ command). When adding flush in blk_mq_insert_request()
2545 * rq to the front of hctx->dispatch, it is easier to introduce in blk_mq_insert_request()
2547 * compared with adding to the tail of dispatch queue, then in blk_mq_insert_request()
2551 * drive when adding flush rq to the front of hctx->dispatch. in blk_mq_insert_request()
2553 * Simply queue flush rq to the front of hctx->dispatch so that in blk_mq_insert_request()
2557 } else if (q->elevator) { in blk_mq_insert_request()
2560 WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); in blk_mq_insert_request()
2562 list_add(&rq->queuelist, &list); in blk_mq_insert_request()
2563 q->elevator->type->ops.insert_requests(hctx, &list, flags); in blk_mq_insert_request()
2567 spin_lock(&ctx->lock); in blk_mq_insert_request()
2569 list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2571 list_add_tail(&rq->queuelist, in blk_mq_insert_request()
2572 &ctx->rq_lists[hctx->type]); in blk_mq_insert_request()
2574 spin_unlock(&ctx->lock); in blk_mq_insert_request()
2583 if (bio->bi_opf & REQ_RAHEAD) in blk_mq_bio_to_request()
2584 rq->cmd_flags |= REQ_FAILFAST_MASK; in blk_mq_bio_to_request()
2586 rq->__sector = bio->bi_iter.bi_sector; in blk_mq_bio_to_request()
2599 struct request_queue *q = rq->q; in __blk_mq_issue_directly()
2607 * For OK queue, we are done. For error, caller may kill it. in __blk_mq_issue_directly()
2611 ret = q->mq_ops->queue_rq(hctx, &bd); in __blk_mq_issue_directly()
2633 budget_token = blk_mq_get_dispatch_budget(rq->q); in blk_mq_get_budget_and_tag()
2638 blk_mq_put_dispatch_budget(rq->q, budget_token); in blk_mq_get_budget_and_tag()
2645 * blk_mq_try_issue_directly - Try to send a request directly to device driver.
2646 * @hctx: Pointer of the associated hardware queue.
2650 * request directly to device driver. Else, insert at hctx->dispatch queue, so
2652 * queue have higher priority.
2659 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_try_issue_directly()
2664 if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { in blk_mq_try_issue_directly()
2666 blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); in blk_mq_try_issue_directly()
2687 struct blk_mq_hw_ctx *hctx = rq->mq_hctx; in blk_mq_request_issue_directly()
2689 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { in blk_mq_request_issue_directly()
2706 while ((rq = rq_list_pop(&plug->mq_list))) { in blk_mq_plug_issue_direct()
2707 bool last = rq_list_empty(plug->mq_list); in blk_mq_plug_issue_direct()
2709 if (hctx != rq->mq_hctx) { in blk_mq_plug_issue_direct()
2714 hctx = rq->mq_hctx; in blk_mq_plug_issue_direct()
2743 q->mq_ops->queue_rqs(&plug->mq_list); in __blk_mq_flush_plug_list()
2757 struct request *rq = rq_list_pop(&plug->mq_list); in blk_mq_dispatch_plug_list()
2760 this_hctx = rq->mq_hctx; in blk_mq_dispatch_plug_list()
2761 this_ctx = rq->mq_ctx; in blk_mq_dispatch_plug_list()
2763 } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || in blk_mq_dispatch_plug_list()
2768 list_add(&rq->queuelist, &list); in blk_mq_dispatch_plug_list()
2770 } while (!rq_list_empty(plug->mq_list)); in blk_mq_dispatch_plug_list()
2772 plug->mq_list = requeue_list; in blk_mq_dispatch_plug_list()
2773 trace_block_unplug(this_hctx->queue, depth, !from_sched); in blk_mq_dispatch_plug_list()
2775 percpu_ref_get(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2778 spin_lock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2779 list_splice_tail_init(&list, &this_hctx->dispatch); in blk_mq_dispatch_plug_list()
2780 spin_unlock(&this_hctx->lock); in blk_mq_dispatch_plug_list()
2782 } else if (this_hctx->queue->elevator) { in blk_mq_dispatch_plug_list()
2783 this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, in blk_mq_dispatch_plug_list()
2789 percpu_ref_put(&this_hctx->queue->q_usage_counter); in blk_mq_dispatch_plug_list()
2798 * plug->mq_list via a schedule() in the driver's queue_rq() callback. in blk_mq_flush_plug_list()
2803 if (plug->rq_count == 0) in blk_mq_flush_plug_list()
2805 plug->rq_count = 0; in blk_mq_flush_plug_list()
2807 if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { in blk_mq_flush_plug_list()
2810 rq = rq_list_peek(&plug->mq_list); in blk_mq_flush_plug_list()
2811 q = rq->q; in blk_mq_flush_plug_list()
2814 * Peek first request and see if we have a ->queue_rqs() hook. in blk_mq_flush_plug_list()
2817 * same queue, caller must ensure that's the case. in blk_mq_flush_plug_list()
2819 if (q->mq_ops->queue_rqs) { in blk_mq_flush_plug_list()
2822 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2828 if (rq_list_empty(plug->mq_list)) in blk_mq_flush_plug_list()
2834 } while (!rq_list_empty(plug->mq_list)); in blk_mq_flush_plug_list()
2847 list_del_init(&rq->queuelist); in blk_mq_try_issue_list_directly()
2890 .cmd_flags = bio->bi_opf, in blk_mq_get_new_requests()
2900 data.nr_tags = plug->nr_ios; in blk_mq_get_new_requests()
2901 plug->nr_ios = 1; in blk_mq_get_new_requests()
2902 data.cached_rq = &plug->cached_rq; in blk_mq_get_new_requests()
2909 if (bio->bi_opf & REQ_NOWAIT) in blk_mq_get_new_requests()
2921 enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); in blk_mq_use_cached_rq()
2922 enum hctx_type hctx_type = rq->mq_hctx->type; in blk_mq_use_cached_rq()
2924 WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); in blk_mq_use_cached_rq()
2929 if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) in blk_mq_use_cached_rq()
2933 * If any qos ->throttle() end up blocking, we will have flushed the in blk_mq_use_cached_rq()
2937 plug->cached_rq = rq_list_next(rq); in blk_mq_use_cached_rq()
2938 rq_qos_throttle(rq->q, bio); in blk_mq_use_cached_rq()
2941 rq->cmd_flags = bio->bi_opf; in blk_mq_use_cached_rq()
2942 INIT_LIST_HEAD(&rq->queuelist); in blk_mq_use_cached_rq()
2947 * blk_mq_submit_bio - Create and send a request to block device.
2953 * * We want to place request at plug queue for possible future merging
2954 * * There is an IO scheduler active at this queue
2956 * It will not queue the request if there is an error with the bio, or at the
2961 struct request_queue *q = bdev_get_queue(bio->bi_bdev); in blk_mq_submit_bio()
2963 const int is_sync = op_is_sync(bio->bi_opf); in blk_mq_submit_bio()
2972 rq = rq_list_peek(&plug->cached_rq); in blk_mq_submit_bio()
2973 if (rq && rq->q != q) in blk_mq_submit_bio()
2977 if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { in blk_mq_submit_bio()
2978 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
2988 percpu_ref_get(&q->q_usage_counter); in blk_mq_submit_bio()
2992 if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { in blk_mq_submit_bio()
2993 bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); in blk_mq_submit_bio()
3017 bio->bi_status = ret; in blk_mq_submit_bio()
3023 if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) in blk_mq_submit_bio()
3031 hctx = rq->mq_hctx; in blk_mq_submit_bio()
3032 if ((rq->rq_flags & RQF_USE_SCHED) || in blk_mq_submit_bio()
3033 (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { in blk_mq_submit_bio()
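blk_mq_submit_bio() is reached through submit_bio(); everything a caller builds ends up in the plugging, merging and insertion logic above. A hedged caller-side sketch of a single-sector read into a page, using the current bio allocation helpers; all names are hypothetical:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Illustrative caller: everything below funnels into blk_mq_submit_bio(). */
static void my_read_sector(struct block_device *bdev, struct page *page,
			   sector_t sector, bio_end_io_t *done)
{
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_NOIO);

	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, SECTOR_SIZE, 0);
	bio->bi_end_io = done;
	submit_bio(bio);		/* plugged, merged or queued as described above */
}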
3043 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
3048 struct request_queue *q = rq->q; in blk_insert_cloned_request()
3057 * a non-read/write command (discard, write same, etc.) the in blk_insert_cloned_request()
3058 * low-level device driver will set the relevant queue limit to in blk_insert_cloned_request()
3059 * 0 to prevent blk-lib from issuing more of the offending in blk_insert_cloned_request()
3060 * operations. Commands queued prior to the queue limit being in blk_insert_cloned_request()
3073 * The queue settings related to segment counting may differ from the in blk_insert_cloned_request()
3074 * original queue. in blk_insert_cloned_request()
3076 rq->nr_phys_segments = blk_recalc_rq_segments(rq); in blk_insert_cloned_request()
3077 if (rq->nr_phys_segments > max_segments) { in blk_insert_cloned_request()
3079 __func__, rq->nr_phys_segments, max_segments); in blk_insert_cloned_request()
3083 if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) in blk_insert_cloned_request()
3106 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
3116 while ((bio = rq->bio) != NULL) { in blk_rq_unprep_clone()
3117 rq->bio = bio->bi_next; in blk_rq_unprep_clone()
3125 * blk_rq_prep_clone - Helper function to setup clone request
3152 bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, in blk_rq_prep_clone()
3160 if (rq->bio) { in blk_rq_prep_clone()
3161 rq->biotail->bi_next = bio; in blk_rq_prep_clone()
3162 rq->biotail = bio; in blk_rq_prep_clone()
3164 rq->bio = rq->biotail = bio; in blk_rq_prep_clone()
3170 rq->__sector = blk_rq_pos(rq_src); in blk_rq_prep_clone()
3171 rq->__data_len = blk_rq_bytes(rq_src); in blk_rq_prep_clone()
3172 if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { in blk_rq_prep_clone()
3173 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; in blk_rq_prep_clone()
3174 rq->special_vec = rq_src->special_vec; in blk_rq_prep_clone()
3176 rq->nr_phys_segments = rq_src->nr_phys_segments; in blk_rq_prep_clone()
3177 rq->ioprio = rq_src->ioprio; in blk_rq_prep_clone()
3179 if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) in blk_rq_prep_clone()
3189 return -ENOMEM; in blk_rq_prep_clone()
3200 if (rq->bio) { in blk_steal_bios()
3201 if (list->tail) in blk_steal_bios()
3202 list->tail->bi_next = rq->bio; in blk_steal_bios()
3204 list->head = rq->bio; in blk_steal_bios()
3205 list->tail = rq->biotail; in blk_steal_bios()
3207 rq->bio = NULL; in blk_steal_bios()
3208 rq->biotail = NULL; in blk_steal_bios()
3211 rq->__data_len = 0; in blk_steal_bios()
3234 list_for_each_entry(page, &tags->page_list, lru) { in blk_mq_clear_rq_mapping()
3236 unsigned long end = start + order_to_size(page->private); in blk_mq_clear_rq_mapping()
3239 for (i = 0; i < drv_tags->nr_tags; i++) { in blk_mq_clear_rq_mapping()
3240 struct request *rq = drv_tags->rqs[i]; in blk_mq_clear_rq_mapping()
3245 cmpxchg(&drv_tags->rqs[i], rq, NULL); in blk_mq_clear_rq_mapping()
3251 * Wait until all pending iterations are done. in blk_mq_clear_rq_mapping()
3254 * after the ->lock is released. in blk_mq_clear_rq_mapping()
3256 spin_lock_irqsave(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3257 spin_unlock_irqrestore(&drv_tags->lock, flags); in blk_mq_clear_rq_mapping()
3266 if (list_empty(&tags->page_list)) in blk_mq_free_rqs()
3269 if (blk_mq_is_shared_tags(set->flags)) in blk_mq_free_rqs()
3270 drv_tags = set->shared_tags; in blk_mq_free_rqs()
3272 drv_tags = set->tags[hctx_idx]; in blk_mq_free_rqs()
3274 if (tags->static_rqs && set->ops->exit_request) { in blk_mq_free_rqs()
3277 for (i = 0; i < tags->nr_tags; i++) { in blk_mq_free_rqs()
3278 struct request *rq = tags->static_rqs[i]; in blk_mq_free_rqs()
3282 set->ops->exit_request(set, rq, hctx_idx); in blk_mq_free_rqs()
3283 tags->static_rqs[i] = NULL; in blk_mq_free_rqs()
3289 while (!list_empty(&tags->page_list)) { in blk_mq_free_rqs()
3290 page = list_first_entry(&tags->page_list, struct page, lru); in blk_mq_free_rqs()
3291 list_del_init(&page->lru); in blk_mq_free_rqs()
3297 __free_pages(page, page->private); in blk_mq_free_rqs()
3303 kfree(tags->rqs); in blk_mq_free_rq_map()
3304 tags->rqs = NULL; in blk_mq_free_rq_map()
3305 kfree(tags->static_rqs); in blk_mq_free_rq_map()
3306 tags->static_rqs = NULL; in blk_mq_free_rq_map()
3316 for (i = 0; i < set->nr_maps; i++) { in hctx_idx_to_type()
3317 unsigned int start = set->map[i].queue_offset; in hctx_idx_to_type()
3318 unsigned int end = start + set->map[i].nr_queues; in hctx_idx_to_type()
3324 if (i >= set->nr_maps) in hctx_idx_to_type()
3335 return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); in blk_mq_get_hctx_node()
3347 node = set->numa_node; in blk_mq_alloc_rq_map()
3350 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); in blk_mq_alloc_rq_map()
3354 tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3357 if (!tags->rqs) in blk_mq_alloc_rq_map()
3360 tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), in blk_mq_alloc_rq_map()
3363 if (!tags->static_rqs) in blk_mq_alloc_rq_map()
3369 kfree(tags->rqs); in blk_mq_alloc_rq_map()
3380 if (set->ops->init_request) { in blk_mq_init_request()
3381 ret = set->ops->init_request(set, rq, hctx_idx, node); in blk_mq_init_request()
3386 WRITE_ONCE(rq->state, MQ_RQ_IDLE); in blk_mq_init_request()
3399 node = set->numa_node; in blk_mq_alloc_rqs()
3401 INIT_LIST_HEAD(&tags->page_list); in blk_mq_alloc_rqs()
3407 rq_size = round_up(sizeof(struct request) + set->cmd_size, in blk_mq_alloc_rqs()
3417 while (this_order && left < order_to_size(this_order - 1)) in blk_mq_alloc_rqs()
3418 this_order--; in blk_mq_alloc_rqs()
3426 if (!this_order--) in blk_mq_alloc_rqs()
3435 page->private = this_order; in blk_mq_alloc_rqs()
3436 list_add_tail(&page->lru, &tags->page_list); in blk_mq_alloc_rqs()
3441 * to additional allocations like via ops->init_request(). in blk_mq_alloc_rqs()
3445 to_do = min(entries_per_page, depth - i); in blk_mq_alloc_rqs()
3446 left -= to_do * rq_size; in blk_mq_alloc_rqs()
3450 tags->static_rqs[i] = rq; in blk_mq_alloc_rqs()
3452 tags->static_rqs[i] = NULL; in blk_mq_alloc_rqs()
3464 return -ENOMEM; in blk_mq_alloc_rqs()
3476 if (rq->mq_hctx != iter_data->hctx) in blk_mq_has_request()
3478 iter_data->has_rq = true; in blk_mq_has_request()
3484 struct blk_mq_tags *tags = hctx->sched_tags ? in blk_mq_hctx_has_requests()
3485 hctx->sched_tags : hctx->tags; in blk_mq_hctx_has_requests()
3497 if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) in blk_mq_last_cpu_in_hctx()
3499 if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) in blk_mq_last_cpu_in_hctx()
3509 if (!cpumask_test_cpu(cpu, hctx->cpumask) || in blk_mq_hctx_notify_offline()
3520 set_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_offline()
3524 * Try to grab a reference to the queue and wait for any outstanding in blk_mq_hctx_notify_offline()
3525 * requests. If we could not grab a reference the queue has been in blk_mq_hctx_notify_offline()
3528 if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { in blk_mq_hctx_notify_offline()
3531 percpu_ref_put(&hctx->queue->q_usage_counter); in blk_mq_hctx_notify_offline()
3542 if (cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_online()
3543 clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); in blk_mq_hctx_notify_online()
3549 * software queue to the hw queue dispatch list, and ensure that it
3560 if (!cpumask_test_cpu(cpu, hctx->cpumask)) in blk_mq_hctx_notify_dead()
3563 ctx = __blk_mq_get_ctx(hctx->queue, cpu); in blk_mq_hctx_notify_dead()
3564 type = hctx->type; in blk_mq_hctx_notify_dead()
3566 spin_lock(&ctx->lock); in blk_mq_hctx_notify_dead()
3567 if (!list_empty(&ctx->rq_lists[type])) { in blk_mq_hctx_notify_dead()
3568 list_splice_init(&ctx->rq_lists[type], &tmp); in blk_mq_hctx_notify_dead()
3571 spin_unlock(&ctx->lock); in blk_mq_hctx_notify_dead()
3576 spin_lock(&hctx->lock); in blk_mq_hctx_notify_dead()
3577 list_splice_tail_init(&tmp, &hctx->dispatch); in blk_mq_hctx_notify_dead()
3578 spin_unlock(&hctx->lock); in blk_mq_hctx_notify_dead()
3586 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_remove_cpuhp()
3588 &hctx->cpuhp_online); in blk_mq_remove_cpuhp()
3590 &hctx->cpuhp_dead); in blk_mq_remove_cpuhp()
3594 * Before freeing the hw queue, clear the flush request reference in
3595 * tags->rqs[] to avoid a potential UAF.
3603 /* The hw queue may not be mapped yet */ in blk_mq_clear_flush_rq_mapping()
3610 cmpxchg(&tags->rqs[i], flush_rq, NULL); in blk_mq_clear_flush_rq_mapping()
3613 * Wait until all pending iterations are done. in blk_mq_clear_flush_rq_mapping()
3616 * after the ->lock is released. in blk_mq_clear_flush_rq_mapping()
3618 spin_lock_irqsave(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3619 spin_unlock_irqrestore(&tags->lock, flags); in blk_mq_clear_flush_rq_mapping()
3622 /* hctx->ctxs will be freed in queue's release handler */
3627 struct request *flush_rq = hctx->fq->flush_rq; in blk_mq_exit_hctx()
3633 blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], in blk_mq_exit_hctx()
3634 set->queue_depth, flush_rq); in blk_mq_exit_hctx()
3635 if (set->ops->exit_request) in blk_mq_exit_hctx()
3636 set->ops->exit_request(set, flush_rq, hctx_idx); in blk_mq_exit_hctx()
3638 if (set->ops->exit_hctx) in blk_mq_exit_hctx()
3639 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_exit_hctx()
3643 xa_erase(&q->hctx_table, hctx_idx); in blk_mq_exit_hctx()
3645 spin_lock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3646 list_add(&hctx->hctx_list, &q->unused_hctx_list); in blk_mq_exit_hctx()
3647 spin_unlock(&q->unused_hctx_lock); in blk_mq_exit_hctx()
3667 hctx->queue_num = hctx_idx; in blk_mq_init_hctx()
3669 if (!(hctx->flags & BLK_MQ_F_STACKING)) in blk_mq_init_hctx()
3671 &hctx->cpuhp_online); in blk_mq_init_hctx()
3672 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); in blk_mq_init_hctx()
3674 hctx->tags = set->tags[hctx_idx]; in blk_mq_init_hctx()
3676 if (set->ops->init_hctx && in blk_mq_init_hctx()
3677 set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) in blk_mq_init_hctx()
3680 if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, in blk_mq_init_hctx()
3681 hctx->numa_node)) in blk_mq_init_hctx()
3684 if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) in blk_mq_init_hctx()
3690 if (set->ops->exit_request) in blk_mq_init_hctx()
3691 set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); in blk_mq_init_hctx()
3693 if (set->ops->exit_hctx) in blk_mq_init_hctx()
3694 set->ops->exit_hctx(hctx, hctx_idx); in blk_mq_init_hctx()
3697 return -1; in blk_mq_init_hctx()
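A hedged sketch of the driver side of the ->init_hctx()/->exit_hctx() pair invoked above; struct foo_device and its hw_queues[] array are hypothetical, and driver_data is whatever the driver stored in set->driver_data:

static int foo_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
			 unsigned int hctx_idx)
{
	struct foo_device *foo = driver_data;

	/* Remember the driver's per-hardware-queue state on the hctx. */
	hctx->driver_data = &foo->hw_queues[hctx_idx];
	return 0;
}

static void foo_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	hctx->driver_data = NULL;
}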
3711 if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) in blk_mq_alloc_hctx()
3714 atomic_set(&hctx->nr_active, 0); in blk_mq_alloc_hctx()
3716 node = set->numa_node; in blk_mq_alloc_hctx()
3717 hctx->numa_node = node; in blk_mq_alloc_hctx()
3719 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); in blk_mq_alloc_hctx()
3720 spin_lock_init(&hctx->lock); in blk_mq_alloc_hctx()
3721 INIT_LIST_HEAD(&hctx->dispatch); in blk_mq_alloc_hctx()
3722 hctx->queue = q; in blk_mq_alloc_hctx()
3723 hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_alloc_hctx()
3725 INIT_LIST_HEAD(&hctx->hctx_list); in blk_mq_alloc_hctx()
3731 hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), in blk_mq_alloc_hctx()
3733 if (!hctx->ctxs) in blk_mq_alloc_hctx()
3736 if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), in blk_mq_alloc_hctx()
3739 hctx->nr_ctx = 0; in blk_mq_alloc_hctx()
3741 spin_lock_init(&hctx->dispatch_wait_lock); in blk_mq_alloc_hctx()
3742 init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); in blk_mq_alloc_hctx()
3743 INIT_LIST_HEAD(&hctx->dispatch_wait.entry); in blk_mq_alloc_hctx()
3745 hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); in blk_mq_alloc_hctx()
3746 if (!hctx->fq) in blk_mq_alloc_hctx()
3754 sbitmap_free(&hctx->ctx_map); in blk_mq_alloc_hctx()
3756 kfree(hctx->ctxs); in blk_mq_alloc_hctx()
3758 free_cpumask_var(hctx->cpumask); in blk_mq_alloc_hctx()
3768 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_init_cpu_queues()
3772 struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_init_cpu_queues()
3776 __ctx->cpu = i; in blk_mq_init_cpu_queues()
3777 spin_lock_init(&__ctx->lock); in blk_mq_init_cpu_queues()
3779 INIT_LIST_HEAD(&__ctx->rq_lists[k]); in blk_mq_init_cpu_queues()
3781 __ctx->queue = q; in blk_mq_init_cpu_queues()
3784 	 * Set the local node, IFF we have more than one hw queue. If not, we remain on the home node of the device. in blk_mq_init_cpu_queues()
3787 for (j = 0; j < set->nr_maps; j++) { in blk_mq_init_cpu_queues()
3789 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) in blk_mq_init_cpu_queues()
3790 hctx->numa_node = cpu_to_node(i); in blk_mq_init_cpu_queues()
3802 tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); in blk_mq_alloc_map_and_rqs()
3818 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_map_and_rqs()
3819 set->tags[hctx_idx] = set->shared_tags; in __blk_mq_alloc_map_and_rqs()
3824 set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, in __blk_mq_alloc_map_and_rqs()
3825 set->queue_depth); in __blk_mq_alloc_map_and_rqs()
3827 return set->tags[hctx_idx]; in __blk_mq_alloc_map_and_rqs()
3843 if (!blk_mq_is_shared_tags(set->flags)) in __blk_mq_free_map_and_rqs()
3844 blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); in __blk_mq_free_map_and_rqs()
3846 set->tags[hctx_idx] = NULL; in __blk_mq_free_map_and_rqs()
3855 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_map_swqueue()
3858 cpumask_clear(hctx->cpumask); in blk_mq_map_swqueue()
3859 hctx->nr_ctx = 0; in blk_mq_map_swqueue()
3860 hctx->dispatch_from = NULL; in blk_mq_map_swqueue()
3870 ctx = per_cpu_ptr(q->queue_ctx, i); in blk_mq_map_swqueue()
3871 for (j = 0; j < set->nr_maps; j++) { in blk_mq_map_swqueue()
3872 if (!set->map[j].nr_queues) { in blk_mq_map_swqueue()
3873 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3877 hctx_idx = set->map[j].mq_map[i]; in blk_mq_map_swqueue()
3878 	 /* an unmapped hw queue can be remapped after the CPU topology changes */ in blk_mq_map_swqueue()
3879 if (!set->tags[hctx_idx] && in blk_mq_map_swqueue()
3887 set->map[j].mq_map[i] = 0; in blk_mq_map_swqueue()
3891 ctx->hctxs[j] = hctx; in blk_mq_map_swqueue()
3895 	 * If the CPU is already set in the mask, it has been mapped already; this can happen when devices share queues across queue maps. in blk_mq_map_swqueue()
3897 if (cpumask_test_cpu(i, hctx->cpumask)) in blk_mq_map_swqueue()
3900 cpumask_set_cpu(i, hctx->cpumask); in blk_mq_map_swqueue()
3901 hctx->type = j; in blk_mq_map_swqueue()
3902 ctx->index_hw[hctx->type] = hctx->nr_ctx; in blk_mq_map_swqueue()
3903 hctx->ctxs[hctx->nr_ctx++] = ctx; in blk_mq_map_swqueue()
3909 BUG_ON(!hctx->nr_ctx); in blk_mq_map_swqueue()
3913 ctx->hctxs[j] = blk_mq_map_queue_type(q, in blk_mq_map_swqueue()
3919 	 * If no software queues are mapped to this hardware queue, disable it and free its request entries. in blk_mq_map_swqueue()
3922 if (!hctx->nr_ctx) { in blk_mq_map_swqueue()
3923 	 /* Never unmap queue 0. We need it as a fallback in case a queue map fails. in blk_mq_map_swqueue()
3930 hctx->tags = NULL; in blk_mq_map_swqueue()
3934 hctx->tags = set->tags[i]; in blk_mq_map_swqueue()
3935 WARN_ON(!hctx->tags); in blk_mq_map_swqueue()
3942 sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); in blk_mq_map_swqueue()
3947 hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); in blk_mq_map_swqueue()
3948 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; in blk_mq_map_swqueue()
3954  * Caller needs to ensure we are either frozen/quiesced, or that the queue isn't live yet.
3963 hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3966 hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in queue_set_hctx_shared()
3976 lockdep_assert_held(&set->tag_list_lock); in blk_mq_update_tag_set_shared()
3978 list_for_each_entry(q, &set->tag_list, tag_set_list) { in blk_mq_update_tag_set_shared()
3987 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_del_queue_tag_set()
3989 mutex_lock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3990 list_del(&q->tag_set_list); in blk_mq_del_queue_tag_set()
3991 if (list_is_singular(&set->tag_list)) { in blk_mq_del_queue_tag_set()
3993 set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_del_queue_tag_set()
3994 /* update existing queue */ in blk_mq_del_queue_tag_set()
3997 mutex_unlock(&set->tag_list_lock); in blk_mq_del_queue_tag_set()
3998 INIT_LIST_HEAD(&q->tag_set_list); in blk_mq_del_queue_tag_set()
4004 mutex_lock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4009 if (!list_empty(&set->tag_list) && in blk_mq_add_queue_tag_set()
4010 !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { in blk_mq_add_queue_tag_set()
4011 set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; in blk_mq_add_queue_tag_set()
4012 /* update existing queue */ in blk_mq_add_queue_tag_set()
4015 if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) in blk_mq_add_queue_tag_set()
4017 list_add_tail(&q->tag_set_list, &set->tag_list); in blk_mq_add_queue_tag_set()
4019 mutex_unlock(&set->tag_list_lock); in blk_mq_add_queue_tag_set()
4022 /* All allocations will be freed in release handler of q->mq_kobj */
4030 return -ENOMEM; in blk_mq_alloc_ctxs()
4032 ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); in blk_mq_alloc_ctxs()
4033 if (!ctxs->queue_ctx) in blk_mq_alloc_ctxs()
4037 struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); in blk_mq_alloc_ctxs()
4038 ctx->ctxs = ctxs; in blk_mq_alloc_ctxs()
4041 q->mq_kobj = &ctxs->kobj; in blk_mq_alloc_ctxs()
4042 q->queue_ctx = ctxs->queue_ctx; in blk_mq_alloc_ctxs()
4047 return -ENOMEM; in blk_mq_alloc_ctxs()
4052  * The actual mq release work is done from the request queue's release
4053  * handler to avoid a use-after-free on q->mq_kobj, which shares its lifetime with the queue.
4062 WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); in blk_mq_release()
4065 list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { in blk_mq_release()
4066 list_del_init(&hctx->hctx_list); in blk_mq_release()
4067 kobject_put(&hctx->kobj); in blk_mq_release()
4070 xa_destroy(&q->hctx_table); in blk_mq_release()
4073 	 * release .mq_kobj and the sw queues' kobjects now, because in blk_mq_release()
4074 	 * both share their lifetime with the request queue. in blk_mq_release()
4085 q = blk_alloc_queue(set->numa_node); in blk_mq_init_queue_data()
4087 return ERR_PTR(-ENOMEM); in blk_mq_init_queue_data()
4088 q->queuedata = queuedata; in blk_mq_init_queue_data()
4104 * blk_mq_destroy_queue - shutdown a request queue
4105 * @q: request queue to shutdown
4107 * This shuts down a request queue allocated by blk_mq_init_queue(). All future
4108  * requests will be failed with -ENODEV. The caller is responsible for dropping the reference acquired by blk_mq_init_queue(), usually via blk_put_queue().
4140 disk = __alloc_disk_node(q, set->numa_node, lkclass); in __blk_mq_alloc_disk()
4144 return ERR_PTR(-ENOMEM); in __blk_mq_alloc_disk()
4146 set_bit(GD_OWNS_QUEUE, &disk->state); in __blk_mq_alloc_disk()
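A sketch of how a driver's probe path typically consumes this: in the kernel generation this listing appears to come from, the blk_mq_alloc_disk() wrapper takes just the tag set and a queuedata pointer (newer kernels add a queue_limits argument, so verify). dev, foo_block_ops and nr_sectors are hypothetical:

	struct gendisk *disk;
	int err;

	disk = blk_mq_alloc_disk(&dev->tag_set, dev);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	disk->fops = &foo_block_ops;
	disk->private_data = dev;
	snprintf(disk->disk_name, DISK_NAME_LEN, "foo0");
	set_capacity(disk, dev->nr_sectors);

	err = add_disk(disk);
	if (err)
		put_disk(disk);		/* drops the disk and its request_queue */
	return err;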
4172 spin_lock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4173 list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { in blk_mq_alloc_and_init_hctx()
4174 if (tmp->numa_node == node) { in blk_mq_alloc_and_init_hctx()
4180 list_del_init(&hctx->hctx_list); in blk_mq_alloc_and_init_hctx()
4181 spin_unlock(&q->unused_hctx_lock); in blk_mq_alloc_and_init_hctx()
4194 kobject_put(&hctx->kobj); in blk_mq_alloc_and_init_hctx()
4206 mutex_lock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4207 for (i = 0; i < set->nr_hw_queues; i++) { in blk_mq_realloc_hw_ctxs()
4210 struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); in blk_mq_realloc_hw_ctxs()
4213 old_node = old_hctx->numa_node; in blk_mq_realloc_hw_ctxs()
4228 	 * Increasing nr_hw_queues failed: free the newly allocated hctxs and keep the previous q->nr_hw_queues. in blk_mq_realloc_hw_ctxs()
4230 if (i != set->nr_hw_queues) { in blk_mq_realloc_hw_ctxs()
4231 j = q->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4234 q->nr_hw_queues = set->nr_hw_queues; in blk_mq_realloc_hw_ctxs()
4237 xa_for_each_start(&q->hctx_table, j, hctx, j) in blk_mq_realloc_hw_ctxs()
4239 mutex_unlock(&q->sysfs_lock); in blk_mq_realloc_hw_ctxs()
4244 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_poll_flag()
4246 if (set->nr_maps > HCTX_TYPE_POLL && in blk_mq_update_poll_flag()
4247 set->map[HCTX_TYPE_POLL].nr_queues) in blk_mq_update_poll_flag()
4256 /* mark the queue as mq asap */ in blk_mq_init_allocated_queue()
4257 q->mq_ops = set->ops; in blk_mq_init_allocated_queue()
4262 /* init q->mq_kobj and sw queues' kobjects */ in blk_mq_init_allocated_queue()
4265 INIT_LIST_HEAD(&q->unused_hctx_list); in blk_mq_init_allocated_queue()
4266 spin_lock_init(&q->unused_hctx_lock); in blk_mq_init_allocated_queue()
4268 xa_init(&q->hctx_table); in blk_mq_init_allocated_queue()
4271 if (!q->nr_hw_queues) in blk_mq_init_allocated_queue()
4274 INIT_WORK(&q->timeout_work, blk_mq_timeout_work); in blk_mq_init_allocated_queue()
4275 blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); in blk_mq_init_allocated_queue()
4277 q->tag_set = set; in blk_mq_init_allocated_queue()
4279 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; in blk_mq_init_allocated_queue()
4282 INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); in blk_mq_init_allocated_queue()
4283 INIT_LIST_HEAD(&q->flush_list); in blk_mq_init_allocated_queue()
4284 INIT_LIST_HEAD(&q->requeue_list); in blk_mq_init_allocated_queue()
4285 spin_lock_init(&q->requeue_lock); in blk_mq_init_allocated_queue()
4287 q->nr_requests = set->queue_depth; in blk_mq_init_allocated_queue()
4289 blk_mq_init_cpu_queues(q, set->nr_hw_queues); in blk_mq_init_allocated_queue()
4297 q->mq_ops = NULL; in blk_mq_init_allocated_queue()
4298 return -ENOMEM; in blk_mq_init_allocated_queue()
4305 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_exit_queue()
4307 /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ in blk_mq_exit_queue()
4308 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); in blk_mq_exit_queue()
4309 /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ in blk_mq_exit_queue()
4317 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4318 set->shared_tags = blk_mq_alloc_map_and_rqs(set, in __blk_mq_alloc_rq_maps()
4320 set->queue_depth); in __blk_mq_alloc_rq_maps()
4321 if (!set->shared_tags) in __blk_mq_alloc_rq_maps()
4322 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4325 for (i = 0; i < set->nr_hw_queues; i++) { in __blk_mq_alloc_rq_maps()
4334 while (--i >= 0) in __blk_mq_alloc_rq_maps()
4337 if (blk_mq_is_shared_tags(set->flags)) { in __blk_mq_alloc_rq_maps()
4338 blk_mq_free_map_and_rqs(set, set->shared_tags, in __blk_mq_alloc_rq_maps()
4342 return -ENOMEM; in __blk_mq_alloc_rq_maps()
4347  * Note that this may reduce the depth asked for if memory is tight; set->queue_depth is updated to reflect the depth actually allocated.
4355 depth = set->queue_depth; in blk_mq_alloc_set_map_and_rqs()
4361 set->queue_depth >>= 1; in blk_mq_alloc_set_map_and_rqs()
4362 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { in blk_mq_alloc_set_map_and_rqs()
4363 err = -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4366 } while (set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4368 if (!set->queue_depth || err) { in blk_mq_alloc_set_map_and_rqs()
4369 pr_err("blk-mq: failed to allocate request map\n"); in blk_mq_alloc_set_map_and_rqs()
4370 return -ENOMEM; in blk_mq_alloc_set_map_and_rqs()
4373 if (depth != set->queue_depth) in blk_mq_alloc_set_map_and_rqs()
4374 pr_info("blk-mq: reduced tag depth (%u -> %u)\n", in blk_mq_alloc_set_map_and_rqs()
4375 depth, set->queue_depth); in blk_mq_alloc_set_map_and_rqs()
4384 	 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the number of hardware queues. in blk_mq_update_queue_map()
4387 if (set->nr_maps == 1) in blk_mq_update_queue_map()
4388 set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; in blk_mq_update_queue_map()
4390 if (set->ops->map_queues && !is_kdump_kernel()) { in blk_mq_update_queue_map()
4397 	 * for (queue = 0; queue < set->nr_hw_queues; queue++) { in blk_mq_update_queue_map()
4398 	 * 	mask = get_cpu_mask(queue) in blk_mq_update_queue_map()
4400 	 * 	for_each_cpu(cpu, mask) set->map[x].mq_map[cpu] = queue; in blk_mq_update_queue_map()
4405 	 * When remapping, the table must be cleared first, since a CPU may no longer map to any hw queue. in blk_mq_update_queue_map()
4407 for (i = 0; i < set->nr_maps; i++) in blk_mq_update_queue_map()
4408 blk_mq_clear_mq_map(&set->map[i]); in blk_mq_update_queue_map()
4410 set->ops->map_queues(set); in blk_mq_update_queue_map()
4412 BUG_ON(set->nr_maps > 1); in blk_mq_update_queue_map()
4413 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); in blk_mq_update_queue_map()
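Most drivers either rely on the blk_mq_map_queues() fallback shown above or, for PCI MSI-X affinity, use helpers such as blk_mq_pci_map_queues(). A hand-rolled ->map_queues() matching the pseudocode in the comment might look like this sketch (the round-robin policy is illustrative only):

static void foo_map_queues(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
	unsigned int cpu;

	/* Every possible CPU must end up mapped to some hardware queue. */
	for_each_possible_cpu(cpu)
		qmap->mq_map[cpu] = qmap->queue_offset +
				    cpu % qmap->nr_queues;
}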
4423 if (set->nr_hw_queues >= new_nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4427 GFP_KERNEL, set->numa_node); in blk_mq_realloc_tag_set_tags()
4429 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4431 if (set->tags) in blk_mq_realloc_tag_set_tags()
4432 memcpy(new_tags, set->tags, set->nr_hw_queues * in blk_mq_realloc_tag_set_tags()
4433 sizeof(*set->tags)); in blk_mq_realloc_tag_set_tags()
4434 kfree(set->tags); in blk_mq_realloc_tag_set_tags()
4435 set->tags = new_tags; in blk_mq_realloc_tag_set_tags()
4437 for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { in blk_mq_realloc_tag_set_tags()
4439 while (--i >= set->nr_hw_queues) in blk_mq_realloc_tag_set_tags()
4441 return -ENOMEM; in blk_mq_realloc_tag_set_tags()
4447 set->nr_hw_queues = new_nr_hw_queues; in blk_mq_realloc_tag_set_tags()
4455  * May adjust the requested depth down if it is too large; in that case the adjusted value will be stored in set->queue_depth.
4463 if (!set->nr_hw_queues) in blk_mq_alloc_tag_set()
4464 return -EINVAL; in blk_mq_alloc_tag_set()
4465 if (!set->queue_depth) in blk_mq_alloc_tag_set()
4466 return -EINVAL; in blk_mq_alloc_tag_set()
4467 if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) in blk_mq_alloc_tag_set()
4468 return -EINVAL; in blk_mq_alloc_tag_set()
4470 if (!set->ops->queue_rq) in blk_mq_alloc_tag_set()
4471 return -EINVAL; in blk_mq_alloc_tag_set()
4473 if (!set->ops->get_budget ^ !set->ops->put_budget) in blk_mq_alloc_tag_set()
4474 return -EINVAL; in blk_mq_alloc_tag_set()
4476 if (set->queue_depth > BLK_MQ_MAX_DEPTH) { in blk_mq_alloc_tag_set()
4477 pr_info("blk-mq: reduced tag depth to %u\n", in blk_mq_alloc_tag_set()
4479 set->queue_depth = BLK_MQ_MAX_DEPTH; in blk_mq_alloc_tag_set()
4482 if (!set->nr_maps) in blk_mq_alloc_tag_set()
4483 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4484 else if (set->nr_maps > HCTX_MAX_TYPES) in blk_mq_alloc_tag_set()
4485 return -EINVAL; in blk_mq_alloc_tag_set()
4489 	 * memory constrained environment. Limit us to 1 queue and 64 tags to prevent using too much memory. in blk_mq_alloc_tag_set()
4493 set->nr_hw_queues = 1; in blk_mq_alloc_tag_set()
4494 set->nr_maps = 1; in blk_mq_alloc_tag_set()
4495 set->queue_depth = min(64U, set->queue_depth); in blk_mq_alloc_tag_set()
4501 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) in blk_mq_alloc_tag_set()
4502 set->nr_hw_queues = nr_cpu_ids; in blk_mq_alloc_tag_set()
4504 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_alloc_tag_set()
4505 set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); in blk_mq_alloc_tag_set()
4506 if (!set->srcu) in blk_mq_alloc_tag_set()
4507 return -ENOMEM; in blk_mq_alloc_tag_set()
4508 ret = init_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4513 ret = -ENOMEM; in blk_mq_alloc_tag_set()
4514 set->tags = kcalloc_node(set->nr_hw_queues, in blk_mq_alloc_tag_set()
4516 set->numa_node); in blk_mq_alloc_tag_set()
4517 if (!set->tags) in blk_mq_alloc_tag_set()
4520 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4521 set->map[i].mq_map = kcalloc_node(nr_cpu_ids, in blk_mq_alloc_tag_set()
4522 sizeof(set->map[i].mq_map[0]), in blk_mq_alloc_tag_set()
4523 GFP_KERNEL, set->numa_node); in blk_mq_alloc_tag_set()
4524 if (!set->map[i].mq_map) in blk_mq_alloc_tag_set()
4526 set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues; in blk_mq_alloc_tag_set()
4535 mutex_init(&set->tag_list_lock); in blk_mq_alloc_tag_set()
4536 INIT_LIST_HEAD(&set->tag_list); in blk_mq_alloc_tag_set()
4541 for (i = 0; i < set->nr_maps; i++) { in blk_mq_alloc_tag_set()
4542 kfree(set->map[i].mq_map); in blk_mq_alloc_tag_set()
4543 set->map[i].mq_map = NULL; in blk_mq_alloc_tag_set()
4545 kfree(set->tags); in blk_mq_alloc_tag_set()
4546 set->tags = NULL; in blk_mq_alloc_tag_set()
4548 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4549 cleanup_srcu_struct(set->srcu); in blk_mq_alloc_tag_set()
4551 if (set->flags & BLK_MQ_F_BLOCKING) in blk_mq_alloc_tag_set()
4552 kfree(set->srcu); in blk_mq_alloc_tag_set()
4557 /* allocate and initialize a tagset for a simple single-queue device */
4563 set->ops = ops; in blk_mq_alloc_sq_tag_set()
4564 set->nr_hw_queues = 1; in blk_mq_alloc_sq_tag_set()
4565 set->nr_maps = 1; in blk_mq_alloc_sq_tag_set()
4566 set->queue_depth = queue_depth; in blk_mq_alloc_sq_tag_set()
4567 set->numa_node = NUMA_NO_NODE; in blk_mq_alloc_sq_tag_set()
4568 set->flags = set_flags; in blk_mq_alloc_sq_tag_set()
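Putting the two allocation paths together, a hedged sketch of a driver filling in a tag set; foo_device, foo_is_simple() and the queue depths are hypothetical, and struct foo_cmd refers back to the init_request sketch earlier:

static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);
	/* ... kick the hardware; the completion path later ends the request
	 * with blk_mq_end_request() ... */
	return BLK_STS_OK;
}

static const struct blk_mq_ops foo_mq_ops = {
	.queue_rq	= foo_queue_rq,		/* required, as checked above */
	.init_request	= foo_init_request,
	.exit_request	= foo_exit_request,
};

static int foo_setup_tag_set(struct foo_device *dev)
{
	struct blk_mq_tag_set *set = &dev->tag_set;

	/* Simple devices can use the single-queue convenience wrapper. */
	if (foo_is_simple(dev))
		return blk_mq_alloc_sq_tag_set(set, &foo_mq_ops, 64, 0);

	memset(set, 0, sizeof(*set));
	set->ops = &foo_mq_ops;
	set->nr_hw_queues = dev->nr_hw_queues;
	set->queue_depth = 128;
	set->cmd_size = sizeof(struct foo_cmd);
	set->numa_node = NUMA_NO_NODE;
	set->driver_data = dev;
	return blk_mq_alloc_tag_set(set);
}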
4577 for (i = 0; i < set->nr_hw_queues; i++) in blk_mq_free_tag_set()
4580 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_free_tag_set()
4581 blk_mq_free_map_and_rqs(set, set->shared_tags, in blk_mq_free_tag_set()
4585 for (j = 0; j < set->nr_maps; j++) { in blk_mq_free_tag_set()
4586 kfree(set->map[j].mq_map); in blk_mq_free_tag_set()
4587 set->map[j].mq_map = NULL; in blk_mq_free_tag_set()
4590 kfree(set->tags); in blk_mq_free_tag_set()
4591 set->tags = NULL; in blk_mq_free_tag_set()
4592 if (set->flags & BLK_MQ_F_BLOCKING) { in blk_mq_free_tag_set()
4593 cleanup_srcu_struct(set->srcu); in blk_mq_free_tag_set()
4594 kfree(set->srcu); in blk_mq_free_tag_set()
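Teardown on the driver side mirrors setup; a sketch, assuming the disk was created with blk_mq_alloc_disk() as in the earlier probe sketch:

	del_gendisk(dev->disk);			/* stop accepting new I/O */
	put_disk(dev->disk);			/* drops the disk and its queue */
	blk_mq_free_tag_set(&dev->tag_set);	/* only after all queues are gone */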
4601 struct blk_mq_tag_set *set = q->tag_set; in blk_mq_update_nr_requests()
4607 return -EINVAL; in blk_mq_update_nr_requests()
4609 if (q->nr_requests == nr) in blk_mq_update_nr_requests()
4617 if (!hctx->tags) in blk_mq_update_nr_requests()
4621 	 * If an MQ scheduler is in use, just update the scheduler queue depth. This is similar to what the old code would do. in blk_mq_update_nr_requests()
4623 if (hctx->sched_tags) { in blk_mq_update_nr_requests()
4624 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, in blk_mq_update_nr_requests()
4627 ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, in blk_mq_update_nr_requests()
4632 if (q->elevator && q->elevator->type->ops.depth_updated) in blk_mq_update_nr_requests()
4633 q->elevator->type->ops.depth_updated(hctx); in blk_mq_update_nr_requests()
4636 q->nr_requests = nr; in blk_mq_update_nr_requests()
4637 if (blk_mq_is_shared_tags(set->flags)) { in blk_mq_update_nr_requests()
4638 if (q->elevator) in blk_mq_update_nr_requests()
4675 /* q->elevator needs protection from ->sysfs_lock */ in blk_mq_elv_switch_none()
4676 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4679 if (!q->elevator) { in blk_mq_elv_switch_none()
4684 INIT_LIST_HEAD(&qe->node); in blk_mq_elv_switch_none()
4685 qe->q = q; in blk_mq_elv_switch_none()
4686 qe->type = q->elevator->type; in blk_mq_elv_switch_none()
4688 __elevator_get(qe->type); in blk_mq_elv_switch_none()
4689 list_add(&qe->node, head); in blk_mq_elv_switch_none()
4692 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_none()
4703 if (qe->q == q) in blk_lookup_qe_pair()
4718 t = qe->type; in blk_mq_elv_switch_back()
4719 list_del(&qe->node); in blk_mq_elv_switch_back()
4722 mutex_lock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4726 mutex_unlock(&q->sysfs_lock); in blk_mq_elv_switch_back()
4734 int prev_nr_hw_queues = set->nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4737 lockdep_assert_held(&set->tag_list_lock); in __blk_mq_update_nr_hw_queues()
4739 if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) in __blk_mq_update_nr_hw_queues()
4743 if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) in __blk_mq_update_nr_hw_queues()
4746 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4751 	 * Switch the IO scheduler to 'none' while updating the new sw to hw queue mappings; it is switched back once the update is done. in __blk_mq_update_nr_hw_queues()
4753 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4757 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4767 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4770 if (q->nr_hw_queues != set->nr_hw_queues) { in __blk_mq_update_nr_hw_queues()
4775 for (; i < set->nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
4778 set->nr_hw_queues = prev_nr_hw_queues; in __blk_mq_update_nr_hw_queues()
4785 list_for_each_entry(q, &set->tag_list, tag_set_list) { in __blk_mq_update_nr_hw_queues()
4791 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4794 list_for_each_entry(q, &set->tag_list, tag_set_list) in __blk_mq_update_nr_hw_queues()
4798 for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) in __blk_mq_update_nr_hw_queues()
4804 mutex_lock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
4806 mutex_unlock(&set->tag_list_lock); in blk_mq_update_nr_hw_queues()
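A typical caller is a controller-reset path that comes back with a different queue count; a sketch (dev and online_queues are hypothetical):

	/* Reshape the shared tag set and every request queue attached to it. */
	blk_mq_update_nr_hw_queues(&dev->tag_set, dev->online_queues);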
4817 ret = q->mq_ops->poll(hctx, iob); in blk_hctx_poll()
4840 struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); in blk_mq_poll()
4848 struct request_queue *q = rq->q; in blk_rq_poll()
4853 if (!percpu_ref_tryget(&q->q_usage_counter)) in blk_rq_poll()
4856 ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); in blk_rq_poll()
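blk_rq_poll() is the building block for synchronously waiting on a polled passthrough request; a sketch of such a wait loop, assuming rq sits on a poll-capable hctx and its end_io handler completes 'done':

	while (!completion_done(&done)) {
		if (!blk_rq_poll(rq, NULL, 0))
			cond_resched();	/* nothing reaped this round */
	}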
4865 return rq->mq_ctx->cpu; in blk_mq_rq_cpu()
4874 cancel_delayed_work_sync(&q->requeue_work); in blk_mq_cancel_work_sync()
4877 cancel_delayed_work_sync(&hctx->run_work); in blk_mq_cancel_work_sync()