/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued. When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once will fill
 * up the list and starve the others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from. When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's. A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};
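
/*
 * Illustrative sketch, not part of this header: one way the round-robin
 * dispatch described above can be expressed. A dispatcher always serves
 * the first qnode on a service_queue->queued[] list, pops a single bio
 * from it, and rotates the qnode to the tail if it still holds bios, so
 * no single source can monopolize the dispatch window. The helper name is
 * made up; the real version lives in blk-throttle.c and additionally drops
 * the blkg reference pinned by the qnode once it runs empty.
 */
static inline struct bio *throtl_sketch_pop_round_robin(struct list_head *queued)
{
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
	bio = bio_list_pop(&qn->bios);

	if (bio_list_empty(&qn->bios))
		list_del_init(&qn->node);		/* source drained */
	else
		list_move_tail(&qn->node, queued);	/* let siblings go next */

	return bio;
}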

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bios */
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children. qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when the
	 * group will unthrottle and be ready to dispatch more bios. It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules_bps[2];
	bool has_rules_iops[2];

	/* bytes per second rate limits */
	uint64_t bps[2];

	/* IOPS limits */
	unsigned int iops[2];

	/* Number of bytes dispatched in the current slice */
	int64_t bytes_disp[2];
	/* Number of bios dispatched in the current slice */
	int io_disp[2];

	/*
	 * The following two fields are updated when a new configuration is
	 * submitted while some bios are still throttled. They record how many
	 * bytes/ios have already been waited for under the previous
	 * configuration and are used to calculate the wait time under the new
	 * configuration.
	 */
	long long carryover_bytes[2];
	int carryover_ios[2];

	unsigned long last_check_time;

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};
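
/*
 * Illustrative sketch, not part of this header: how the carryover fields
 * enter the budget calculation after a configuration change. I/Os (or
 * bytes) that were already waited for under the old limits are credited
 * against what the new limit allows for the elapsed part of the slice, so
 * rewriting the limits mid-slice neither forfeits nor doubles the wait.
 * The helper name and the simplified arithmetic (no overflow handling) are
 * assumptions; the real calculation lives in blk-throttle.c.
 */
static inline long throtl_sketch_ios_allowed(struct throtl_grp *tg, int rw,
					     unsigned long jiffy_elapsed)
{
	/* I/Os the current iops limit admits for the elapsed slice time */
	unsigned long allowed = tg->iops[rw] * jiffy_elapsed / HZ;

	/* credit (or debit) what carried over from the old configuration */
	return (long)allowed + tg->carryover_ios[rw];
}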

extern struct blkcg_policy blkcg_policy_throtl;

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline void blk_throtl_exit(struct gendisk *disk) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
void blk_throtl_exit(struct gendisk *disk);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct gendisk *disk);

static inline bool blk_throtl_activated(struct request_queue *q)
{
	return q->td != NULL;
}

static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg;
	int rw = bio_data_dir(bio);

	/*
	 * This is called under bio_queue_enter(), and it's synchronized with
	 * the activation of blk-throtl, which is protected by
	 * blk_mq_freeze_queue().
	 */
	if (!blk_throtl_activated(bio->bi_bdev->bd_queue))
		return false;

	tg = blkg_to_tg(bio->bi_blkg);
	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
			bio_set_flag(bio, BIO_CGROUP_ACCT);
			blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		}
		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
	}

	/* iops limit is always counted */
	if (tg->has_rules_iops[rw])
		return true;

	if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED))
		return true;

	return false;
}

static inline bool blk_throtl_bio(struct bio *bio)
{
	if (!blk_should_throtl(bio))
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */

#endif
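
/*
 * Illustrative usage sketch, not part of this header: the caller-side
 * contract for blk_throtl_bio(). The submission path is expected to call
 * it once per bio; a true return means blk-throttle has taken ownership of
 * the bio and queued it for later dispatch, so the caller must not issue
 * it any further:
 *
 *	if (blk_throtl_bio(bio))
 *		return;
 *
 * A false return means the bio does not need to be throttled here and
 * submission continues as usual.
 */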