// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = SKB_DROP_REASON_QDISC_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

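	/* Accepted (and possibly ECN-marked) by RED: hand the packet to
	 * the child qdisc and account for it on success.
	 */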
	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	timer_delete_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
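	/* Reject inconsistent thresholds or shift values before any
	 * qdisc state is modified.
	 */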
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_purge_queue(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	timer_delete(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = timer_container_of(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

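/* Request TC_RED_STATS from an offloading driver so hardware counters are
 * reflected in the bstats/qstats reported to userspace.
 */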
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

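/* RED exposes a single child class (minor 1); report it to the walker. */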
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		tc_qdisc_stats_dump(sch, 1, walker);
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		= red_graft,
	.leaf		= red_leaf,
	.find		= red_find,
	.walk		= red_walk,
	.dump		= red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		= "red",
	.priv_size	= sizeof(struct red_sched_data),
	.cl_ops		= &red_class_ops,
	.enqueue	= red_enqueue,
	.dequeue	= red_dequeue,
	.peek		= red_peek,
	.init		= red_init,
	.reset		= red_reset,
	.destroy	= red_destroy,
	.change		= red_change,
	.dump		= red_dump,
	.dump_stats	= red_dump_stats,
	.owner		= THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");