// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of all mlx5_esw_sched_nodes. */
	struct list_head nodes;
};

static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}

static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}

static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}

static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
{
	struct mlx5_qos_domain *qos_domain;

	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
	if (!qos_domain)
		return NULL;

	mutex_init(&qos_domain->lock);
	INIT_LIST_HEAD(&qos_domain->nodes);

	return qos_domain;
}

static int esw_qos_domain_init(struct mlx5_eswitch *esw)
{
	esw->qos.domain = esw_qos_domain_alloc();

	return esw->qos.domain ? 0 : -ENOMEM;
}

static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}

enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
	SCHED_NODE_TYPE_RATE_LIMITER,
	SCHED_NODE_TYPE_VPORT_TC,
	SCHED_NODE_TYPE_VPORTS_TC_TSAR,
};

static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
	[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
	[SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
	[SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
	[SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
};

struct mlx5_esw_sched_node {
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
	/* Level in the hierarchy. The root node level is 1. */
	u8 level;
	/* Valid only when this node represents a traffic class. */
	u8 tc;
};

static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
{
	if (!node->parent) {
		/* Root children are assigned a depth level of 2. */
		node->level = 2;
		list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
	} else {
		node->level = node->parent->level + 1;
		list_add_tail(&node->entry, &node->parent->children);
	}
}

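/* Number of traffic classes used for per-TC scheduling, capped at the
 * devlink rate API maximum.
 */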
static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
{
	int num_tcs = mlx5_max_tc(dev) + 1;

	return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
}

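/* Detach @node from its current parent (if any) and attach it under @parent,
 * updating its level and owning eswitch.
 */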
static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	list_del_init(&node->entry);
	node->parent = parent;
	if (parent)
		node->esw = parent->esw;
	esw_qos_node_attach_to_parent(node);
}

static void esw_qos_nodes_set_parent(struct list_head *nodes,
				     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node, *tmp;

	list_for_each_entry_safe(node, tmp, nodes, entry) {
		esw_qos_node_set_parent(node, parent);
		if (!list_empty(&node->children) &&
		    parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
			struct mlx5_esw_sched_node *child;

			list_for_each_entry(child, &node->children, entry) {
				struct mlx5_vport *vport = child->vport;

				if (vport)
					vport->qos.sched_node->parent = parent;
			}
		}
	}
}

void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	if (vport->qos.sched_nodes) {
		int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
		int i;

		for (i = 0; i < num_tcs; i++)
			kfree(vport->qos.sched_nodes[i]);
		kfree(vport->qos.sched_nodes);
	}

	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}

u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return 0;

	return vport->qos.sched_node->ix;
}

struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return NULL;

	return vport->qos.sched_node->parent;
}

static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	switch (node->type) {
	case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT_TC:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n",
			 op,
			 sched_node_type_str[node->type],
			 node->vport->vport, node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		break;
	case SCHED_NODE_TYPE_RATE_LIMITER:
	case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
	case SCHED_NODE_TYPE_VPORTS_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (err=%d)\n",
			 op, sched_node_type_str[node->type], err);
		break;
	default:
		esw_warn(node->esw->dev,
			 "E-Switch %s scheduling element failed (err=%d)\n",
			 op, err);
		break;
	}
}

static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
						 &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}

static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}

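/* Modify the max_rate and/or bw_share of an existing scheduling element,
 * setting only the fields that actually changed in the MODIFY bitmask.
 */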
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}

static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node,
					     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};

	if (!mlx5_qos_element_type_supported(
			node->esw->dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate);
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT);

	return esw_qos_node_create_sched_element(node, sched_ctx, extack);
}

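/* Compute the divider used to convert min_rate values into firmware bw_share
 * units for all children of @parent (or for all root nodes when @parent is
 * NULL).
 */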
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If the nodes' max min_rate yields a divider of 0 but their parent has
	 * bw_share configured, set bw_share for the nodes to the minimal value.
	 */

	if (parent && parent->bw_share)
		return 1;

	/* If no node has min_rate configured, a divider of 0 sets all nodes'
	 * bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}

static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;
	return min_t(u32, fw_max,
		     max_t(u32,
			   DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
}

static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 bw_share;

	bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

	esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}

static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		/* Vports TC TSARs don't have a minimum rate configured,
		 * so there's no need to update the bw_share on them.
		 */
		if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
			esw_qos_update_sched_node_bw_share(node, divider,
							   extack);
		}

		if (list_empty(&node->children))
			continue;

		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}

static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
{
	u32 total = 0;
	int i;

	for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
		total += tc_bw[i];

	/* If total is zero, tc-bw config is disabled and we shouldn't reach
	 * here.
	 */
	return WARN_ON(!total) ? 1 : total;
}

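/* Update a node's min_rate and re-normalize bw_share across its siblings. */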
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (min_rate == node->min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(esw, node->parent, extack);

	return 0;
}

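/* Create a DWRR TSAR scheduling element under @parent_element_id. */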
static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
			       u32 max_rate, u32 bw_share, u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}

static int
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
				   struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 vport_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
		 vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}

static int
esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
				      u32 rate_limit_elem_ix,
				      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_tc_element, attr, vport_number,
		 vport_tc_node->vport->vport);
	MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
	MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
		 rate_limit_elem_ix);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 vport_tc_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, bw_share,
		 vport_tc_node->bw_share);

	return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	esw_qos_node_attach_to_parent(node);
	if (!parent) {
		/* The caller is responsible for inserting the node into the
		 * parent list if necessary. This function can also be used with
		 * a NULL parent, which doesn't necessarily indicate that it
		 * refers to the root scheduling element.
		 */
		list_del_init(&node->entry);
	}

	return node;
}

static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}

static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}

static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
					 u8 tc, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = parent->esw->dev;
	struct mlx5_esw_sched_node *vports_tc_node;
	void *attr;
	int err;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
			SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
					      SCHED_NODE_TYPE_VPORTS_TC_TSAR,
					      parent);
	if (!vports_tc_node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
		return -ENOMEM;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
	MLX5_SET(tsar_element, attr, traffic_class, tc);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
						extack);
	if (err)
		goto err_create_sched_element;

	vports_tc_node->tc = tc;

	return 0;

err_create_sched_element:
	__esw_qos_free_node(vports_tc_node);
	return err;
}

static void
esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw)
{
	struct mlx5_esw_sched_node *vports_tc_node;

	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry)
		tc_bw[vports_tc_node->tc] = vports_tc_node->bw_share;
}

static void
esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	struct mlx5_esw_sched_node *vports_tc_node;
	u32 divider, fw_max_bw_share;

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	divider = esw_qos_calculate_tc_bw_divider(tc_bw);
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		u8 tc = vports_tc_node->tc;
		u32 bw_share;

		bw_share = tc_bw[tc] * fw_max_bw_share;
		bw_share = esw_qos_calc_bw_share(bw_share, divider,
						 fw_max_bw_share);
		esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
	}
}

static void
esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *tmp;

	list_for_each_entry_safe(vports_tc_node, tmp,
				 &tc_arbiter_node->children, entry)
		esw_qos_destroy_node(vports_tc_node, extack);
}

static int
esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
			       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	int err, i, num_tcs = esw_qos_num_tcs(esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
		if (err)
			goto err_tc_node_create;
	}

	return 0;

err_tc_node_create:
	esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
	return err;
}

static int esw_qos_create_tc_arbiter_sched_elem(
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	u32 tsar_parent_ix;
	void *attr;

	if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
					  TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
					  SCHEDULING_HIERARCHY_E_SWITCH)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch TC Arbiter scheduling element is not supported");
		return -EOPNOTSUPP;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
	tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
			 tc_arbiter_node->esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 tsar_parent_ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
		 tc_arbiter_node->max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share,
		 tc_arbiter_node->bw_share);

	return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
					     0, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	list_add_tail(&node->entry, &esw->qos.domain->nodes);
	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}

static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		esw_qos_destroy_vports_tc_nodes(node, extack);

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}

static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
					     &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
		esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	} else {
		/* The eswitch doesn't support scheduling nodes.
		 * Create a software-only node0 using the root TSAR to attach vport QoS to.
		 */
		esw->qos.node0 = __esw_qos_alloc_node(esw,
						      esw->qos.root_tsar_ix,
						      SCHED_NODE_TYPE_VPORTS_TSAR,
						      NULL);
		if (!esw->qos.node0)
			esw->qos.node0 = ERR_PTR(-ENOMEM);
		else
			list_add_tail(&esw->qos.node0->entry,
				      &esw->qos.domain->nodes);
	}
	if (IS_ERR(esw->qos.node0)) {
		err = PTR_ERR(esw->qos.node0);
		esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err);
		goto err_node0;
	}
	refcount_set(&esw->qos.refcnt, 1);

	return 0;

err_node0:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						esw->qos.root_tsar_ix))
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");

	return err;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (esw->qos.node0->ix != esw->qos.root_tsar_ix)
		__esw_qos_destroy_node(esw->qos.node0, NULL);
	else
		__esw_qos_free_node(esw->qos.node0);
	esw->qos.node0 = NULL;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() set refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
				       struct netlink_ext_ack *extack)
{
	/* Clean up all Vports TC nodes within the TC arbiter node. */
	esw_qos_destroy_vports_tc_nodes(node, extack);
	/* Destroy the scheduling element for the TC arbiter node itself. */
	esw_qos_node_destroy_sched_element(node, extack);
}

static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	u32 curr_ix = node->ix;
	int err;

	err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
	if (err)
		return err;
	/* Initialize the vports TC nodes within created TC arbiter TSAR. */
	err = esw_qos_create_vports_tc_nodes(node, extack);
	if (err)
		goto err_vports_tc_nodes;

	node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;

	return 0;

err_vports_tc_nodes:
	/* If initialization fails, clean up the scheduling element
	 * for the TC arbiter node.
	 */
	esw_qos_node_destroy_sched_element(node, NULL);
	node->ix = curr_ix;
	return err;
}

static int
esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
				   u32 rate_limit_elem_ix,
				   struct mlx5_esw_sched_node *vports_tc_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *vport_tc_node;
	u8 tc = vports_tc_node->tc;
	int err;

	vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
					     SCHED_NODE_TYPE_VPORT_TC,
					     vports_tc_node);
	if (!vport_tc_node)
		return -ENOMEM;

	vport_tc_node->min_rate = vport_node->min_rate;
	vport_tc_node->tc = tc;
	vport_tc_node->vport = vport;
	err = esw_qos_vport_tc_create_sched_element(vport_tc_node,
						    rate_limit_elem_ix,
						    extack);
	if (err)
		goto err_out;

	vport->qos.sched_nodes[tc] = vport_tc_node;

	return 0;
err_out:
	__esw_qos_free_node(vport_tc_node);
	return err;
}

static void
esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport,
					struct netlink_ext_ack *extack)
{
	int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		if (vport->qos.sched_nodes[i]) {
			__esw_qos_destroy_node(vport->qos.sched_nodes[i],
					       extack);
		}
	}

	kfree(vport->qos.sched_nodes);
	vport->qos.sched_nodes = NULL;
}

static int
esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport,
				       enum sched_node_type type,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node;
	int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
	u32 rate_limit_elem_ix;

	vport->qos.sched_nodes = kcalloc(num_tcs,
					 sizeof(struct mlx5_esw_sched_node *),
					 GFP_KERNEL);
	if (!vport->qos.sched_nodes) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Allocating the vport TC scheduling elements failed.");
		return -ENOMEM;
	}

	rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			     vport_node->ix : 0;
	tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			  vport_node->parent : vport_node;
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		err = esw_qos_create_vport_tc_sched_node(vport,
							 rate_limit_elem_ix,
							 vports_tc_node,
							 extack);
		if (err)
			goto err_create_vport_tc;
	}

	return 0;

err_create_vport_tc:
	esw_qos_destroy_vport_tc_sched_elements(vport, NULL);

	return err;
}

static int
esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
			struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err, new_level, max_level;

	if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		/* Increase the parent's level by 2 to account for both the
		 * TC arbiter and the vports TC scheduling element.
		 */
		new_level = vport_node->parent->level + 2;
		max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
					      log_esw_max_sched_depth);
		if (new_level > max_level) {
			NL_SET_ERR_MSG_MOD(extack,
					   "TC arbitration on leafs is not supported beyond max scheduling depth");
			return -EOPNOTSUPP;
		}
	}

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (type == SCHED_NODE_TYPE_RATE_LIMITER)
		err = esw_qos_create_rate_limit_element(vport_node, extack);
	else
		err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack);
	if (err)
		return err;

	/* Rate limiters impact multiple nodes not directly connected to them
	 * and are not direct members of the QoS hierarchy.
	 * Unlink it from the parent to reflect that.
	 */
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		list_del_init(&vport_node->entry);
		vport_node->level = 0;
	}

	err = esw_qos_create_vport_tc_sched_elements(vport, type, extack);
	if (err)
		goto err_sched_nodes;

	return 0;

err_sched_nodes:
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		esw_qos_node_destroy_sched_element(vport_node, NULL);
		list_add_tail(&vport_node->entry,
			      &vport_node->parent->children);
		vport_node->level = vport_node->parent->level + 1;
	} else {
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
	}
	return err;
}

static void esw_qos_vport_tc_disable(struct mlx5_vport *vport,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	esw_qos_destroy_vport_tc_sched_elements(vport, extack);

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack);
}

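/* Apply @min_rate to each of the vport's per-TC nodes, rolling all of them
 * back to the previous value if any update fails.
 */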
static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
					  u32 min_rate,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
						min_rate, extack);
		if (err)
			goto err_out;
	}
	vport_node->min_rate = min_rate;

	return 0;
err_out:
	for (--i; i >= 0; i--) {
		esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
					  vport_node->min_rate, extack);
	}
	return err;
}

static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	enum sched_node_type curr_type = vport_node->type;

	if (curr_type == SCHED_NODE_TYPE_VPORT)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_vport_tc_disable(vport, extack);

	vport_node->bw_share = 0;
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(parent->esw, parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}

static int esw_qos_vport_enable(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport->qos.sched_node, parent);
	if (type == SCHED_NODE_TYPE_VPORT) {
		err = esw_qos_vport_create_sched_element(vport->qos.sched_node,
							 extack);
	} else {
		err = esw_qos_vport_tc_enable(vport, type, extack);
	}
	if (err)
		return err;

	vport->qos.sched_node->type = type;
	esw_qos_normalize_min_rate(parent->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport,
					vport->qos.sched_node->max_rate,
					vport->qos.sched_node->bw_share);

	return 0;
}

static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent = parent ?: esw->qos.node0;
	sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent);
	if (!sched_node)
		return -ENOMEM;

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *parent;

	lockdep_assert_held(&esw->state_lock);
	esw_qos_lock(esw);
	if (!vport->qos.sched_node)
		goto unlock;

	parent = vport->qos.sched_node->parent;
	WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node");

	esw_qos_vport_disable(vport, NULL);
	mlx5_esw_qos_vport_qos_free(vport);
	esw_qos_put(esw);
unlock:
	esw_qos_unlock(esw);
}

static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
						 extack);
	else
		return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share,
						 extack);
}

static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
						 extack);
	else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER)
		return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack);
	else
		return esw_qos_set_node_min_rate(vport_node, min_rate, extack);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}

bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	bool enabled;

	esw_qos_lock(esw);
	enabled = !!vport->qos.sched_node;
	if (enabled) {
		*max_rate = vport->qos.sched_node->max_rate;
		*min_rate = vport->qos.sched_node->min_rate;
	}
	esw_qos_unlock(esw);
	return enabled;
}

static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
					enum sched_node_type new_type,
					struct netlink_ext_ack *extack)
{
	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR &&
	    new_type == SCHED_NODE_TYPE_RATE_LIMITER) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from vport-level TC arbitration to node-level TC arbitration");
		return -EOPNOTSUPP;
	}

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER &&
	    new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from node-level TC arbitration to vport-level TC arbitration");
		return -EOPNOTSUPP;
	}

	return 0;
}

static int esw_qos_vport_update(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = vport->qos.sched_node->parent;
	enum sched_node_type curr_type = vport->qos.sched_node->type;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);
	parent = parent ?: curr_parent;
	if (curr_type == type && curr_parent == parent)
		return 0;

	err = esw_qos_vport_tc_check_type(curr_type, type, extack);
	if (err)
		return err;

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
		esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node,
						 curr_tc_bw);
	}

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
		extack = NULL;
	}

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
		esw_qos_set_tc_arbiter_bw_shares(vport->qos.sched_node,
						 curr_tc_bw, extack);
	}

	return err;
}

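/* Move a vport to a new parent node, selecting the scheduling element type
 * the vport needs under that parent.
 */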
static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
					struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *curr_parent;
	enum sched_node_type type;

	esw_assert_qos_lock_held(esw);
	curr_parent = vport->qos.sched_node->parent;
	parent = parent ?: esw->qos.node0;
	if (curr_parent == parent)
		return 0;

	/* Set vport QoS type based on parent node type if different from
	 * default QoS; otherwise, use the vport's current QoS type.
	 */
	if (parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_RATE_LIMITER;
	else if (curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_VPORT;
	else
		type = vport->qos.sched_node->type;

	return esw_qos_vport_update(vport, type, parent, extack);
}

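/* Reassign every vport currently under @tc_arbiter_node back to @node,
 * dropping their per-TC scheduling elements.
 */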
static void
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
				  struct mlx5_esw_sched_node *node,
				  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;

	vports_tc_node = list_first_entry(&tc_arbiter_node->children,
					  struct mlx5_esw_sched_node,
					  entry);

	list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
				 entry)
		esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
}

static int esw_qos_switch_tc_arbiter_node_to_vports(
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct mlx5_esw_sched_node *node,
	struct netlink_ext_ack *extack)
{
	u32 parent_tsar_ix = node->parent ?
			     node->parent->ix : node->esw->qos.root_tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
					     node->max_rate, node->bw_share,
					     &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create scheduling element for vports node when disabling vports TC QoS");
		return err;
	}

	node->type = SCHED_NODE_TYPE_VPORTS_TSAR;

	/* Disable TC QoS for vports in the arbiter node. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);

	return 0;
}

static int esw_qos_switch_vports_node_to_tc_arbiter(
	struct mlx5_esw_sched_node *node,
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node, *tmp;
	struct mlx5_vport *vport;
	int err;

	/* Enable TC QoS for each vport in the node. */
	list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
		vport = vport_node->vport;
		err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
						  extack);
		if (err)
			goto err_out;
	}

	/* Destroy the current vports node TSAR. */
	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err)
		goto err_out;

	return 0;
err_out:
	/* Restore vports back into the node if an error occurs. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);

	return err;
}

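/* Allocate a detached copy of @curr_node and move all of its children under
 * the copy; used to snapshot a node while its scheduling element is replaced.
 */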
static struct mlx5_esw_sched_node *
esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
{
	struct mlx5_esw_sched_node *new_node;

	new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
					curr_node->type, NULL);
	if (!new_node)
		return ERR_PTR(-ENOMEM);

	esw_qos_nodes_set_parent(&curr_node->children, new_node);
	return new_node;
}

static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node;
	int err;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Allocate a new rate node to hold the current state, which will allow
	 * for restoring the vports back to this node after disabling TC
	 * arbitration.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
		return PTR_ERR(curr_node);
	}

	/* Disable TC QoS for all vports, and assign them back to the node. */
	err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
	if (err)
		goto err_out;

	/* Clean up the TC arbiter node after disabling TC QoS for vports. */
	esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
	goto out;
err_out:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node, *child;
	int err, new_level, max_level;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Increase the hierarchy level by one to account for the additional
	 * vports TC scheduling node, and verify that the new level does not
	 * exceed the maximum allowed depth.
	 */
	new_level = node->level + 1;
	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TC arbitration on nodes is not supported beyond max scheduling depth");
		return -EOPNOTSUPP;
	}

	/* Ensure the node does not contain non-leaf children before assigning
	 * TC bandwidth.
	 */
	if (!list_empty(&node->children)) {
		list_for_each_entry(child, &node->children, entry) {
			if (!child->vport) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Cannot configure TC bandwidth on a node with non-leaf children");
				return -EOPNOTSUPP;
			}
		}
	}

	/* Allocate a new node that will store the information of the current
	 * node. This will be used later to restore the node if necessary.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
		return PTR_ERR(curr_node);
	}

	/* Initialize the TC arbiter node for QoS management.
	 * This step prepares the node for handling Traffic Class arbitration.
	 */
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err)
		goto err_setup;

	/* Enable TC QoS for each vport within the current node. */
	err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
	if (err)
		goto err_switch_vports;
	goto out;

err_switch_vports:
	esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
	node->ix = curr_node->ix;
	node->type = curr_node->type;
err_setup:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Converts a bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place. Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
1614 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
1615 u64 *rate, struct netlink_ext_ack *extack)
1616 {
1617 u32 link_speed_max, remainder;
1618 u64 value;
1619 int err;
1620
1621 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
1622 if (remainder) {
1623 pr_err("%s rate value %lluBps is not a multiple of the 1Mbps link speed unit.\n",
1624 name, *rate);
1625 NL_SET_ERR_MSG_MOD(extack, "TX rate value is not a multiple of 1Mbps");
1626 return -EINVAL;
1627 }
1628
1629 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
1630 if (err)
1631 return err;
1632
1633 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
1634 if (err)
1635 return err;
1636
1637 *rate = value;
1638 return 0;
1639 }
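/* Worked example (illustrative only): devlink passes rates in bytes per
 * second. 1250000000 Bps / MLX5_LINKSPEED_UNIT (125000 Bps per Mbps) yields
 * 10000 Mbps with no remainder, so *rate is rewritten to 10000. A value such
 * as 1250000100 Bps leaves a remainder and is rejected with -EINVAL before
 * the link speed check is reached.
 */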
1640
1641 static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
1642 u32 *tc_bw)
1643 {
1644 int i, num_tcs = esw_qos_num_tcs(esw->dev);
1645
1646 for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
1647 if (tc_bw[i])
1648 return false;
1649 }
1650
1651 return true;
1652 }
1653
1654 static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
1655 u32 *tc_bw)
1656 {
1657 struct mlx5_eswitch *esw = vport->qos.sched_node ?
1658 vport->qos.sched_node->parent->esw :
1659 vport->dev->priv.eswitch;
1660
1661 return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
1662 }
1663
1664 static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
1665 {
1666 int i;
1667
1668 for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
1669 if (tc_bw[i])
1670 return false;
1671 }
1672
1673 return true;
1674 }
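/* Illustrative example of the tc_bw checks above (numbers are assumptions,
 * not driver defaults): on a device where esw_qos_num_tcs() returns 4, an
 * array whose first entries are {20, 30, 50} with the rest zero is accepted,
 * an array with a non-zero value at index 4 or above fails validation because
 * that TC is beyond the supported range, and an all-zero array is interpreted
 * as a request to disable TC bandwidth arbitration.
 */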
1675
1676 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
1677 {
1678 if (esw->qos.domain)
1679 return 0; /* Nothing to change. */
1680
1681 return esw_qos_domain_init(esw);
1682 }
1683
1684 void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
1685 {
1686 if (esw->qos.domain)
1687 esw_qos_domain_release(esw);
1688 }
1689
1690 /* Eswitch devlink rate API */
1691
1692 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
1693 u64 tx_share, struct netlink_ext_ack *extack)
1694 {
1695 struct mlx5_vport *vport = priv;
1696 struct mlx5_eswitch *esw;
1697 int err;
1698
1699 esw = vport->dev->priv.eswitch;
1700 if (!mlx5_esw_allowed(esw))
1701 return -EPERM;
1702
1703 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
1704 if (err)
1705 return err;
1706
1707 esw_qos_lock(esw);
1708 err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
1709 esw_qos_unlock(esw);
1710 return err;
1711 }
1712
1713 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
1714 u64 tx_max, struct netlink_ext_ack *extack)
1715 {
1716 struct mlx5_vport *vport = priv;
1717 struct mlx5_eswitch *esw;
1718 int err;
1719
1720 esw = vport->dev->priv.eswitch;
1721 if (!mlx5_esw_allowed(esw))
1722 return -EPERM;
1723
1724 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
1725 if (err)
1726 return err;
1727
1728 esw_qos_lock(esw);
1729 err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
1730 esw_qos_unlock(esw);
1731 return err;
1732 }
1733
1734 int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
1735 void *priv,
1736 u32 *tc_bw,
1737 struct netlink_ext_ack *extack)
1738 {
1739 struct mlx5_esw_sched_node *vport_node;
1740 struct mlx5_vport *vport = priv;
1741 struct mlx5_eswitch *esw;
1742 bool disable;
1743 int err = 0;
1744
1745 esw = vport->dev->priv.eswitch;
1746 if (!mlx5_esw_allowed(esw))
1747 return -EPERM;
1748
1749 disable = esw_qos_tc_bw_disabled(tc_bw);
1750 esw_qos_lock(esw);
1751
1752 if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) {
1753 NL_SET_ERR_MSG_MOD(extack,
1754 "E-Switch traffic classes number is not supported");
1755 err = -EOPNOTSUPP;
1756 goto unlock;
1757 }
1758
1759 vport_node = vport->qos.sched_node;
1760 if (disable && !vport_node)
1761 goto unlock;
1762
1763 if (disable) {
1764 if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1765 err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
1766 NULL, extack);
1767 goto unlock;
1768 }
1769
1770 if (!vport_node) {
1771 err = mlx5_esw_qos_vport_enable(vport,
1772 SCHED_NODE_TYPE_TC_ARBITER_TSAR,
1773 NULL, 0, 0, extack);
1774 vport_node = vport->qos.sched_node;
1775 } else {
1776 err = esw_qos_vport_update(vport,
1777 SCHED_NODE_TYPE_TC_ARBITER_TSAR,
1778 NULL, extack);
1779 }
1780 if (!err)
1781 esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
1782 unlock:
1783 esw_qos_unlock(esw);
1784 return err;
1785 }
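/* Flow summary (descriptive, added for clarity): an all-zero tc_bw array from
 * devlink reverts the vport to a plain vport scheduling element, while a
 * non-zero array enables (or converts the vport node into) a TC arbiter TSAR
 * and then programs the per-TC bandwidth shares. This callback is reached via
 * the devlink rate "tc-bw" attribute; the exact userspace syntax depends on
 * the iproute2 version and is not assumed here.
 */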
1786
1787 int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
1788 void *priv,
1789 u32 *tc_bw,
1790 struct netlink_ext_ack *extack)
1791 {
1792 struct mlx5_esw_sched_node *node = priv;
1793 struct mlx5_eswitch *esw = node->esw;
1794 bool disable;
1795 int err;
1796
1797 if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
1798 NL_SET_ERR_MSG_MOD(extack,
1799 "E-Switch traffic classes number is not supported");
1800 return -EOPNOTSUPP;
1801 }
1802
1803 disable = esw_qos_tc_bw_disabled(tc_bw);
1804 esw_qos_lock(esw);
1805 if (disable) {
1806 err = esw_qos_node_disable_tc_arbitration(node, extack);
1807 goto unlock;
1808 }
1809
1810 err = esw_qos_node_enable_tc_arbitration(node, extack);
1811 if (!err)
1812 esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
1813 unlock:
1814 esw_qos_unlock(esw);
1815 return err;
1816 }
1817
1818 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
1819 u64 tx_share, struct netlink_ext_ack *extack)
1820 {
1821 struct mlx5_esw_sched_node *node = priv;
1822 struct mlx5_eswitch *esw = node->esw;
1823 int err;
1824
1825 err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
1826 if (err)
1827 return err;
1828
1829 esw_qos_lock(esw);
1830 err = esw_qos_set_node_min_rate(node, tx_share, extack);
1831 esw_qos_unlock(esw);
1832 return err;
1833 }
1834
1835 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
1836 u64 tx_max, struct netlink_ext_ack *extack)
1837 {
1838 struct mlx5_esw_sched_node *node = priv;
1839 struct mlx5_eswitch *esw = node->esw;
1840 int err;
1841
1842 err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
1843 if (err)
1844 return err;
1845
1846 esw_qos_lock(esw);
1847 err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
1848 esw_qos_unlock(esw);
1849 return err;
1850 }
1851
1852 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
1853 struct netlink_ext_ack *extack)
1854 {
1855 struct mlx5_esw_sched_node *node;
1856 struct mlx5_eswitch *esw;
1857 int err = 0;
1858
1859 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
1860 if (IS_ERR(esw))
1861 return PTR_ERR(esw);
1862
1863 esw_qos_lock(esw);
1864 if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
1865 NL_SET_ERR_MSG_MOD(extack,
1866 "Rate node creation supported only in switchdev mode");
1867 err = -EOPNOTSUPP;
1868 goto unlock;
1869 }
1870
1871 node = esw_qos_create_vports_sched_node(esw, extack);
1872 if (IS_ERR(node)) {
1873 err = PTR_ERR(node);
1874 goto unlock;
1875 }
1876
1877 *priv = node;
1878 unlock:
1879 esw_qos_unlock(esw);
1880 return err;
1881 }
1882
1883 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
1884 struct netlink_ext_ack *extack)
1885 {
1886 struct mlx5_esw_sched_node *node = priv;
1887 struct mlx5_eswitch *esw = node->esw;
1888
1889 esw_qos_lock(esw);
1890 __esw_qos_destroy_node(node, extack);
1891 esw_qos_put(esw);
1892 esw_qos_unlock(esw);
1893 return 0;
1894 }
1895
1896 int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
1897 struct netlink_ext_ack *extack)
1898 {
1899 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1900 int err = 0;
1901
1902 if (parent && parent->esw != esw) {
1903 NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
1904 return -EOPNOTSUPP;
1905 }
1906
1907 esw_qos_lock(esw);
1908 if (!vport->qos.sched_node && parent) {
1909 enum sched_node_type type;
1910
1911 type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
1912 SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
1913 err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
1914 extack);
1915 } else if (vport->qos.sched_node) {
1916 err = esw_qos_vport_update_parent(vport, parent, extack);
1917 }
1918 esw_qos_unlock(esw);
1919 return err;
1920 }
1921
1922 int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
1923 struct devlink_rate *parent,
1924 void *priv, void *parent_priv,
1925 struct netlink_ext_ack *extack)
1926 {
1927 struct mlx5_esw_sched_node *node;
1928 struct mlx5_vport *vport = priv;
1929
1930 if (!parent)
1931 return mlx5_esw_qos_vport_update_parent(vport, NULL, extack);
1932
1933 node = parent_priv;
1934 return mlx5_esw_qos_vport_update_parent(vport, node, extack);
1935 }
1936
1937 static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
1938 {
1939 if (list_empty(&node->children))
1940 return true;
1941
1942 if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1943 return false;
1944
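/* Descriptive note (added for clarity): for a TC arbiter TSAR the direct
 * children are the per-TC vports TSARs, which are created and populated
 * together, so - assuming they remain symmetric - inspecting the first one is
 * enough to tell whether any vport rate objects are attached underneath.
 */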
1945 node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
1946 entry);
1947
1948 return esw_qos_is_node_empty(node);
1949 }
1950
1951 static int
1952 mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
1953 struct mlx5_esw_sched_node *parent,
1954 struct netlink_ext_ack *extack)
1955 {
1956 u8 new_level, max_level;
1957
1958 if (parent && parent->esw != node->esw) {
1959 NL_SET_ERR_MSG_MOD(extack,
1960 "Cannot assign node to another E-Switch");
1961 return -EOPNOTSUPP;
1962 }
1963
1964 if (!esw_qos_is_node_empty(node)) {
1965 NL_SET_ERR_MSG_MOD(extack,
1966 "Cannot reassign a node that contains rate objects");
1967 return -EOPNOTSUPP;
1968 }
1969
1970 if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1971 NL_SET_ERR_MSG_MOD(extack,
1972 "Cannot attach a node to a parent with TC bandwidth configured");
1973 return -EOPNOTSUPP;
1974 }
1975
1976 new_level = parent ? parent->level + 1 : 2;
1977 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1978 /* Increase by one to account for the vports TC scheduling
1979 * element.
1980 */
1981 new_level += 1;
1982 }
1983
1984 max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
1985 if (new_level > max_level) {
1986 NL_SET_ERR_MSG_MOD(extack,
1987 "Node hierarchy depth exceeds the maximum supported level");
1988 return -EOPNOTSUPP;
1989 }
1990
1991 return 0;
1992 }
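/* Worked example of the depth check (illustrative numbers): with
 * log_esw_max_sched_depth = 3 the hierarchy may be at most 1 << 3 = 8 levels
 * deep. Moving a vports node under a parent at level 2 gives new_level = 3;
 * if the node is a TC arbiter TSAR the implicit vports TC level raises that
 * to 4. Both values are within the limit, so the move is allowed.
 */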
1993
1994 static int
1995 esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
1996 struct mlx5_esw_sched_node *parent,
1997 struct netlink_ext_ack *extack)
1998 {
1999 struct mlx5_esw_sched_node *curr_parent = node->parent;
2000 u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
2001 struct mlx5_eswitch *esw = node->esw;
2002 int err;
2003
2004 esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
2005 esw_qos_tc_arbiter_scheduling_teardown(node, extack);
2006 esw_qos_node_set_parent(node, parent);
2007 err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
2008 if (err) {
2009 esw_qos_node_set_parent(node, curr_parent);
2010 if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
2011 esw_warn(esw->dev, "Failed to restore QoS for node\n");
2012 return err;
2013 }
2014 }
2015 esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);
2016
2017 return err;
2018 }
2019
2020 static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
2021 struct mlx5_esw_sched_node *parent,
2022 struct netlink_ext_ack *extack)
2023 {
2024 struct mlx5_esw_sched_node *curr_parent = node->parent;
2025 struct mlx5_eswitch *esw = node->esw;
2026 u32 parent_ix;
2027 int err;
2028
2029 parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
2030 mlx5_destroy_scheduling_element_cmd(esw->dev,
2031 SCHEDULING_HIERARCHY_E_SWITCH,
2032 node->ix);
2033 err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
2034 node->max_rate, 0, &node->ix);
2035 if (err) {
2036 NL_SET_ERR_MSG_MOD(extack,
2037 "Failed to create a node under the new hierarchy.");
2038 if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix,
2039 node->max_rate,
2040 node->bw_share,
2041 &node->ix))
2042 esw_warn(esw->dev, "Failed to restore QoS for node\n");
2043
2044 return err;
2045 }
2046 esw_qos_node_set_parent(node, parent);
2047 node->bw_share = 0;
2048
2049 return 0;
2050 }
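/* Descriptive note (added for clarity): the driver does not modify the TSAR's
 * parent in place; the scheduling element is destroyed and recreated under
 * the new parent, and if that fails a best-effort attempt is made to recreate
 * it under the previous parent. On success node->bw_share is reset to 0 so
 * that the caller's min-rate normalization reprograms it.
 */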
2051
2052 static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
2053 struct mlx5_esw_sched_node *parent,
2054 struct netlink_ext_ack *extack)
2055 {
2056 struct mlx5_esw_sched_node *curr_parent;
2057 struct mlx5_eswitch *esw = node->esw;
2058 int err;
2059
2060 err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
2061 if (err)
2062 return err;
2063
2064 esw_qos_lock(esw);
2065 curr_parent = node->parent;
2066 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
2067 err = esw_qos_tc_arbiter_node_update_parent(node, parent,
2068 extack);
2069 } else {
2070 err = esw_qos_vports_node_update_parent(node, parent, extack);
2071 }
2072
2073 if (err)
2074 goto out;
2075
2076 esw_qos_normalize_min_rate(esw, curr_parent, extack);
2077 esw_qos_normalize_min_rate(esw, parent, extack);
2078
2079 out:
2080 esw_qos_unlock(esw);
2081
2082 return err;
2083 }
2084
2085 int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
2086 struct devlink_rate *parent,
2087 void *priv, void *parent_priv,
2088 struct netlink_ext_ack *extack)
2089 {
2090 struct mlx5_esw_sched_node *node = priv, *parent_node;
2091
2092 if (!parent)
2093 return mlx5_esw_qos_node_update_parent(node, NULL, extack);
2094
2095 parent_node = parent_priv;
2096 return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
2097 }
2098