1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10
/* The smallest BW share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Divide @rate by @divider (rounding up), raise the result to at least
 * MLX5_MIN_BW_SHARE and finally cap it at @limit. The cap is applied last,
 * so a @limit below MLX5_MIN_BW_SHARE wins.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit)			\
	min_t(u32,							\
	      max_t(u32, DIV_ROUND_UP((rate), (divider)),		\
		    MLX5_MIN_BW_SHARE),					\
	      (limit))
16
17 struct mlx5_esw_rate_group {
18 u32 tsar_ix;
19 u32 max_rate;
20 u32 min_rate;
21 u32 bw_share;
22 struct list_head list;
23 };
24
/* Program max_average_bw and bw_share of scheduling element @tsar_ix.
 *
 * @sched_ctx is a caller-provided scheduling_context buffer; both values are
 * written into it and both fields are flagged in the modify bitmask.
 *
 * Returns -EOPNOTSUPP when the device lacks the qos general capability or the
 * esw_scheduling QoS capability, otherwise the result of the modify command.
 */
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	u32 modify_mask;

	if (!(MLX5_CAP_GEN(dev, qos) && MLX5_CAP_QOS(dev, esw_scheduling)))
		return -EOPNOTSUPP;

	modify_mask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW |
		      MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);

	return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, tsar_ix, modify_mask);
}
44
/* Apply @max_rate and @bw_share to the scheduling element of @group.
 *
 * On failure an extack message is set. The group's cached values are not
 * touched here; callers update them after a successful return.
 */
static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int ret;

	ret = esw_qos_tsar_config(dev, sched_ctx, group->tsar_ix, max_rate, bw_share);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	/* The tracepoint is emitted whether or not the command succeeded */
	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return ret;
}
62
/* Apply @max_rate and @bw_share to the scheduling element of @vport.
 *
 * Returns -EIO when QoS is not enabled on the vport. On a command failure a
 * warning is logged and an extack message is set; the tracepoint is emitted
 * only on success.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	int ret;

	if (!vport->qos.enabled)
		return -EIO;

	ret = esw_qos_tsar_config(esw->dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (!ret) {
		trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
		return 0;
	}

	esw_warn(esw->dev,
		 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);
	NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");

	return ret;
}
89
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,bool group_level)90 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
91 struct mlx5_esw_rate_group *group,
92 bool group_level)
93 {
94 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
95 struct mlx5_vport *evport;
96 u32 max_guarantee = 0;
97 unsigned long i;
98
99 if (group_level) {
100 struct mlx5_esw_rate_group *group;
101
102 list_for_each_entry(group, &esw->qos.groups, list) {
103 if (group->min_rate < max_guarantee)
104 continue;
105 max_guarantee = group->min_rate;
106 }
107 } else {
108 mlx5_esw_for_each_vport(esw, i, evport) {
109 if (!evport->enabled || !evport->qos.enabled ||
110 evport->qos.group != group || evport->qos.min_rate < max_guarantee)
111 continue;
112 max_guarantee = evport->qos.min_rate;
113 }
114 }
115
116 if (max_guarantee)
117 return max_t(u32, max_guarantee / fw_max_bw_share, 1);
118
119 /* If vports min rate divider is 0 but their group has bw_share configured, then
120 * need to set bw_share for vports to minimal value.
121 */
122 if (!group_level && !max_guarantee && group && group->bw_share)
123 return 1;
124 return 0;
125 }
126
/* Translate @min_rate into a bw_share value.
 *
 * A zero @divider yields 0. Otherwise the result is @min_rate / @divider
 * rounded up, kept within [MLX5_MIN_BW_SHARE, @fw_max].
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;

	return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
}
134
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)135 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
136 struct mlx5_esw_rate_group *group,
137 struct netlink_ext_ack *extack)
138 {
139 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
140 u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
141 struct mlx5_vport *evport;
142 unsigned long i;
143 u32 bw_share;
144 int err;
145
146 mlx5_esw_for_each_vport(esw, i, evport) {
147 if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
148 continue;
149 bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
150
151 if (bw_share == evport->qos.bw_share)
152 continue;
153
154 err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
155 if (err)
156 return err;
157
158 evport->qos.bw_share = bw_share;
159 }
160
161 return 0;
162 }
163
/* Recompute and program bw_share for every group on esw->qos.groups using
 * @divider.
 *
 * Groups whose bw_share does not change are skipped entirely. For a group
 * that does change, its member vports are re-normalized right afterwards.
 * Returns the first error encountered, leaving later groups untouched.
 */
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *iter;
	u32 share;
	int ret;

	list_for_each_entry(iter, &esw->qos.groups, list) {
		share = esw_qos_calc_bw_share(iter->min_rate, divider, fw_max_bw_share);
		if (share == iter->bw_share)
			continue;

		ret = esw_qos_group_config(esw, iter, iter->max_rate, share, extack);
		if (ret)
			return ret;

		iter->bw_share = share;

		/* All the group's vports need to be set with default bw_share
		 * to enable them with QOS
		 */
		ret = esw_qos_normalize_vports_min_rate(esw, iter, extack);
		if (ret)
			return ret;
	}

	return 0;
}
195
/* Set the min rate of @evport and re-normalize the vports of its group.
 *
 * Must be called with esw->state_lock held. A non-zero @min_rate requires the
 * esw_bw_share capability and a max_tsar_bw_share of at least
 * MLX5_MIN_BW_SHARE, otherwise -EOPNOTSUPP is returned. If normalization
 * fails, the cached min rate is put back to its previous value.
 */
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, old_min_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	if (min_rate &&
	    (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	old_min_rate = evport->qos.min_rate;
	if (min_rate == old_min_rate)
		return 0;

	/* The normalization reads the cached value, so update it first */
	evport->qos.min_rate = min_rate;
	ret = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (ret)
		evport->qos.min_rate = old_min_rate;

	return ret;
}
220
/* Set the max rate of @evport.
 *
 * Must be called with esw->state_lock held. A non-zero @max_rate requires the
 * esw_rate_limit capability (-EOPNOTSUPP otherwise). The cached max rate is
 * updated only when the HW configuration succeeds.
 */
static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	u32 hw_max_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (max_rate && !MLX5_CAP_QOS(esw->dev, esw_rate_limit))
		return -EOPNOTSUPP;
	if (evport->qos.max_rate == max_rate)
		return 0;

	/* If parent group has rate limit need to set to group
	 * value when new max rate is 0.
	 */
	hw_max_rate = (!max_rate && evport->qos.group) ?
		      evport->qos.group->max_rate : max_rate;

	ret = esw_qos_vport_config(esw, evport, hw_max_rate, evport->qos.bw_share, extack);
	if (ret)
		return ret;

	/* Remember what was requested, not the value inherited from the group */
	evport->qos.max_rate = max_rate;

	return 0;
}
249
/* Set the min rate of @group and re-normalize bw_share across all groups.
 *
 * Returns -EOPNOTSUPP unless the device has the esw_bw_share capability and a
 * max_tsar_bw_share of at least MLX5_MIN_BW_SHARE. If normalization fails,
 * the previous min rate is restored and a second normalization pass attempts
 * to bring the groups back to their former shares; the original error is
 * returned either way.
 */
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_core_dev *dev = esw->dev;
	u32 old_min_rate, divider;
	int ret;

	if (!MLX5_CAP_QOS(dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE)
		return -EOPNOTSUPP;

	old_min_rate = group->min_rate;
	if (min_rate == old_min_rate)
		return 0;

	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	ret = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (!ret)
		return 0;

	group->min_rate = old_min_rate;
	NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

	/* Attempt restoring previous configuration */
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");

	return ret;
}
280
/* Set the max rate of @group and propagate it to the group's vports that
 * have no max rate of their own.
 *
 * Returns 0 when nothing changes or everything was programmed. A failure to
 * configure the group itself is returned immediately and leaves the cached
 * group rate untouched. Vport propagation is best effort: every eligible
 * vport is attempted, and the first vport failure (if any) is returned.
 */
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int ret = 0;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (!err)
			continue;

		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch vport implicit rate limit setting failed");
		/* Keep the first failure so that a later vport succeeding
		 * does not hide it from the caller.
		 */
		if (!ret)
			ret = err;
	}

	return ret;
}
314
/* Create the vport scheduling element for @vport.
 *
 * The element is parented to the TSAR of vport->qos.group when a group is
 * set, otherwise to the root TSAR. The new element index is stored in
 * vport->qos.esw_tsar_ix by the create command. Logs a warning and returns
 * the command's error on failure.
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group = vport->qos.group;
	u32 parent_ix = esw->qos.root_tsar_ix;
	void *attr;
	int ret;

	if (group)
		parent_ix = group->tsar_ix;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	/* The vport number lives in the element-specific attributes area */
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport->vport);

	ret = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (ret)
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, ret);

	return ret;
}
347
esw_qos_update_group_scheduling_element(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * curr_group,struct mlx5_esw_rate_group * new_group,struct netlink_ext_ack * extack)348 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
349 struct mlx5_vport *vport,
350 struct mlx5_esw_rate_group *curr_group,
351 struct mlx5_esw_rate_group *new_group,
352 struct netlink_ext_ack *extack)
353 {
354 u32 max_rate;
355 int err;
356
357 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
358 SCHEDULING_HIERARCHY_E_SWITCH,
359 vport->qos.esw_tsar_ix);
360 if (err) {
361 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
362 return err;
363 }
364
365 vport->qos.group = new_group;
366 max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
367
368 /* If vport is unlimited, we set the group's value.
369 * Therefore, if the group is limited it will apply to
370 * the vport as well and if not, vport will remain unlimited.
371 */
372 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
373 if (err) {
374 NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
375 goto err_sched;
376 }
377
378 return 0;
379
380 err_sched:
381 vport->qos.group = curr_group;
382 max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
383 if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
384 esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
385 vport->vport);
386
387 return err;
388 }
389
esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)390 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
391 struct mlx5_vport *vport,
392 struct mlx5_esw_rate_group *group,
393 struct netlink_ext_ack *extack)
394 {
395 struct mlx5_esw_rate_group *new_group, *curr_group;
396 int err;
397
398 if (!vport->enabled)
399 return -EINVAL;
400
401 curr_group = vport->qos.group;
402 new_group = group ?: esw->qos.group0;
403 if (curr_group == new_group)
404 return 0;
405
406 err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
407 if (err)
408 return err;
409
410 /* Recalculate bw share weights of old and new groups */
411 if (vport->qos.bw_share || new_group->bw_share) {
412 esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
413 esw_qos_normalize_vports_min_rate(esw, new_group, extack);
414 }
415
416 return 0;
417 }
418
419 static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)420 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
421 {
422 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
423 struct mlx5_esw_rate_group *group;
424 u32 divider;
425 int err;
426
427 group = kzalloc(sizeof(*group), GFP_KERNEL);
428 if (!group)
429 return ERR_PTR(-ENOMEM);
430
431 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
432 esw->qos.root_tsar_ix);
433 err = mlx5_create_scheduling_element_cmd(esw->dev,
434 SCHEDULING_HIERARCHY_E_SWITCH,
435 tsar_ctx,
436 &group->tsar_ix);
437 if (err) {
438 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
439 goto err_sched_elem;
440 }
441
442 list_add_tail(&group->list, &esw->qos.groups);
443
444 divider = esw_qos_calculate_min_rate_divider(esw, group, true);
445 if (divider) {
446 err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
447 if (err) {
448 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
449 goto err_min_rate;
450 }
451 }
452 trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
453
454 return group;
455
456 err_min_rate:
457 list_del(&group->list);
458 if (mlx5_destroy_scheduling_element_cmd(esw->dev,
459 SCHEDULING_HIERARCHY_E_SWITCH,
460 group->tsar_ix))
461 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
462 err_sched_elem:
463 kfree(group);
464 return ERR_PTR(err);
465 }
466
467 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
468 static void esw_qos_put(struct mlx5_eswitch *esw);
469
470 static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)471 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
472 {
473 struct mlx5_esw_rate_group *group;
474 int err;
475
476 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
477 return ERR_PTR(-EOPNOTSUPP);
478
479 err = esw_qos_get(esw, extack);
480 if (err)
481 return ERR_PTR(err);
482
483 group = __esw_qos_create_rate_group(esw, extack);
484 if (IS_ERR(group))
485 esw_qos_put(esw);
486
487 return group;
488 }
489
__esw_qos_destroy_rate_group(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)490 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
491 struct mlx5_esw_rate_group *group,
492 struct netlink_ext_ack *extack)
493 {
494 u32 divider;
495 int err;
496
497 list_del(&group->list);
498
499 divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
500 err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
501 if (err)
502 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
503
504 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
505 SCHEDULING_HIERARCHY_E_SWITCH,
506 group->tsar_ix);
507 if (err)
508 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
509
510 trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
511
512 kfree(group);
513
514 return err;
515 }
516
/* Destroy @group and drop one QoS reference. The reference is dropped even
 * if destroying the group reported an error, which is then returned.
 */
static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

	esw_qos_put(esw);

	return ret;
}
528
esw_qos_element_type_supported(struct mlx5_core_dev * dev,int type)529 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
530 {
531 switch (type) {
532 case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
533 return MLX5_CAP_QOS(dev, esw_element_type) &
534 ELEMENT_TYPE_CAP_MASK_TASR;
535 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
536 return MLX5_CAP_QOS(dev, esw_element_type) &
537 ELEMENT_TYPE_CAP_MASK_VPORT;
538 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
539 return MLX5_CAP_QOS(dev, esw_element_type) &
540 ELEMENT_TYPE_CAP_MASK_VPORT_TC;
541 case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
542 return MLX5_CAP_QOS(dev, esw_element_type) &
543 ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
544 }
545 return false;
546 }
547
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)548 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
549 {
550 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
551 struct mlx5_core_dev *dev = esw->dev;
552 __be32 *attr;
553 int err;
554
555 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
556 return -EOPNOTSUPP;
557
558 if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
559 return -EOPNOTSUPP;
560
561 MLX5_SET(scheduling_context, tsar_ctx, element_type,
562 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
563
564 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
565 *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
566
567 err = mlx5_create_scheduling_element_cmd(dev,
568 SCHEDULING_HIERARCHY_E_SWITCH,
569 tsar_ctx,
570 &esw->qos.root_tsar_ix);
571 if (err) {
572 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
573 return err;
574 }
575
576 INIT_LIST_HEAD(&esw->qos.groups);
577 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
578 esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
579 if (IS_ERR(esw->qos.group0)) {
580 esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
581 PTR_ERR(esw->qos.group0));
582 err = PTR_ERR(esw->qos.group0);
583 goto err_group0;
584 }
585 }
586 refcount_set(&esw->qos.refcnt, 1);
587
588 return 0;
589
590 err_group0:
591 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
592 esw->qos.root_tsar_ix))
593 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
594
595 return err;
596 }
597
esw_qos_destroy(struct mlx5_eswitch * esw)598 static void esw_qos_destroy(struct mlx5_eswitch *esw)
599 {
600 int err;
601
602 if (esw->qos.group0)
603 __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
604
605 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
606 SCHEDULING_HIERARCHY_E_SWITCH,
607 esw->qos.root_tsar_ix);
608 if (err)
609 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
610 }
611
esw_qos_get(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)612 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
613 {
614 int err = 0;
615
616 lockdep_assert_held(&esw->state_lock);
617
618 if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
619 /* esw_qos_create() set refcount to 1 only on success.
620 * No need to decrement on failure.
621 */
622 err = esw_qos_create(esw, extack);
623 }
624
625 return err;
626 }
627
esw_qos_put(struct mlx5_eswitch * esw)628 static void esw_qos_put(struct mlx5_eswitch *esw)
629 {
630 lockdep_assert_held(&esw->state_lock);
631 if (refcount_dec_and_test(&esw->qos.refcnt))
632 esw_qos_destroy(esw);
633 }
634
/* Enable QoS on @vport: take a QoS reference, place the vport in group0 and
 * create its scheduling element with @max_rate and @bw_share.
 *
 * Must be called with esw->state_lock held. Returns 0 right away if QoS is
 * already enabled on the vport. On failure the reference is dropped and
 * vport->qos.group is cleared, because dropping the last reference destroys
 * QoS and frees group0.
 */
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (vport->qos.enabled)
		return 0;

	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	/* The element creation below reads the group to pick its parent */
	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (err)
		goto err_out;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;

err_out:
	/* Do not leave a pointer to a group that the put may free */
	vport->qos.group = NULL;
	esw_qos_put(esw);

	return err;
}
664
mlx5_esw_qos_vport_disable(struct mlx5_eswitch * esw,struct mlx5_vport * vport)665 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
666 {
667 int err;
668
669 lockdep_assert_held(&esw->state_lock);
670 if (!vport->qos.enabled)
671 return;
672 WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
673 "Disabling QoS on port before detaching it from group");
674
675 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
676 SCHEDULING_HIERARCHY_E_SWITCH,
677 vport->qos.esw_tsar_ix);
678 if (err)
679 esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
680 vport->vport, err);
681
682 memset(&vport->qos, 0, sizeof(vport->qos));
683 trace_mlx5_esw_vport_qos_destroy(vport);
684
685 esw_qos_put(esw);
686 }
687
/* Enable QoS on @vport if needed, then apply @min_rate followed by
 * @max_rate.
 *
 * Must be called with esw->state_lock held. Returns the first error hit;
 * the max rate is not attempted when setting the min rate fails.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	ret = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (ret)
		return ret;

	ret = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (ret)
		return ret;

	return esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
}
704
mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev * mdev)705 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
706 {
707 struct ethtool_link_ksettings lksettings;
708 struct net_device *slave, *master;
709 u32 speed = SPEED_UNKNOWN;
710
711 /* Lock ensures a stable reference to master and slave netdevice
712 * while port speed of master is queried.
713 */
714 ASSERT_RTNL();
715
716 slave = mlx5_uplink_netdev_get(mdev);
717 if (!slave)
718 goto out;
719
720 master = netdev_master_upper_dev_get(slave);
721 if (master && !__ethtool_get_link_ksettings(master, &lksettings))
722 speed = lksettings.base.speed;
723
724 out:
725 return speed;
726 }
727
/* Store the maximum link speed in *@link_speed_max.
 *
 * With LAG active, the speed of the uplink's master device is tried first;
 * RTNL is taken around that query when @hold_rtnl_lock is true, otherwise the
 * caller is expected to hold it already. If LAG is inactive or that speed is
 * unknown, the port's maximum link speed is queried instead, and an extack
 * message is set if that query fails.
 */
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int ret;

	if (mlx5_lag_is_active(mdev)) {
		if (hold_rtnl_lock)
			rtnl_lock();

		*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

		if (hold_rtnl_lock)
			rtnl_unlock();

		if (*link_speed_max != (u32)SPEED_UNKNOWN)
			return 0;
	}

	ret = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return ret;
}
754
/* Check that @value does not exceed @link_speed_max.
 *
 * Returns 0 when it fits. Otherwise logs an error naming @name, sets an
 * extack message and returns -EINVAL. @mdev is not used.
 */
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value <= link_speed_max)
		return 0;

	pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
	       name, value, link_speed_max);
	NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");

	return -EINVAL;
}
768
/* Set the max average bandwidth of vport @vport_num to @rate_mbps.
 *
 * A non-zero rate is first validated against the maximum link speed; RTNL is
 * not taken for that query here. Under esw->state_lock, QoS is enabled on the
 * vport with this rate if it was not enabled yet; otherwise only the
 * max_average_bw field of the existing scheduling element is modified.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int ret;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		ret = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (!ret)
			ret = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
							     link_speed_max, rate_mbps, NULL);
		if (ret)
			return ret;
	}

	mutex_lock(&esw->state_lock);
	if (vport->qos.enabled) {
		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

		ret = mlx5_modify_scheduling_element_cmd(esw->dev,
				SCHEDULING_HIERARCHY_E_SWITCH,
				ctx,
				vport->qos.esw_tsar_ix,
				MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
	} else {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		ret = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	}
	mutex_unlock(&esw->state_lock);

	return ret;
}
810
811 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
812
813 /* Converts bytes per second value passed in a pointer into megabits per
814 * second, rewriting last. If converted rate exceed link speed or is not a
815 * fraction of Mbps - returns error.
816 */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)817 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
818 u64 *rate, struct netlink_ext_ack *extack)
819 {
820 u32 link_speed_max, remainder;
821 u64 value;
822 int err;
823
824 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
825 if (remainder) {
826 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
827 name, *rate);
828 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
829 return -EINVAL;
830 }
831
832 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
833 if (err)
834 return err;
835
836 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
837 if (err)
838 return err;
839
840 *rate = value;
841 return 0;
842 }
843
844 /* Eswitch devlink rate API */
845
/* devlink rate leaf tx_share setter; @priv is the vport.
 *
 * Returns -EPERM when the eswitch is not allowed. @tx_share is converted
 * from Bps to Mbps and validated, then, under esw->state_lock, QoS is
 * enabled on the vport if needed and the value is applied as its min rate.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
871
/* devlink rate leaf tx_max setter; @priv is the vport.
 *
 * Returns -EPERM when the eswitch is not allowed. @tx_max is converted from
 * Bps to Mbps and validated, then, under esw->state_lock, QoS is enabled on
 * the vport if needed and the value is applied as its max rate.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
897
/* devlink rate node tx_share setter; @priv is the rate group.
 *
 * @tx_share is converted from Bps to Mbps and validated, then applied as the
 * group's min rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
915
/* devlink rate node tx_max setter; @priv is the rate group.
 *
 * @tx_max is converted from Bps to Mbps and validated, then applied as the
 * group's max rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
933
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)934 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
935 struct netlink_ext_ack *extack)
936 {
937 struct mlx5_esw_rate_group *group;
938 struct mlx5_eswitch *esw;
939 int err = 0;
940
941 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
942 if (IS_ERR(esw))
943 return PTR_ERR(esw);
944
945 mutex_lock(&esw->state_lock);
946 if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
947 NL_SET_ERR_MSG_MOD(extack,
948 "Rate node creation supported only in switchdev mode");
949 err = -EOPNOTSUPP;
950 goto unlock;
951 }
952
953 group = esw_qos_create_rate_group(esw, extack);
954 if (IS_ERR(group)) {
955 err = PTR_ERR(group);
956 goto unlock;
957 }
958
959 *priv = group;
960 unlock:
961 mutex_unlock(&esw->state_lock);
962 return err;
963 }
964
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)965 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
966 struct netlink_ext_ack *extack)
967 {
968 struct mlx5_esw_rate_group *group = priv;
969 struct mlx5_eswitch *esw;
970 int err;
971
972 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
973 if (IS_ERR(esw))
974 return PTR_ERR(esw);
975
976 mutex_lock(&esw->state_lock);
977 err = esw_qos_destroy_rate_group(esw, group, extack);
978 mutex_unlock(&esw->state_lock);
979 return err;
980 }
981
mlx5_esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)982 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
983 struct mlx5_vport *vport,
984 struct mlx5_esw_rate_group *group,
985 struct netlink_ext_ack *extack)
986 {
987 int err = 0;
988
989 mutex_lock(&esw->state_lock);
990 if (!vport->qos.enabled && !group)
991 goto unlock;
992
993 err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
994 if (!err)
995 err = esw_qos_vport_update_group(esw, vport, group, extack);
996 unlock:
997 mutex_unlock(&esw->state_lock);
998 return err;
999 }
1000
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)1001 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
1002 struct devlink_rate *parent,
1003 void *priv, void *parent_priv,
1004 struct netlink_ext_ack *extack)
1005 {
1006 struct mlx5_esw_rate_group *group;
1007 struct mlx5_vport *vport = priv;
1008
1009 if (!parent)
1010 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
1011 vport, NULL, extack);
1012
1013 group = parent_priv;
1014 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
1015 }
1016