1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/lag.h>
39 #include "lib/mlx5.h"
40 #include "lib/devcom.h"
41 #include "mlx5_core.h"
42 #include "eswitch.h"
43 #include "esw/acl/ofld.h"
44 #include "lag.h"
45 #include "mp.h"
46 #include "mpesw.h"
47
48
49 /* General purpose, use for short periods of time.
50 * Beware of lock dependencies (preferably, no locks should be acquired
51 * under it).
52 */
53 static DEFINE_SPINLOCK(lag_lock);
54
55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
56 {
57 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
58 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
59
60 if (mode == MLX5_LAG_MODE_MPESW)
61 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
62
63 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
64 }
65
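/* Build a bitmask of the ports that currently carry traffic (tx enabled and
 * link up). Used to program the lag context active_port field in hash-based
 * (port selection FT) mode.
 */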
66 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
67 {
68 u8 enabled_ports[MLX5_MAX_PORTS] = {};
69 u8 active_port = 0;
70 int num_enabled;
71 int idx;
72
73 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
74 &num_enabled);
75 for (idx = 0; idx < num_enabled; idx++)
76 active_port |= BIT_MASK(enabled_ports[idx]);
77
78 return active_port;
79 }
80
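/* Issue the CREATE_LAG FW command. For queue-affinity mode program the tx
 * remap affinity of the two ports; for hash-based mode with flow table
 * bypass support program the initially active port bitmask.
 */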
81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
82 int mode, unsigned long flags)
83 {
84 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
85 &flags);
86 int port_sel_mode = get_port_sel_mode(mode, flags);
87 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
88 u8 *ports = ldev->v2p_map;
89 int idx0, idx1;
90 void *lag_ctx;
91
92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
96 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
97
98 if (idx0 < 0 || idx1 < 0)
99 return -EINVAL;
100
101 switch (port_sel_mode) {
102 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
104 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
105 break;
106 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
107 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
108 break;
109
110 MLX5_SET(lagc, lag_ctx, active_port,
111 lag_active_port_bits(mlx5_lag_dev(dev)));
112 break;
113 default:
114 break;
115 }
116 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
117
118 return mlx5_cmd_exec_in(dev, create_lag, in);
119 }
120
121 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
122 u8 *ports)
123 {
124 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
125 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
126 int idx0, idx1;
127
128 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
129 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
130 if (idx0 < 0 || idx1 < 0)
131 return -EINVAL;
132
133 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
134 MLX5_SET(modify_lag_in, in, field_select, 0x1);
135
136 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
137 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
138
139 return mlx5_cmd_exec_in(dev, modify_lag, in);
140 }
141
142 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
143 {
144 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
145
146 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
147
148 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
149 }
150 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
151
152 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
153 {
154 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
155
156 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
157
158 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
159 }
160 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
161
162 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
163 u8 *ports, int *num_disabled)
164 {
165 int i;
166
167 *num_disabled = 0;
168 mlx5_ldev_for_each(i, 0, ldev)
169 if (!tracker->netdev_state[i].tx_enabled ||
170 !tracker->netdev_state[i].link_up)
171 ports[(*num_disabled)++] = i;
172 }
173
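/* Collect the indices of ports that may carry traffic (tx enabled and link
 * up). If no port qualifies, fall back to the disabled ports so callers
 * always get a non-empty set.
 */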
174 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
175 u8 *ports, int *num_enabled)
176 {
177 int i;
178
179 *num_enabled = 0;
180 mlx5_ldev_for_each(i, 0, ldev)
181 if (tracker->netdev_state[i].tx_enabled &&
182 tracker->netdev_state[i].link_up)
183 ports[(*num_enabled)++] = i;
184
185 if (*num_enabled == 0)
186 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
187 }
188
189 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
190 struct mlx5_lag *ldev,
191 struct lag_tracker *tracker,
192 unsigned long flags)
193 {
194 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
195 u8 enabled_ports[MLX5_MAX_PORTS] = {};
196 int written = 0;
197 int num_enabled;
198 int idx;
199 int err;
200 int i;
201 int j;
202
203 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
204 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
205 &num_enabled);
206 for (i = 0; i < num_enabled; i++) {
207 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
208 if (err != 3)
209 return;
210 written += err;
211 }
212 buf[written - 2] = 0;
213 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
214 } else {
215 mlx5_ldev_for_each(i, 0, ldev) {
216 for (j = 0; j < ldev->buckets; j++) {
217 idx = i * ldev->buckets + j;
218 err = scnprintf(buf + written, 10,
219 " port %d:%d", i + 1, ldev->v2p_map[idx]);
220 if (err != 9)
221 return;
222 written += err;
223 }
224 }
225 mlx5_core_info(dev, "lag map:%s\n", buf);
226 }
227 }
228
229 static int mlx5_lag_netdev_event(struct notifier_block *this,
230 unsigned long event, void *ptr);
231 static void mlx5_do_bond_work(struct work_struct *work);
232
233 static void mlx5_ldev_free(struct kref *ref)
234 {
235 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
236 struct lag_func *pf;
237 struct net *net;
238 int i;
239
240 if (ldev->nb.notifier_call) {
241 net = read_pnet(&ldev->net);
242 unregister_netdevice_notifier_net(net, &ldev->nb);
243 }
244
245 mlx5_ldev_for_each(i, 0, ldev) {
246 pf = mlx5_lag_pf(ldev, i);
247 if (pf->port_change_nb.nb.notifier_call) {
248 struct mlx5_nb *nb = &pf->port_change_nb;
249
250 mlx5_eq_notifier_unregister(pf->dev, nb);
251 }
252 xa_erase(&ldev->pfs, i);
253 kfree(pf);
254 }
255 xa_destroy(&ldev->pfs);
256
257 mlx5_lag_mp_cleanup(ldev);
258 cancel_delayed_work_sync(&ldev->bond_work);
259 cancel_work_sync(&ldev->speed_update_work);
260 destroy_workqueue(ldev->wq);
261 mutex_destroy(&ldev->lock);
262 kfree(ldev);
263 }
264
265 static void mlx5_ldev_put(struct mlx5_lag *ldev)
266 {
267 kref_put(&ldev->ref, mlx5_ldev_free);
268 }
269
270 static void mlx5_ldev_get(struct mlx5_lag *ldev)
271 {
272 kref_get(&ldev->ref);
273 }
274
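/* Allocate and initialize the LAG object shared by all PFs of a bond
 * candidate: workqueue, PF xarray, netdev notifier and multipath state.
 */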
275 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
276 {
277 struct mlx5_lag *ldev;
278 int err;
279
280 ldev = kzalloc_obj(*ldev);
281 if (!ldev)
282 return NULL;
283
284 ldev->wq = create_singlethread_workqueue("mlx5_lag");
285 if (!ldev->wq) {
286 kfree(ldev);
287 return NULL;
288 }
289
290 kref_init(&ldev->ref);
291 mutex_init(&ldev->lock);
292 xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
293 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
294 INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
295
296 ldev->nb.notifier_call = mlx5_lag_netdev_event;
297 write_pnet(&ldev->net, mlx5_core_net(dev));
298 if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
299 ldev->nb.notifier_call = NULL;
300 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
301 }
302 ldev->mode = MLX5_LAG_MODE_NONE;
303
304 err = mlx5_lag_mp_init(ldev);
305 if (err)
306 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
307 err);
308
309 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
310 ldev->buckets = 1;
311
312 return ldev;
313 }
314
315 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
316 struct net_device *ndev)
317 {
318 struct lag_func *pf;
319 int i;
320
321 mlx5_ldev_for_each(i, 0, ldev) {
322 pf = mlx5_lag_pf(ldev, i);
323 if (pf->netdev == ndev)
324 return i;
325 }
326
327 return -ENOENT;
328 }
329
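/* Return the xarray index of the PF marked as LAG master, or -ENOENT if no
 * master has been marked yet.
 */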
330 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
331 {
332 unsigned long idx = 0;
333 void *entry;
334
335 if (!ldev)
336 return -ENOENT;
337
338 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
339 if (!entry)
340 return -ENOENT;
341
342 return (int)idx;
343 }
344
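/* Translate a logical sequence number into an xarray index: seq 0 maps to
 * the master PF when one is marked, the remaining PFs follow in xarray order.
 */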
345 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
346 {
347 int master_idx, i, num = 0;
348
349 if (!ldev)
350 return -ENOENT;
351
352 master_idx = mlx5_lag_get_master_idx(ldev);
353
354 /* If seq 0 is requested and there's a primary PF, return it */
355 if (master_idx >= 0) {
356 if (seq == 0)
357 return master_idx;
358 num++;
359 }
360
361 mlx5_ldev_for_each(i, 0, ldev) {
362 /* Skip the primary PF in the loop */
363 if (i == master_idx)
364 continue;
365
366 if (num == seq)
367 return i;
368 num++;
369 }
370 return -ENOENT;
371 }
372
373 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
374 * sequence number in the LAG. Master is always 0, others numbered
375 * sequentially starting from 1.
376 */
377 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
378 {
379 struct mlx5_lag *ldev = mlx5_lag_dev(dev);
380 int master_idx, i, num = 1;
381 struct lag_func *pf;
382
383 if (!ldev)
384 return -ENOENT;
385
386 master_idx = mlx5_lag_get_master_idx(ldev);
387 if (master_idx < 0)
388 return -ENOENT;
389
390 pf = mlx5_lag_pf(ldev, master_idx);
391 if (pf && pf->dev == dev)
392 return 0;
393
394 mlx5_ldev_for_each(i, 0, ldev) {
395 if (i == master_idx)
396 continue;
397 pf = mlx5_lag_pf(ldev, i);
398 if (pf->dev == dev)
399 return num;
400 num++;
401 }
402 return -ENOENT;
403 }
404 EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
405
406 /* Devcom events for LAG master marking */
407 #define LAG_DEVCOM_PAIR (0)
408 #define LAG_DEVCOM_UNPAIR (1)
409
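/* Mark the PF with the lowest device index as the LAG master in the xarray. */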
410 static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
411 {
412 int lowest_dev_idx = INT_MAX;
413 struct lag_func *pf;
414 int master_xa_idx = -1;
415 int dev_idx;
416 int i;
417
418 mlx5_ldev_for_each(i, 0, ldev) {
419 pf = mlx5_lag_pf(ldev, i);
420 dev_idx = mlx5_get_dev_index(pf->dev);
421 if (dev_idx < lowest_dev_idx) {
422 lowest_dev_idx = dev_idx;
423 master_xa_idx = i;
424 }
425 }
426
427 if (master_xa_idx >= 0)
428 xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
429 }
430
431 static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
432 {
433 unsigned long idx = 0;
434 void *entry;
435
436 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
437 if (!entry)
438 return;
439
440 xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
441 }
442
443 /* Devcom event handler to manage LAG master marking */
444 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
445 {
446 struct mlx5_core_dev *dev = my_data;
447 struct mlx5_lag *ldev;
448 int idx;
449
450 ldev = mlx5_lag_dev(dev);
451 if (!ldev)
452 return 0;
453
454 mutex_lock(&ldev->lock);
455 switch (event) {
456 case LAG_DEVCOM_PAIR:
457 /* No need to mark more than once */
458 idx = mlx5_lag_get_master_idx(ldev);
459 if (idx >= 0)
460 break;
461 /* Check if all LAG ports are now registered */
462 if (mlx5_lag_num_devs(ldev) == ldev->ports)
463 mlx5_lag_mark_master(ldev);
464 break;
465
466 case LAG_DEVCOM_UNPAIR:
467 /* Clear master mark when a device is removed */
468 mlx5_lag_clear_master(ldev);
469 break;
470 }
471 mutex_unlock(&ldev->lock);
472 return 0;
473 }
474
475 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
476 {
477 int i, num = 0;
478
479 if (!ldev)
480 return 0;
481
482 mlx5_ldev_for_each(i, 0, ldev) {
483 (void)i;
484 num++;
485 }
486 return num;
487 }
488
489 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
490 {
491 struct lag_func *pf;
492 int i, num = 0;
493
494 if (!ldev)
495 return 0;
496
497 mlx5_ldev_for_each(i, 0, ldev) {
498 pf = mlx5_lag_pf(ldev, i);
499 if (pf->netdev)
500 num++;
501 }
502 return num;
503 }
504
505 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
506 {
507 return ldev->mode == MLX5_LAG_MODE_ROCE;
508 }
509
510 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
511 {
512 return ldev->mode == MLX5_LAG_MODE_SRIOV;
513 }
514
515 /* Create a mapping between steering slots and active ports.
516 * As we have ldev->buckets slots per port, first assume the native
517 * mapping should be used.
518 * If there are ports that are disabled, fill the relevant slots
519 * with a mapping that points to active ports.
520 */
521 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
522 struct mlx5_lag *ldev,
523 u8 buckets,
524 u8 *ports)
525 {
526 int disabled[MLX5_MAX_PORTS] = {};
527 int enabled[MLX5_MAX_PORTS] = {};
528 int disabled_ports_num = 0;
529 int enabled_ports_num = 0;
530 int idx;
531 u32 rand;
532 int i;
533 int j;
534
535 mlx5_ldev_for_each(i, 0, ldev) {
536 if (tracker->netdev_state[i].tx_enabled &&
537 tracker->netdev_state[i].link_up)
538 enabled[enabled_ports_num++] = i;
539 else
540 disabled[disabled_ports_num++] = i;
541 }
542
543 /* Use native mapping by default where each port's buckets
544 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
545 * ports[] values are 1-indexed device indices for FW.
546 */
547 mlx5_ldev_for_each(i, 0, ldev) {
548 for (j = 0; j < buckets; j++) {
549 idx = i * buckets + j;
550 ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
551 }
552 }
553
554 /* If all ports are disabled/enabled keep native mapping */
555 if (enabled_ports_num == ldev->ports ||
556 disabled_ports_num == ldev->ports)
557 return;
558
559 /* Go over the disabled ports and for each assign a random active port */
560 for (i = 0; i < disabled_ports_num; i++) {
561 for (j = 0; j < buckets; j++) {
562 int rand_xa_idx;
563
564 get_random_bytes(&rand, 4);
565 rand_xa_idx = enabled[rand % enabled_ports_num];
566 ports[disabled[i] * buckets + j] =
567 mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
568 }
569 }
570 }
571
572 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
573 {
574 struct lag_func *pf;
575 int i;
576
577 mlx5_ldev_for_each(i, 0, ldev) {
578 pf = mlx5_lag_pf(ldev, i);
579 if (pf->has_drop)
580 return true;
581 }
582 return false;
583 }
584
585 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
586 {
587 struct lag_func *pf;
588 int i;
589
590 mlx5_ldev_for_each(i, 0, ldev) {
591 pf = mlx5_lag_pf(ldev, i);
592 if (!pf->has_drop)
593 continue;
594
595 mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
596 MLX5_VPORT_UPLINK);
597 pf->has_drop = false;
598 }
599 }
600
601 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
602 struct lag_tracker *tracker)
603 {
604 u8 disabled_ports[MLX5_MAX_PORTS] = {};
605 struct mlx5_core_dev *dev;
606 struct lag_func *pf;
607 int disabled_index;
608 int num_disabled;
609 int err;
610 int i;
611
612 /* First delete the current drop rule so there won't be any dropped
613 * packets
614 */
615 mlx5_lag_drop_rule_cleanup(ldev);
616
617 if (!ldev->tracker.has_inactive)
618 return;
619
620 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
621
622 for (i = 0; i < num_disabled; i++) {
623 disabled_index = disabled_ports[i];
624 pf = mlx5_lag_pf(ldev, disabled_index);
625 dev = pf->dev;
626 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
627 MLX5_VPORT_UPLINK);
628 if (!err)
629 pf->has_drop = true;
630 else
631 mlx5_core_err(dev,
632 "Failed to create lag drop rule, error: %d", err);
633 }
634 }
635
636 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
637 {
638 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
639 void *lag_ctx;
640
641 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
642
643 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
644 MLX5_SET(modify_lag_in, in, field_select, 0x2);
645
646 MLX5_SET(lagc, lag_ctx, active_port, ports);
647
648 return mlx5_cmd_exec_in(dev, modify_lag, in);
649 }
650
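/* Push a new port mapping to FW: hash-based mode updates the port selection
 * flow tables (and the active port bitmask when bypass is supported), while
 * queue-affinity mode reprograms the lag context tx remap affinity.
 */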
651 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
652 {
653 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
654 struct mlx5_core_dev *dev0;
655 u8 active_ports;
656 int ret;
657
658 if (idx < 0)
659 return -EINVAL;
660
661 dev0 = mlx5_lag_pf(ldev, idx)->dev;
662 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
663 ret = mlx5_lag_port_sel_modify(ldev, ports);
664 if (ret ||
665 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
666 return ret;
667
668 active_ports = lag_active_port_bits(ldev);
669
670 return mlx5_cmd_modify_active_port(dev0, active_ports);
671 }
672 return mlx5_cmd_modify_lag(dev0, ldev, ports);
673 }
674
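/* Return the netdev of the currently active port in active-backup mode,
 * with a reference held; fall back to the last port if none is tx enabled.
 */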
675 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
676 {
677 struct net_device *ndev = NULL;
678 struct lag_func *pf;
679 struct mlx5_lag *ldev;
680 unsigned long flags;
681 int i, last_idx;
682
683 spin_lock_irqsave(&lag_lock, flags);
684 ldev = mlx5_lag_dev(dev);
685
686 if (!ldev)
687 goto unlock;
688
689 mlx5_ldev_for_each(i, 0, ldev) {
690 pf = mlx5_lag_pf(ldev, i);
691 if (ldev->tracker.netdev_state[i].tx_enabled)
692 ndev = pf->netdev;
693 }
694 if (!ndev) {
695 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
696 if (last_idx < 0)
697 goto unlock;
698 pf = mlx5_lag_pf(ldev, last_idx);
699 ndev = pf->netdev;
700 }
701
702 dev_hold(ndev);
703
704 unlock:
705 spin_unlock_irqrestore(&lag_lock, flags);
706
707 return ndev;
708 }
709
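/* Recompute the tx affinity mapping and apply it to FW if anything changed.
 * In active-backup mode also refresh the drop rules and notify listeners of
 * the new active netdev.
 */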
710 void mlx5_modify_lag(struct mlx5_lag *ldev,
711 struct lag_tracker *tracker)
712 {
713 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
714 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
715 struct mlx5_core_dev *dev0;
716 int idx;
717 int err;
718 int i;
719 int j;
720
721 if (first_idx < 0)
722 return;
723
724 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
725 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
726
727 mlx5_ldev_for_each(i, 0, ldev) {
728 for (j = 0; j < ldev->buckets; j++) {
729 idx = i * ldev->buckets + j;
730 if (ports[idx] == ldev->v2p_map[idx])
731 continue;
732 err = _mlx5_modify_lag(ldev, ports);
733 if (err) {
734 mlx5_core_err(dev0,
735 "Failed to modify LAG (%d)\n",
736 err);
737 return;
738 }
739 memcpy(ldev->v2p_map, ports, sizeof(ports));
740
741 mlx5_lag_print_mapping(dev0, ldev, tracker,
742 ldev->mode_flags);
743 break;
744 }
745 }
746
747 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
748 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
749
750 if (ldev->mode != MLX5_LAG_MODE_ROCE)
751 mlx5_lag_drop_rule_setup(ldev, tracker);
752 /* Only SR-IOV and RoCE LAG should have tracker->tx_type set,
753 * so no need to check the mode.
754 */
755 blocking_notifier_call_chain(&dev0->priv.lag_nh,
756 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
757 ndev);
758 dev_put(ndev);
759 }
760 }
761
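/* Decide how ports are selected: use the hash-based port selection flow
 * table when the capability exists (mandatory for more than two ports),
 * otherwise stay with queue affinity. MPESW and multipath keep the default.
 */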
762 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
763 enum mlx5_lag_mode mode,
764 unsigned long *flags)
765 {
766 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
767 struct mlx5_core_dev *dev0;
768
769 if (first_idx < 0)
770 return -EINVAL;
771
772 if (mode == MLX5_LAG_MODE_MPESW ||
773 mode == MLX5_LAG_MODE_MULTIPATH)
774 return 0;
775
776 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
777
778 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
779 if (ldev->ports > 2)
780 return -EINVAL;
781 return 0;
782 }
783
784 if (ldev->ports > 2)
785 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
786
787 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
788
789 return 0;
790 }
791
792 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
793 struct lag_tracker *tracker, bool shared_fdb,
794 unsigned long *flags)
795 {
796 *flags = 0;
797 if (shared_fdb) {
798 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
799 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
800 }
801
802 if (mode == MLX5_LAG_MODE_MPESW)
803 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
804
805 return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
806 }
807
808 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
809 {
810 int port_sel_mode = get_port_sel_mode(mode, flags);
811
812 switch (port_sel_mode) {
813 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
814 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
815 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
816 default: return "invalid";
817 }
818 }
819
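/* In shared FDB mode, attach every non-master eswitch to the master's FDB;
 * unwind already attached eswitches on failure.
 */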
820 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
821 {
822 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
823 struct mlx5_eswitch *master_esw;
824 struct mlx5_core_dev *dev0;
825 int i, j;
826 int err;
827
828 if (master_idx < 0)
829 return -EINVAL;
830
831 dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
832 master_esw = dev0->priv.eswitch;
833 mlx5_ldev_for_each(i, 0, ldev) {
834 struct mlx5_eswitch *slave_esw;
835
836 if (i == master_idx)
837 continue;
838
839 slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
840
841 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
842 slave_esw, ldev->ports);
843 if (err)
844 goto err;
845 }
846 return 0;
847 err:
848 mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
849 if (j == master_idx)
850 continue;
851 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
852 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
853 }
854 return err;
855 }
856
857 static int mlx5_create_lag(struct mlx5_lag *ldev,
858 struct lag_tracker *tracker,
859 enum mlx5_lag_mode mode,
860 unsigned long flags)
861 {
862 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
863 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
864 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
865 struct mlx5_core_dev *dev0;
866 int err;
867
868 if (first_idx < 0)
869 return -EINVAL;
870
871 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
872 if (tracker)
873 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
874 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
875 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
876
877 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
878 if (err) {
879 mlx5_core_err(dev0,
880 "Failed to create LAG (%d)\n",
881 err);
882 return err;
883 }
884
885 if (shared_fdb) {
886 err = mlx5_lag_create_single_fdb(ldev);
887 if (err)
888 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
889 else
890 mlx5_core_info(dev0, "Operation mode is single FDB\n");
891 }
892
893 if (err) {
894 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
895 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
896 mlx5_core_err(dev0,
897 "Failed to deactivate RoCE LAG; driver restart required\n");
898 }
899 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
900
901 return err;
902 }
903
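/* Activate hardware LAG in the requested mode: derive the tx affinity
 * mapping, create the port selection tables for hash-based mode, create the
 * FW LAG object and set up drop rules for active-backup bonds.
 */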
904 int mlx5_activate_lag(struct mlx5_lag *ldev,
905 struct lag_tracker *tracker,
906 enum mlx5_lag_mode mode,
907 bool shared_fdb)
908 {
909 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
910 struct mlx5_core_dev *dev0;
911 unsigned long flags = 0;
912 int master_idx;
913 int err;
914
915 master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
916 if (master_idx < 0)
917 return -EINVAL;
918
919 dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
920 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
921 if (err)
922 return err;
923
924 if (mode != MLX5_LAG_MODE_MPESW) {
925 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
926 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
927 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
928 ldev->v2p_map);
929 if (err) {
930 mlx5_core_err(dev0,
931 "Failed to create LAG port selection(%d)\n",
932 err);
933 return err;
934 }
935 }
936 }
937
938 err = mlx5_create_lag(ldev, tracker, mode, flags);
939 if (err) {
940 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
941 mlx5_lag_port_sel_destroy(ldev);
942 if (roce_lag)
943 mlx5_core_err(dev0,
944 "Failed to activate RoCE LAG\n");
945 else
946 mlx5_core_err(dev0,
947 "Failed to activate VF LAG\n"
948 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
949 return err;
950 }
951
952 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
953 !roce_lag)
954 mlx5_lag_drop_rule_setup(ldev, tracker);
955
956 ldev->mode = mode;
957 ldev->mode_flags = flags;
958 return 0;
959 }
960
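/* Tear down the hardware LAG: detach shared FDB slaves, destroy the FW LAG
 * object, and clean up port selection tables and drop rules.
 */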
961 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
962 {
963 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
964 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
965 bool roce_lag = __mlx5_lag_is_roce(ldev);
966 unsigned long flags = ldev->mode_flags;
967 struct mlx5_eswitch *master_esw;
968 struct mlx5_core_dev *dev0;
969 int err;
970 int i;
971
972 if (master_idx < 0)
973 return -EINVAL;
974
975 dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
976 master_esw = dev0->priv.eswitch;
977 ldev->mode = MLX5_LAG_MODE_NONE;
978 ldev->mode_flags = 0;
979 mlx5_lag_mp_reset(ldev);
980
981 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
982 mlx5_ldev_for_each(i, 0, ldev) {
983 if (i == master_idx)
984 continue;
985 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
986 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
987 }
988 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
989 }
990
991 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
992 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
993 if (err) {
994 if (roce_lag) {
995 mlx5_core_err(dev0,
996 "Failed to deactivate RoCE LAG; driver restart required\n");
997 } else {
998 mlx5_core_err(dev0,
999 "Failed to deactivate VF LAG; driver restart required\n"
1000 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1001 }
1002 return err;
1003 }
1004
1005 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1006 mlx5_lag_port_sel_destroy(ldev);
1007 ldev->buckets = 1;
1008 }
1009 if (mlx5_lag_has_drop_rule(ldev))
1010 mlx5_lag_drop_rule_cleanup(ldev);
1011
1012 return 0;
1013 }
1014
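/* Verify that LAG can be activated: all expected PFs are present, eswitch
 * modes match, RoCE state is consistent, and no PF has VFs while outside
 * switchdev mode.
 */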
1015 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
1016 {
1017 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1018 #ifdef CONFIG_MLX5_ESWITCH
1019 struct mlx5_core_dev *dev;
1020 u8 mode;
1021 #endif
1022 struct lag_func *pf;
1023 bool roce_support;
1024 int i;
1025
1026 if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
1027 return false;
1028
1029 #ifdef CONFIG_MLX5_ESWITCH
1030 mlx5_ldev_for_each(i, 0, ldev) {
1031 pf = mlx5_lag_pf(ldev, i);
1032 dev = pf->dev;
1033 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
1034 return false;
1035 }
1036
1037 pf = mlx5_lag_pf(ldev, master_idx);
1038 dev = pf->dev;
1039 mode = mlx5_eswitch_mode(dev);
1040 mlx5_ldev_for_each(i, 0, ldev) {
1041 pf = mlx5_lag_pf(ldev, i);
1042 if (mlx5_eswitch_mode(pf->dev) != mode)
1043 return false;
1044 }
1045
1046 #else
1047 mlx5_ldev_for_each(i, 0, ldev) {
1048 pf = mlx5_lag_pf(ldev, i);
1049 if (mlx5_sriov_is_enabled(pf->dev))
1050 return false;
1051 }
1052 #endif
1053 pf = mlx5_lag_pf(ldev, master_idx);
1054 roce_support = mlx5_get_roce_state(pf->dev);
1055 mlx5_ldev_for_each(i, 0, ldev) {
1056 if (i == master_idx)
1057 continue;
1058 pf = mlx5_lag_pf(ldev, i);
1059 if (mlx5_get_roce_state(pf->dev) != roce_support)
1060 return false;
1061 }
1062
1063 return true;
1064 }
1065
1066 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
1067 {
1068 struct lag_func *pf;
1069 int i;
1070
1071 mlx5_ldev_for_each(i, 0, ldev) {
1072 pf = mlx5_lag_pf(ldev, i);
1073 if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1074 continue;
1075
1076 pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1077 mlx5_rescan_drivers_locked(pf->dev);
1078 }
1079 }
1080
1081 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
1082 {
1083 struct lag_func *pf;
1084 int i;
1085
1086 mlx5_ldev_for_each(i, 0, ldev) {
1087 pf = mlx5_lag_pf(ldev, i);
1088 if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1089 continue;
1090
1091 pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1092 mlx5_rescan_drivers_locked(pf->dev);
1093 }
1094 }
1095
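/* Undo an active bond: quiesce IB devices or RoCE as the mode requires,
 * deactivate the FW LAG and bring the PF auxiliary devices back.
 */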
1096 void mlx5_disable_lag(struct mlx5_lag *ldev)
1097 {
1098 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1099 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1100 struct mlx5_core_dev *dev0;
1101 bool roce_lag;
1102 int err;
1103 int i;
1104
1105 if (idx < 0)
1106 return;
1107
1108 dev0 = mlx5_lag_pf(ldev, idx)->dev;
1109 roce_lag = __mlx5_lag_is_roce(ldev);
1110
1111 if (shared_fdb) {
1112 mlx5_lag_remove_devices(ldev);
1113 } else if (roce_lag) {
1114 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
1115 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1116 mlx5_rescan_drivers_locked(dev0);
1117 }
1118 mlx5_ldev_for_each(i, 0, ldev) {
1119 if (i == idx)
1120 continue;
1121 mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
1122 }
1123 }
1124
1125 err = mlx5_deactivate_lag(ldev);
1126 if (err)
1127 return;
1128
1129 if (shared_fdb || roce_lag)
1130 mlx5_lag_add_devices(ldev);
1131
1132 if (shared_fdb)
1133 mlx5_ldev_for_each(i, 0, ldev)
1134 if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
1135 mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1136 }
1137
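/* Shared FDB requires every PF to be in switchdev mode with vport metadata
 * matching enabled, the relevant FW capabilities, and a peer count that
 * matches the number of lag ports.
 */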
1138 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
1139 {
1140 struct mlx5_core_dev *dev;
1141 bool ret = false;
1142 int idx;
1143 int i;
1144
1145 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1146 if (idx < 0)
1147 return false;
1148
1149 mlx5_ldev_for_each(i, 0, ldev) {
1150 if (i == idx)
1151 continue;
1152 dev = mlx5_lag_pf(ldev, i)->dev;
1153 if (is_mdev_switchdev_mode(dev) &&
1154 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1155 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
1156 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
1157 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
1158 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1159 continue;
1160 return false;
1161 }
1162
1163 dev = mlx5_lag_pf(ldev, idx)->dev;
1164 if (is_mdev_switchdev_mode(dev) &&
1165 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1166 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
1167 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
1168 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1169 ret = true;
1170
1171 return ret;
1172 }
1173
1174 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
1175 {
1176 bool roce_lag = true;
1177 struct lag_func *pf;
1178 int i;
1179
1180 mlx5_ldev_for_each(i, 0, ldev) {
1181 pf = mlx5_lag_pf(ldev, i);
1182 roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
1183 }
1184
1185 #ifdef CONFIG_MLX5_ESWITCH
1186 mlx5_ldev_for_each(i, 0, ldev) {
1187 pf = mlx5_lag_pf(ldev, i);
1188 roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
1189 }
1190 #endif
1191
1192 return roce_lag;
1193 }
1194
1195 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1196 {
1197 return do_bond && __mlx5_lag_is_active(ldev) &&
1198 ldev->mode != MLX5_LAG_MODE_MPESW;
1199 }
1200
1201 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1202 {
1203 return !do_bond && __mlx5_lag_is_active(ldev) &&
1204 ldev->mode != MLX5_LAG_MODE_MPESW;
1205 }
1206
1207 #ifdef CONFIG_MLX5_ESWITCH
1208 static int
1209 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1210 int (*get_speed)(struct mlx5_core_dev *, u32 *))
1211 {
1212 struct mlx5_core_dev *pf_mdev;
1213 struct lag_func *pf;
1214 int pf_idx;
1215 u32 speed;
1216 int ret;
1217
1218 *sum_speed = 0;
1219 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1220 pf = mlx5_lag_pf(ldev, pf_idx);
1221 if (!pf)
1222 continue;
1223 pf_mdev = pf->dev;
1224 if (!pf_mdev)
1225 continue;
1226
1227 ret = get_speed(pf_mdev, &speed);
1228 if (ret) {
1229 mlx5_core_dbg(pf_mdev,
1230 "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1231 get_speed, dev_name(pf_mdev->device),
1232 ret);
1233 return ret;
1234 }
1235
1236 *sum_speed += speed;
1237 }
1238
1239 return 0;
1240 }
1241
1242 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
1243 {
1244 return mlx5_lag_sum_devices_speed(ldev, max_speed,
1245 mlx5_port_max_linkspeed);
1246 }
1247
1248 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
1249 u32 *oper_speed)
1250 {
1251 return mlx5_lag_sum_devices_speed(ldev, oper_speed,
1252 mlx5_port_oper_linkspeed);
1253 }
1254
1255 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
1256 u32 speed)
1257 {
1258 u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
1259 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1260 struct mlx5_vport *vport;
1261 unsigned long i;
1262 int ret;
1263
1264 if (!esw)
1265 return;
1266
1267 if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
1268 return;
1269
1270 mlx5_esw_for_each_vport(esw, i, vport) {
1271 if (!vport)
1272 continue;
1273
1274 if (vport->vport == MLX5_VPORT_UPLINK)
1275 continue;
1276
1277 ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
1278 vport->vport, true, speed);
1279 if (ret)
1280 mlx5_core_dbg(mdev,
1281 "Failed to set vport %d speed %d, err=%d\n",
1282 vport->vport, speed, ret);
1283 }
1284 }
1285
1286 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1287 {
1288 struct mlx5_core_dev *mdev;
1289 struct lag_func *pf;
1290 u32 speed;
1291 int pf_idx;
1292
1293 if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1294 if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1295 return;
1296 } else {
1297 speed = ldev->tracker.bond_speed_mbps;
1298 if (speed == SPEED_UNKNOWN)
1299 return;
1300 }
1301
1302 /* If speed is not set, use the sum of max speeds of all PFs */
1303 if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1304 return;
1305
1306 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1307
1308 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1309 pf = mlx5_lag_pf(ldev, pf_idx);
1310 if (!pf)
1311 continue;
1312 mdev = pf->dev;
1313 if (!mdev)
1314 continue;
1315
1316 mlx5_lag_modify_device_vports_speed(mdev, speed);
1317 }
1318 }
1319
1320 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1321 {
1322 struct mlx5_core_dev *mdev;
1323 struct lag_func *pf;
1324 u32 speed;
1325 int pf_idx;
1326 int ret;
1327
1328 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1329 pf = mlx5_lag_pf(ldev, pf_idx);
1330 if (!pf)
1331 continue;
1332 mdev = pf->dev;
1333 if (!mdev)
1334 continue;
1335
1336 ret = mlx5_port_oper_linkspeed(mdev, &speed);
1337 if (ret) {
1338 mlx5_core_dbg(mdev,
1339 "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1340 dev_name(mdev->device), ret);
1341 continue;
1342 }
1343
1344 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1345 mlx5_lag_modify_device_vports_speed(mdev, speed);
1346 }
1347 }
1348 #endif
1349
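/* Evaluate the current bond tracker state and activate, modify or disable
 * the hardware LAG accordingly.
 */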
1350 static void mlx5_do_bond(struct mlx5_lag *ldev)
1351 {
1352 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1353 struct lag_tracker tracker = { };
1354 struct mlx5_core_dev *dev0;
1355 struct net_device *ndev;
1356 bool do_bond, roce_lag;
1357 int err;
1358 int i;
1359
1360 if (idx < 0)
1361 return;
1362
1363 dev0 = mlx5_lag_pf(ldev, idx)->dev;
1364 if (!mlx5_lag_is_ready(ldev)) {
1365 do_bond = false;
1366 } else {
1367 /* VF LAG is in multipath mode, ignore bond change requests */
1368 if (mlx5_lag_is_multipath(dev0))
1369 return;
1370
1371 tracker = ldev->tracker;
1372
1373 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1374 }
1375
1376 if (do_bond && !__mlx5_lag_is_active(ldev)) {
1377 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1378
1379 roce_lag = mlx5_lag_is_roce_lag(ldev);
1380
1381 if (shared_fdb || roce_lag)
1382 mlx5_lag_remove_devices(ldev);
1383
1384 err = mlx5_activate_lag(ldev, &tracker,
1385 roce_lag ? MLX5_LAG_MODE_ROCE :
1386 MLX5_LAG_MODE_SRIOV,
1387 shared_fdb);
1388 if (err) {
1389 if (shared_fdb || roce_lag)
1390 mlx5_lag_add_devices(ldev);
1391 if (shared_fdb) {
1392 mlx5_ldev_for_each(i, 0, ldev)
1393 mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1394 }
1395
1396 return;
1397 }
1398
1399 if (roce_lag) {
1400 struct mlx5_core_dev *dev;
1401
1402 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1403 mlx5_rescan_drivers_locked(dev0);
1404 mlx5_ldev_for_each(i, 0, ldev) {
1405 if (i == idx)
1406 continue;
1407 dev = mlx5_lag_pf(ldev, i)->dev;
1408 if (mlx5_get_roce_state(dev))
1409 mlx5_nic_vport_enable_roce(dev);
1410 }
1411 } else if (shared_fdb) {
1412 int i;
1413
1414 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1415 mlx5_rescan_drivers_locked(dev0);
1416
1417 mlx5_ldev_for_each(i, 0, ldev) {
1418 err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1419 if (err)
1420 break;
1421 }
1422
1423 if (err) {
1424 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1425 mlx5_rescan_drivers_locked(dev0);
1426 mlx5_deactivate_lag(ldev);
1427 mlx5_lag_add_devices(ldev);
1428 mlx5_ldev_for_each(i, 0, ldev)
1429 mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1430 mlx5_core_err(dev0, "Failed to enable lag\n");
1431 return;
1432 }
1433 }
1434 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1435 ndev = mlx5_lag_active_backup_get_netdev(dev0);
1436 /* Only SR-IOV and RoCE LAG should have tracker->tx_type
1437 * set, so no need to check the mode.
1438 */
1439 blocking_notifier_call_chain(&dev0->priv.lag_nh,
1440 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1441 ndev);
1442 dev_put(ndev);
1443 }
1444 mlx5_lag_set_vports_agg_speed(ldev);
1445 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1446 mlx5_modify_lag(ldev, &tracker);
1447 mlx5_lag_set_vports_agg_speed(ldev);
1448 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1449 mlx5_lag_reset_vports_speed(ldev);
1450 mlx5_disable_lag(ldev);
1451 }
1452 }
1453
1454 /* The last mdev to unregister will destroy the workqueue before removing the
1455 * devcom component, and as all the mdevs use the same devcom component we are
1456 * guaranteed that the devcom is valid while the calling work is running.
1457 */
1458 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1459 {
1460 struct mlx5_devcom_comp_dev *devcom = NULL;
1461 struct lag_func *pf;
1462 int i;
1463
1464 mutex_lock(&ldev->lock);
1465 i = mlx5_get_next_ldev_func(ldev, 0);
1466 if (i < MLX5_MAX_PORTS) {
1467 pf = mlx5_lag_pf(ldev, i);
1468 devcom = pf->dev->priv.hca_devcom_comp;
1469 }
1470 mutex_unlock(&ldev->lock);
1471 return devcom;
1472 }
1473
1474 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
1475 struct mlx5_flow_table_attr *ft_attr,
1476 struct mlx5_lag *ldev)
1477 {
1478 #ifdef CONFIG_MLX5_ESWITCH
1479 struct mlx5_flow_namespace *ns;
1480 struct mlx5_flow_group *fg;
1481 int err;
1482
1483 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1484 if (!ns)
1485 return 0;
1486
1487 ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
1488 if (IS_ERR(ldev->lag_demux_ft))
1489 return PTR_ERR(ldev->lag_demux_ft);
1490
1491 fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
1492 ldev->lag_demux_ft);
1493 if (IS_ERR(fg)) {
1494 err = PTR_ERR(fg);
1495 mlx5_destroy_flow_table(ldev->lag_demux_ft);
1496 ldev->lag_demux_ft = NULL;
1497 return err;
1498 }
1499
1500 ldev->lag_demux_fg = fg;
1501 return 0;
1502 #else
1503 return -EOPNOTSUPP;
1504 #endif
1505 }
1506
1507 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
1508 struct mlx5_flow_table_attr *ft_attr,
1509 struct mlx5_lag *ldev)
1510 {
1511 struct mlx5_flow_namespace *ns;
1512 int err;
1513
1514 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1515 if (!ns)
1516 return 0;
1517
1518 ldev->lag_demux_fg = NULL;
1519 ft_attr->max_fte = 1;
1520 ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
1521 if (IS_ERR(ldev->lag_demux_ft)) {
1522 err = PTR_ERR(ldev->lag_demux_ft);
1523 ldev->lag_demux_ft = NULL;
1524 return err;
1525 }
1526
1527 return 0;
1528 }
1529
1530 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
1531 struct mlx5_flow_table_attr *ft_attr)
1532 {
1533 struct mlx5_lag *ldev;
1534
1535 if (!ft_attr)
1536 return -EINVAL;
1537
1538 ldev = mlx5_lag_dev(dev);
1539 if (!ldev)
1540 return -ENODEV;
1541
1542 xa_init(&ldev->lag_demux_rules);
1543
1544 if (mlx5_get_sd(dev))
1545 return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
1546
1547 return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
1548 }
1549 EXPORT_SYMBOL(mlx5_lag_demux_init);
1550
1551 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
1552 {
1553 struct mlx5_flow_handle *rule;
1554 struct mlx5_lag *ldev;
1555 unsigned long vport_num;
1556
1557 ldev = mlx5_lag_dev(dev);
1558 if (!ldev)
1559 return;
1560
1561 xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
1562 mlx5_del_flow_rules(rule);
1563 xa_destroy(&ldev->lag_demux_rules);
1564
1565 if (ldev->lag_demux_fg)
1566 mlx5_destroy_flow_group(ldev->lag_demux_fg);
1567 if (ldev->lag_demux_ft)
1568 mlx5_destroy_flow_table(ldev->lag_demux_ft);
1569 ldev->lag_demux_fg = NULL;
1570 ldev->lag_demux_ft = NULL;
1571 }
1572 EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
1573
1574 int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
1575 int index)
1576 {
1577 struct mlx5_flow_handle *rule;
1578 struct mlx5_lag *ldev;
1579 int err;
1580
1581 ldev = mlx5_lag_dev(vport_dev);
1582 if (!ldev || !ldev->lag_demux_fg)
1583 return 0;
1584
1585 if (xa_load(&ldev->lag_demux_rules, index))
1586 return 0;
1587
1588 rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
1589 vport_num, ldev->lag_demux_ft);
1590 if (IS_ERR(rule)) {
1591 err = PTR_ERR(rule);
1592 mlx5_core_warn(vport_dev,
1593 "Failed to create LAG demux rule for vport %u, err %d\n",
1594 vport_num, err);
1595 return err;
1596 }
1597
1598 err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
1599 GFP_KERNEL));
1600 if (err) {
1601 mlx5_del_flow_rules(rule);
1602 mlx5_core_warn(vport_dev,
1603 "Failed to store LAG demux rule for vport %u, err %d\n",
1604 vport_num, err);
1605 }
1606
1607 return err;
1608 }
1609 EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
1610
1611 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
1612 {
1613 struct mlx5_flow_handle *rule;
1614 struct mlx5_lag *ldev;
1615
1616 ldev = mlx5_lag_dev(dev);
1617 if (!ldev || !ldev->lag_demux_fg)
1618 return;
1619
1620 rule = xa_erase(&ldev->lag_demux_rules, index);
1621 if (rule)
1622 mlx5_del_flow_rules(rule);
1623 }
1624 EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
1625
1626 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1627 {
1628 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1629 }
1630
1631 static void mlx5_do_bond_work(struct work_struct *work)
1632 {
1633 struct delayed_work *delayed_work = to_delayed_work(work);
1634 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1635 bond_work);
1636 struct mlx5_devcom_comp_dev *devcom;
1637 int status;
1638
1639 devcom = mlx5_lag_get_devcom_comp(ldev);
1640 if (!devcom)
1641 return;
1642
1643 status = mlx5_devcom_comp_trylock(devcom);
1644 if (!status) {
1645 mlx5_queue_bond_work(ldev, HZ);
1646 return;
1647 }
1648
1649 mutex_lock(&ldev->lock);
1650 if (ldev->mode_changes_in_progress) {
1651 mutex_unlock(&ldev->lock);
1652 mlx5_devcom_comp_unlock(devcom);
1653 mlx5_queue_bond_work(ldev, HZ);
1654 return;
1655 }
1656
1657 mlx5_do_bond(ldev);
1658 mutex_unlock(&ldev->lock);
1659 mlx5_devcom_comp_unlock(devcom);
1660 }
1661
1662 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1663 struct lag_tracker *tracker,
1664 struct netdev_notifier_changeupper_info *info)
1665 {
1666 struct net_device *upper = info->upper_dev, *ndev_tmp;
1667 struct netdev_lag_upper_info *lag_upper_info = NULL;
1668 bool is_bonded, is_in_lag, mode_supported;
1669 bool has_inactive = 0;
1670 struct lag_func *pf;
1671 struct slave *slave;
1672 u8 bond_status = 0;
1673 int num_slaves = 0;
1674 int changed = 0;
1675 int i, idx = -1;
1676
1677 if (!netif_is_lag_master(upper))
1678 return 0;
1679
1680 if (info->linking)
1681 lag_upper_info = info->upper_info;
1682
1683 /* The event may still be of interest if the slave does not belong to
1684 * us, but is enslaved to a master which has one or more of our netdevs
1685 * as slaves (e.g., if a new slave is added to a master that bonds two
1686 * of our netdevs, we should unbond).
1687 */
1688 rcu_read_lock();
1689 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1690 mlx5_ldev_for_each(i, 0, ldev) {
1691 pf = mlx5_lag_pf(ldev, i);
1692 if (pf->netdev == ndev_tmp) {
1693 idx++;
1694 break;
1695 }
1696 }
1697 if (i < MLX5_MAX_PORTS) {
1698 slave = bond_slave_get_rcu(ndev_tmp);
1699 if (slave)
1700 has_inactive |= bond_is_slave_inactive(slave);
1701 bond_status |= (1 << idx);
1702 }
1703
1704 num_slaves++;
1705 }
1706 rcu_read_unlock();
1707
1708 /* None of this lagdev's netdevs are slaves of this master. */
1709 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1710 return 0;
1711
1712 if (lag_upper_info) {
1713 tracker->tx_type = lag_upper_info->tx_type;
1714 tracker->hash_type = lag_upper_info->hash_type;
1715 }
1716
1717 tracker->has_inactive = has_inactive;
1718 /* Determine bonding status:
1719 * A device is considered bonded if both its physical ports are slaves
1720 * of the same lag master, and only them.
1721 */
1722 is_in_lag = num_slaves == ldev->ports &&
1723 bond_status == GENMASK(ldev->ports - 1, 0);
1724
1725 /* Lag mode must be activebackup or hash. */
1726 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1727 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1728
1729 is_bonded = is_in_lag && mode_supported;
1730 if (tracker->is_bonded != is_bonded) {
1731 tracker->is_bonded = is_bonded;
1732 changed = 1;
1733 }
1734
1735 if (!is_in_lag)
1736 return changed;
1737
1738 if (!mlx5_lag_is_ready(ldev))
1739 NL_SET_ERR_MSG_MOD(info->info.extack,
1740 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1741 else if (!mode_supported)
1742 NL_SET_ERR_MSG_MOD(info->info.extack,
1743 "Can't activate LAG offload, TX type isn't supported");
1744
1745 return changed;
1746 }
1747
1748 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1749 struct lag_tracker *tracker,
1750 struct net_device *ndev,
1751 struct netdev_notifier_changelowerstate_info *info)
1752 {
1753 struct netdev_lag_lower_state_info *lag_lower_info;
1754 int idx;
1755
1756 if (!netif_is_lag_port(ndev))
1757 return 0;
1758
1759 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1760 if (idx < 0)
1761 return 0;
1762
1763 /* This information is used to determine virtual to physical
1764 * port mapping.
1765 */
1766 lag_lower_info = info->lower_state_info;
1767 if (!lag_lower_info)
1768 return 0;
1769
1770 tracker->netdev_state[idx] = *lag_lower_info;
1771
1772 return 1;
1773 }
1774
1775 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1776 struct lag_tracker *tracker,
1777 struct net_device *ndev)
1778 {
1779 struct net_device *ndev_tmp;
1780 struct slave *slave;
1781 bool has_inactive = 0;
1782 int idx;
1783
1784 if (!netif_is_lag_master(ndev))
1785 return 0;
1786
1787 rcu_read_lock();
1788 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1789 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1790 if (idx < 0)
1791 continue;
1792
1793 slave = bond_slave_get_rcu(ndev_tmp);
1794 if (slave)
1795 has_inactive |= bond_is_slave_inactive(slave);
1796 }
1797 rcu_read_unlock();
1798
1799 if (tracker->has_inactive == has_inactive)
1800 return 0;
1801
1802 tracker->has_inactive = has_inactive;
1803
1804 return 1;
1805 }
1806
1807 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1808 struct net_device *ndev)
1809 {
1810 struct ethtool_link_ksettings lksettings;
1811 struct net_device *bond_dev;
1812 int err;
1813
1814 if (netif_is_lag_master(ndev))
1815 bond_dev = ndev;
1816 else
1817 bond_dev = netdev_master_upper_dev_get(ndev);
1818
1819 if (!bond_dev) {
1820 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1821 return;
1822 }
1823
1824 err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1825 if (err) {
1826 netdev_dbg(bond_dev,
1827 "Failed to get speed for bond dev %s, err=%d\n",
1828 bond_dev->name, err);
1829 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1830 return;
1831 }
1832
1833 if (lksettings.base.speed == SPEED_UNKNOWN)
1834 tracker->bond_speed_mbps = 0;
1835 else
1836 tracker->bond_speed_mbps = lksettings.base.speed;
1837 }
1838
1839 /* Returns speed in Mbps. */
1840 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1841 {
1842 struct mlx5_lag *ldev;
1843 unsigned long flags;
1844 int ret = 0;
1845
1846 spin_lock_irqsave(&lag_lock, flags);
1847 ldev = mlx5_lag_dev(mdev);
1848 if (!ldev) {
1849 ret = -ENODEV;
1850 goto unlock;
1851 }
1852
1853 *speed = ldev->tracker.bond_speed_mbps;
1854
1855 if (*speed == SPEED_UNKNOWN) {
1856 mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1857 ret = -EINVAL;
1858 }
1859
1860 unlock:
1861 spin_unlock_irqrestore(&lag_lock, flags);
1862 return ret;
1863 }
1864 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
1865
1866 /* this handler is always registered to netdev events */
1867 static int mlx5_lag_netdev_event(struct notifier_block *this,
1868 unsigned long event, void *ptr)
1869 {
1870 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1871 struct lag_tracker tracker;
1872 struct mlx5_lag *ldev;
1873 int changed = 0;
1874
1875 if (event != NETDEV_CHANGEUPPER &&
1876 event != NETDEV_CHANGELOWERSTATE &&
1877 event != NETDEV_CHANGEINFODATA)
1878 return NOTIFY_DONE;
1879
1880 ldev = container_of(this, struct mlx5_lag, nb);
1881
1882 tracker = ldev->tracker;
1883
1884 switch (event) {
1885 case NETDEV_CHANGEUPPER:
1886 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1887 break;
1888 case NETDEV_CHANGELOWERSTATE:
1889 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1890 ndev, ptr);
1891 break;
1892 case NETDEV_CHANGEINFODATA:
1893 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1894 break;
1895 }
1896
1897 if (changed)
1898 mlx5_lag_update_tracker_speed(&tracker, ndev);
1899
1900 ldev->tracker = tracker;
1901
1902 if (changed)
1903 mlx5_queue_bond_work(ldev, 0);
1904
1905 return NOTIFY_DONE;
1906 }
1907
1908 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1909 struct mlx5_core_dev *dev,
1910 struct net_device *netdev)
1911 {
1912 struct lag_func *pf;
1913 unsigned long flags;
1914 int i;
1915
1916 spin_lock_irqsave(&lag_lock, flags);
1917 /* Find pf entry by matching dev pointer */
1918 mlx5_ldev_for_each(i, 0, ldev) {
1919 pf = mlx5_lag_pf(ldev, i);
1920 if (pf->dev == dev) {
1921 pf->netdev = netdev;
1922 ldev->tracker.netdev_state[i].link_up = 0;
1923 ldev->tracker.netdev_state[i].tx_enabled = 0;
1924 break;
1925 }
1926 }
1927 spin_unlock_irqrestore(&lag_lock, flags);
1928 }
1929
1930 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1931 struct net_device *netdev)
1932 {
1933 struct lag_func *pf;
1934 unsigned long flags;
1935 int i;
1936
1937 spin_lock_irqsave(&lag_lock, flags);
1938 mlx5_ldev_for_each(i, 0, ldev) {
1939 pf = mlx5_lag_pf(ldev, i);
1940 if (pf->netdev == netdev) {
1941 pf->netdev = NULL;
1942 break;
1943 }
1944 }
1945 spin_unlock_irqrestore(&lag_lock, flags);
1946 }
1947
1948 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1949 struct mlx5_core_dev *dev)
1950 {
1951 struct lag_func *pf;
1952 u32 idx;
1953 int err;
1954
1955 	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
1956 if (!pf)
1957 return -ENOMEM;
1958
1959 err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
1960 GFP_KERNEL);
1961 if (err) {
1962 kfree(pf);
1963 return err;
1964 }
1965
1966 pf->idx = idx;
1967 pf->dev = dev;
1968 dev->priv.lag = ldev;
1969
1970 MLX5_NB_INIT(&pf->port_change_nb,
1971 mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1972 mlx5_eq_notifier_register(dev, &pf->port_change_nb);
1973
1974 return 0;
1975 }
1976
1977 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1978 struct mlx5_core_dev *dev)
1979 {
1980 struct lag_func *pf;
1981 int i;
1982
1983 mlx5_ldev_for_each(i, 0, ldev) {
1984 pf = mlx5_lag_pf(ldev, i);
1985 if (pf->dev == dev)
1986 break;
1987 }
1988 if (i >= MLX5_MAX_PORTS)
1989 return;
1990
1991 if (pf->port_change_nb.nb.notifier_call)
1992 mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
1993
1994 pf->dev = NULL;
1995 dev->priv.lag = NULL;
1996 xa_erase(&ldev->pfs, pf->idx);
1997 kfree(pf);
1998 }
1999
2000 /* Must be called with HCA devcom component lock held */
2001 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
2002 {
2003 struct mlx5_devcom_comp_dev *pos = NULL;
2004 struct mlx5_lag *ldev = NULL;
2005 struct mlx5_core_dev *tmp_dev;
2006 int err;
2007
2008 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
2009 if (tmp_dev)
2010 ldev = mlx5_lag_dev(tmp_dev);
2011
2012 if (!ldev) {
2013 ldev = mlx5_lag_dev_alloc(dev);
2014 if (!ldev) {
2015 mlx5_core_err(dev, "Failed to alloc lag dev\n");
2016 return 0;
2017 }
2018 err = mlx5_ldev_add_mdev(ldev, dev);
2019 if (err) {
2020 mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
2021 mlx5_ldev_put(ldev);
2022 return 0;
2023 }
2024 return 0;
2025 }
2026
2027 mutex_lock(&ldev->lock);
2028 if (ldev->mode_changes_in_progress) {
2029 mutex_unlock(&ldev->lock);
2030 return -EAGAIN;
2031 }
2032 mlx5_ldev_get(ldev);
2033 err = mlx5_ldev_add_mdev(ldev, dev);
2034 if (err) {
2035 mlx5_ldev_put(ldev);
2036 mutex_unlock(&ldev->lock);
2037 return err;
2038 }
2039 mutex_unlock(&ldev->lock);
2040
2041 return 0;
2042 }
2043
2044 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
2045 {
2046 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
2047 dev->priv.hca_devcom_comp = NULL;
2048 }
2049
2050 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
2051 {
2052 struct mlx5_devcom_match_attr attr = {
2053 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
2054 .net = mlx5_core_net(dev),
2055 };
2056 u8 len __always_unused;
2057
2058 mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
2059
2060 	/* This component is used to sync adding core_dev to lag_dev and to sync
2061 * changes of mlx5_adev_devices between LAG layer and other layers.
2062 */
2063 dev->priv.hca_devcom_comp =
2064 mlx5_devcom_register_component(dev->priv.devc,
2065 MLX5_DEVCOM_HCA_PORTS,
2066 &attr, mlx5_lag_devcom_event,
2067 dev);
2068 if (!dev->priv.hca_devcom_comp) {
2069 mlx5_core_err(dev,
2070 			      "Failed to register devcom HCA component\n");
2071 return -EINVAL;
2072 }
2073
2074 return 0;
2075 }
2076
2077 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
2078 {
2079 struct mlx5_lag *ldev;
2080
2081 ldev = mlx5_lag_dev(dev);
2082 if (!ldev)
2083 return;
2084
2085 /* mdev is being removed, might as well remove debugfs
2086 * as early as possible.
2087 */
2088 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
2089 recheck:
2090 mutex_lock(&ldev->lock);
2091 if (ldev->mode_changes_in_progress) {
2092 mutex_unlock(&ldev->lock);
2093 msleep(100);
2094 goto recheck;
2095 }
2096 mlx5_ldev_remove_mdev(ldev, dev);
2097 mutex_unlock(&ldev->lock);
2098 /* Send devcom event to notify peers that a device is being removed */
2099 mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2100 LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
2101 mlx5_lag_unregister_hca_devcom_comp(dev);
2102 mlx5_ldev_put(ldev);
2103 }
2104
2105 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
2106 {
2107 int err;
2108
2109 if (!mlx5_lag_is_supported(dev))
2110 return;
2111
2112 if (mlx5_lag_register_hca_devcom_comp(dev))
2113 return;
2114
2115 recheck:
2116 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2117 err = __mlx5_lag_dev_add_mdev(dev);
2118 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2119
2120 if (err) {
2121 msleep(100);
2122 goto recheck;
2123 }
2124 /* Send devcom event to notify peers that a device was added */
2125 mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2126 LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
2127 mlx5_ldev_add_debugfs(dev);
2128 }
2129
2130 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
2131 struct net_device *netdev)
2132 {
2133 struct mlx5_lag *ldev;
2134 bool lag_is_active;
2135
2136 ldev = mlx5_lag_dev(dev);
2137 if (!ldev)
2138 return;
2139
2140 mutex_lock(&ldev->lock);
2141 mlx5_ldev_remove_netdev(ldev, netdev);
2142 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2143
2144 lag_is_active = __mlx5_lag_is_active(ldev);
2145 mutex_unlock(&ldev->lock);
2146
2147 if (lag_is_active)
2148 mlx5_queue_bond_work(ldev, 0);
2149 }
2150
2151 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
2152 struct net_device *netdev)
2153 {
2154 struct mlx5_lag *ldev;
2155 int num = 0;
2156
2157 ldev = mlx5_lag_dev(dev);
2158 if (!ldev)
2159 return;
2160
2161 mutex_lock(&ldev->lock);
2162 mlx5_ldev_add_netdev(ldev, dev, netdev);
2163 num = mlx5_lag_num_netdevs(ldev);
2164 if (num >= ldev->ports)
2165 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2166 mutex_unlock(&ldev->lock);
2167 mlx5_queue_bond_work(ldev, 0);
2168 }
2169
2170 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
2171 {
2172 struct lag_func *pf;
2173 int i;
2174
2175 for (i = start_idx; i >= end_idx; i--) {
2176 pf = xa_load(&ldev->pfs, i);
2177 if (pf && pf->dev)
2178 return i;
2179 }
2180 return -1;
2181 }
2182
2183 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
2184 {
2185 struct lag_func *pf;
2186 unsigned long idx;
2187
2188 xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
2189 if (pf->dev)
2190 return idx;
2191 return MLX5_MAX_PORTS;
2192 }
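
/* The mlx5_ldev_for_each() iteration used throughout this file is presumably
 * built on the helper above. A minimal sketch of such a forward iterator
 * (the real macro lives in lag.h and may differ):
 *
 *	#define example_ldev_for_each(i, start_index, ldev)		      \
 *		for ((i) = mlx5_get_next_ldev_func(ldev, start_index);	      \
 *		     (i) < MLX5_MAX_PORTS;				      \
 *		     (i) = mlx5_get_next_ldev_func(ldev, (i) + 1))
 */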
2193
2194 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
2195 {
2196 struct mlx5_lag *ldev;
2197 unsigned long flags;
2198 bool res;
2199
2200 spin_lock_irqsave(&lag_lock, flags);
2201 ldev = mlx5_lag_dev(dev);
2202 res = ldev && __mlx5_lag_is_roce(ldev);
2203 spin_unlock_irqrestore(&lag_lock, flags);
2204
2205 return res;
2206 }
2207 EXPORT_SYMBOL(mlx5_lag_is_roce);
2208
2209 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
2210 {
2211 struct mlx5_lag *ldev;
2212 unsigned long flags;
2213 bool res;
2214
2215 spin_lock_irqsave(&lag_lock, flags);
2216 ldev = mlx5_lag_dev(dev);
2217 res = ldev && __mlx5_lag_is_active(ldev);
2218 spin_unlock_irqrestore(&lag_lock, flags);
2219
2220 return res;
2221 }
2222 EXPORT_SYMBOL(mlx5_lag_is_active);
2223
2224 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
2225 {
2226 struct mlx5_lag *ldev;
2227 unsigned long flags;
2228 	bool res = false;
2229
2230 spin_lock_irqsave(&lag_lock, flags);
2231 ldev = mlx5_lag_dev(dev);
2232 if (ldev)
2233 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
2234 spin_unlock_irqrestore(&lag_lock, flags);
2235
2236 return res;
2237 }
2238 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
2239
2240 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
2241 {
2242 struct mlx5_lag *ldev;
2243 unsigned long flags;
2244 struct lag_func *pf;
2245 bool res = false;
2246 int idx;
2247
2248 spin_lock_irqsave(&lag_lock, flags);
2249 ldev = mlx5_lag_dev(dev);
2250 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
2251 if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
2252 pf = mlx5_lag_pf(ldev, idx);
2253 res = pf && dev == pf->dev;
2254 }
2255 spin_unlock_irqrestore(&lag_lock, flags);
2256
2257 return res;
2258 }
2259 EXPORT_SYMBOL(mlx5_lag_is_master);
2260
2261 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
2262 {
2263 struct mlx5_lag *ldev;
2264 unsigned long flags;
2265 bool res;
2266
2267 spin_lock_irqsave(&lag_lock, flags);
2268 ldev = mlx5_lag_dev(dev);
2269 res = ldev && __mlx5_lag_is_sriov(ldev);
2270 spin_unlock_irqrestore(&lag_lock, flags);
2271
2272 return res;
2273 }
2274 EXPORT_SYMBOL(mlx5_lag_is_sriov);
2275
2276 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
2277 {
2278 struct mlx5_lag *ldev;
2279 unsigned long flags;
2280 bool res;
2281
2282 spin_lock_irqsave(&lag_lock, flags);
2283 ldev = mlx5_lag_dev(dev);
2284 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
2285 spin_unlock_irqrestore(&lag_lock, flags);
2286
2287 return res;
2288 }
2289 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
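
/* Illustrative sketch (hypothetical consumer code): the state queries above
 * are typically used by other mlx5 components to pick a programming path:
 *
 *	if (mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev))
 *		example_program_shared_fdb(dev);	// hypothetical helper
 *	else if (mlx5_lag_is_active(dev))
 *		example_program_per_port(dev);		// hypothetical helper
 */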
2290
2291 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
2292 {
2293 struct mlx5_lag *ldev;
2294
2295 ldev = mlx5_lag_dev(dev);
2296 if (!ldev)
2297 return;
2298
2299 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2300 mutex_lock(&ldev->lock);
2301
2302 ldev->mode_changes_in_progress++;
2303 if (__mlx5_lag_is_active(ldev)) {
2304 if (ldev->mode == MLX5_LAG_MODE_MPESW)
2305 mlx5_lag_disable_mpesw(ldev);
2306 else
2307 mlx5_disable_lag(ldev);
2308 }
2309
2310 mutex_unlock(&ldev->lock);
2311 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2312 }
2313
2314 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
2315 {
2316 struct mlx5_lag *ldev;
2317
2318 ldev = mlx5_lag_dev(dev);
2319 if (!ldev)
2320 return;
2321
2322 mutex_lock(&ldev->lock);
2323 ldev->mode_changes_in_progress--;
2324 mutex_unlock(&ldev->lock);
2325 mlx5_queue_bond_work(ldev, 0);
2326 }
2327
2328 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
2329 struct net_device *slave)
2330 {
2331 struct mlx5_lag *ldev;
2332 unsigned long flags;
2333 struct lag_func *pf;
2334 u8 port = 0;
2335 int i;
2336
2337 spin_lock_irqsave(&lag_lock, flags);
2338 ldev = mlx5_lag_dev(dev);
2339 if (!(ldev && __mlx5_lag_is_roce(ldev)))
2340 goto unlock;
2341
2342 mlx5_ldev_for_each(i, 0, ldev) {
2343 pf = mlx5_lag_pf(ldev, i);
2344 if (pf->netdev == slave) {
2345 port = i;
2346 break;
2347 }
2348 }
2349
2350 port = ldev->v2p_map[port * ldev->buckets];
2351
2352 unlock:
2353 spin_unlock_irqrestore(&lag_lock, flags);
2354 return port;
2355 }
2356 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
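
/* Illustrative sketch (hypothetical RoCE-side caller): mapping the bonding
 * slave netdev chosen for a flow to the physical port given by the current
 * port-affinity (v2p) mapping; 0 is returned when RoCE LAG is not active.
 *
 *	u8 phys_port = mlx5_lag_get_slave_port(mdev, slave_ndev);
 */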
2357
2358 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
2359 {
2360 struct mlx5_lag *ldev;
2361
2362 ldev = mlx5_lag_dev(dev);
2363 if (!ldev)
2364 return 0;
2365
2366 return ldev->ports;
2367 }
2368 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
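
/* Illustrative sketch (hypothetical caller): the port count is handy for
 * sizing per-port data, e.g.:
 *
 *	u64 *stats;
 *	u8 nports = mlx5_lag_get_num_ports(mdev);	// 0 when no LAG device
 *
 *	if (nports)
 *		stats = kcalloc(nports, sizeof(*stats), GFP_KERNEL);
 */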
2369
2370 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
2371 {
2372 struct mlx5_core_dev *peer_dev = NULL;
2373 struct mlx5_lag *ldev;
2374 unsigned long flags;
2375 struct lag_func *pf;
2376 int idx;
2377
2378 spin_lock_irqsave(&lag_lock, flags);
2379 ldev = mlx5_lag_dev(dev);
2380 if (!ldev)
2381 goto unlock;
2382
2383 if (*i == MLX5_MAX_PORTS)
2384 goto unlock;
2385 mlx5_ldev_for_each(idx, *i, ldev) {
2386 pf = mlx5_lag_pf(ldev, idx);
2387 if (pf->dev != dev)
2388 break;
2389 }
2390
2391 if (idx == MLX5_MAX_PORTS) {
2392 *i = idx;
2393 goto unlock;
2394 }
2395 *i = idx + 1;
2396
2397 pf = mlx5_lag_pf(ldev, idx);
2398 peer_dev = pf->dev;
2399
2400 unlock:
2401 spin_unlock_irqrestore(&lag_lock, flags);
2402 return peer_dev;
2403 }
2404 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
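
/* Illustrative sketch (hypothetical caller): walking every peer device of a
 * bonded mlx5 device. The helper returns NULL once the iterator has passed
 * the last populated function slot.
 *
 *	struct mlx5_core_dev *peer;
 *	int i = 0;
 *
 *	while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *		example_handle_peer(peer);	// hypothetical per-peer work
 */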
2405
2406 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
2407 u64 *values,
2408 int num_counters,
2409 size_t *offsets)
2410 {
2411 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
2412 struct mlx5_core_dev **mdev;
2413 int ret = 0, i, j, idx = 0;
2414 struct mlx5_lag *ldev;
2415 unsigned long flags;
2416 struct lag_func *pf;
2417 int num_ports;
2418 void *out;
2419
2420 out = kvzalloc(outlen, GFP_KERNEL);
2421 if (!out)
2422 return -ENOMEM;
2423
2424 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
2425 if (!mdev) {
2426 ret = -ENOMEM;
2427 goto free_out;
2428 }
2429
2430 memset(values, 0, sizeof(*values) * num_counters);
2431
2432 spin_lock_irqsave(&lag_lock, flags);
2433 ldev = mlx5_lag_dev(dev);
2434 if (ldev && __mlx5_lag_is_active(ldev)) {
2435 num_ports = ldev->ports;
2436 mlx5_ldev_for_each(i, 0, ldev) {
2437 pf = mlx5_lag_pf(ldev, i);
2438 mdev[idx++] = pf->dev;
2439 }
2440 } else {
2441 num_ports = 1;
2442 mdev[MLX5_LAG_P1] = dev;
2443 }
2444 spin_unlock_irqrestore(&lag_lock, flags);
2445
2446 for (i = 0; i < num_ports; ++i) {
2447 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
2448
2449 MLX5_SET(query_cong_statistics_in, in, opcode,
2450 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
2451 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
2452 out);
2453 if (ret)
2454 goto free_mdev;
2455
2456 for (j = 0; j < num_counters; ++j)
2457 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
2458 }
2459
2460 free_mdev:
2461 kvfree(mdev);
2462 free_out:
2463 kvfree(out);
2464 return ret;
2465 }
2466 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
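
/* Illustrative sketch (hypothetical caller, e.g. an RDMA counters path):
 * offsets[] holds the byte offset of each 64-bit counter inside the
 * QUERY_CONG_STATISTICS output (the mlx5_ifc field names below are an
 * assumption); values are summed across all LAG ports.
 *
 *	u64 values[2];
 *	size_t offsets[2] = {
 *		MLX5_BYTE_OFF(query_cong_statistics_out, rp_cnp_handled_high),
 *		MLX5_BYTE_OFF(query_cong_statistics_out, np_cnp_sent_high),
 *	};
 *	int err;
 *
 *	err = mlx5_lag_query_cong_counters(mdev, values, 2, offsets);
 */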
2467