xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c (revision 32e940f2bd3b16551f23ea44be47f6f5d1746d64)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/lag.h>
39 #include "lib/mlx5.h"
40 #include "lib/devcom.h"
41 #include "mlx5_core.h"
42 #include "eswitch.h"
43 #include "esw/acl/ofld.h"
44 #include "lag.h"
45 #include "mp.h"
46 #include "mpesw.h"
47 
48 
49 /* General purpose, use for short periods of time.
50  * Beware of lock dependencies (preferably, no locks should be acquired
51  * under it).
52  */
53 static DEFINE_SPINLOCK(lag_lock);
54 
55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
56 {
57 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
58 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
59 
60 	if (mode == MLX5_LAG_MODE_MPESW)
61 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
62 
63 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
64 }
65 
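/* Build a bitmask of the ports that currently have TX enabled; used to
 * program the FW active_port field when hash-based port selection is used.
 */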
66 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
67 {
68 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
69 	u8 active_port = 0;
70 	int num_enabled;
71 	int idx;
72 
73 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
74 			      &num_enabled);
75 	for (idx = 0; idx < num_enabled; idx++)
76 		active_port |= BIT_MASK(enabled_ports[idx]);
77 
78 	return active_port;
79 }
80 
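/* Issue the CREATE_LAG command. Depending on the port selection mode this
 * programs either the queue-affinity TX remap of the first two ports or,
 * when the bypass capability is present, the initial active-port bitmask.
 */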
81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
82 			       int mode, unsigned long flags)
83 {
84 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
85 				     &flags);
86 	int port_sel_mode = get_port_sel_mode(mode, flags);
87 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
88 	u8 *ports = ldev->v2p_map;
89 	int idx0, idx1;
90 	void *lag_ctx;
91 
92 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
96 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
97 
98 	if (idx0 < 0 || idx1 < 0)
99 		return -EINVAL;
100 
101 	switch (port_sel_mode) {
102 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
103 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
104 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
105 		break;
106 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
107 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
108 			break;
109 
110 		MLX5_SET(lagc, lag_ctx, active_port,
111 			 lag_active_port_bits(mlx5_lag_dev(dev)));
112 		break;
113 	default:
114 		break;
115 	}
116 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
117 
118 	return mlx5_cmd_exec_in(dev, create_lag, in);
119 }
120 
121 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
122 			       u8 *ports)
123 {
124 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
125 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
126 	int idx0, idx1;
127 
128 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
129 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
130 	if (idx0 < 0 || idx1 < 0)
131 		return -EINVAL;
132 
133 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
134 	MLX5_SET(modify_lag_in, in, field_select, 0x1);
135 
136 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
137 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
138 
139 	return mlx5_cmd_exec_in(dev, modify_lag, in);
140 }
141 
142 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
143 {
144 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
145 
146 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
147 
148 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
149 }
150 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
151 
152 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
153 {
154 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
155 
156 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
157 
158 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
159 }
160 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
161 
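/* Collect the indices of ports that cannot transmit (TX disabled or link down). */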
162 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
163 				   u8 *ports, int *num_disabled)
164 {
165 	int i;
166 
167 	*num_disabled = 0;
168 	mlx5_ldev_for_each(i, 0, ldev)
169 		if (!tracker->netdev_state[i].tx_enabled ||
170 		    !tracker->netdev_state[i].link_up)
171 			ports[(*num_disabled)++] = i;
172 }
173 
174 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
175 			   u8 *ports, int *num_enabled)
176 {
177 	int i;
178 
179 	*num_enabled = 0;
180 	mlx5_ldev_for_each(i, 0, ldev)
181 		if (tracker->netdev_state[i].tx_enabled &&
182 		    tracker->netdev_state[i].link_up)
183 			ports[(*num_enabled)++] = i;
184 
185 	if (*num_enabled == 0)
186 		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
187 }
188 
189 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
190 				   struct mlx5_lag *ldev,
191 				   struct lag_tracker *tracker,
192 				   unsigned long flags)
193 {
194 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
195 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
196 	int written = 0;
197 	int num_enabled;
198 	int idx;
199 	int err;
200 	int i;
201 	int j;
202 
203 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
204 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
205 				      &num_enabled);
206 		for (i = 0; i < num_enabled; i++) {
207 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
208 			if (err != 3)
209 				return;
210 			written += err;
211 		}
212 		buf[written - 2] = 0;
213 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
214 	} else {
215 		mlx5_ldev_for_each(i, 0, ldev) {
216 			for (j  = 0; j < ldev->buckets; j++) {
217 				idx = i * ldev->buckets + j;
218 				err = scnprintf(buf + written, 10,
219 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
220 				if (err != 9)
221 					return;
222 				written += err;
223 			}
224 		}
225 		mlx5_core_info(dev, "lag map:%s\n", buf);
226 	}
227 }
228 
229 static int mlx5_lag_netdev_event(struct notifier_block *this,
230 				 unsigned long event, void *ptr);
231 static void mlx5_do_bond_work(struct work_struct *work);
232 
233 static void mlx5_ldev_free(struct kref *ref)
234 {
235 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
236 	struct lag_func *pf;
237 	struct net *net;
238 	int i;
239 
240 	if (ldev->nb.notifier_call) {
241 		net = read_pnet(&ldev->net);
242 		unregister_netdevice_notifier_net(net, &ldev->nb);
243 	}
244 
245 	mlx5_ldev_for_each(i, 0, ldev) {
246 		pf = mlx5_lag_pf(ldev, i);
247 		if (pf->port_change_nb.nb.notifier_call) {
248 			struct mlx5_nb *nb = &pf->port_change_nb;
249 
250 			mlx5_eq_notifier_unregister(pf->dev, nb);
251 		}
252 		xa_erase(&ldev->pfs, i);
253 		kfree(pf);
254 	}
255 	xa_destroy(&ldev->pfs);
256 
257 	mlx5_lag_mp_cleanup(ldev);
258 	cancel_delayed_work_sync(&ldev->bond_work);
259 	cancel_work_sync(&ldev->speed_update_work);
260 	destroy_workqueue(ldev->wq);
261 	mutex_destroy(&ldev->lock);
262 	kfree(ldev);
263 }
264 
265 static void mlx5_ldev_put(struct mlx5_lag *ldev)
266 {
267 	kref_put(&ldev->ref, mlx5_ldev_free);
268 }
269 
270 static void mlx5_ldev_get(struct mlx5_lag *ldev)
271 {
272 	kref_get(&ldev->ref);
273 }
274 
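/* Allocate and initialize a LAG object: work queue, PF xarray, netdev
 * notifier and multipath state. Returns NULL on allocation failure.
 */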
275 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
276 {
277 	struct mlx5_lag *ldev;
278 	int err;
279 
280 	ldev = kzalloc_obj(*ldev);
281 	if (!ldev)
282 		return NULL;
283 
284 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
285 	if (!ldev->wq) {
286 		kfree(ldev);
287 		return NULL;
288 	}
289 
290 	kref_init(&ldev->ref);
291 	mutex_init(&ldev->lock);
292 	xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
293 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
294 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
295 
296 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
297 	write_pnet(&ldev->net, mlx5_core_net(dev));
298 	if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
299 		ldev->nb.notifier_call = NULL;
300 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
301 	}
302 	ldev->mode = MLX5_LAG_MODE_NONE;
303 
304 	err = mlx5_lag_mp_init(ldev);
305 	if (err)
306 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
307 			      err);
308 
309 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
310 	ldev->buckets = 1;
311 
312 	return ldev;
313 }
314 
315 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
316 				struct net_device *ndev)
317 {
318 	struct lag_func *pf;
319 	int i;
320 
321 	mlx5_ldev_for_each(i, 0, ldev) {
322 		pf = mlx5_lag_pf(ldev, i);
323 		if (pf->netdev == ndev)
324 			return i;
325 	}
326 
327 	return -ENOENT;
328 }
329 
330 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
331 {
332 	unsigned long idx = 0;
333 	void *entry;
334 
335 	if (!ldev)
336 		return -ENOENT;
337 
338 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
339 	if (!entry)
340 		return -ENOENT;
341 
342 	return (int)idx;
343 }
344 
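/* Translate a logical sequence number into an xarray index: seq 0 is the
 * PF marked as master (when one exists), the remaining PFs follow in
 * xarray order.
 */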
345 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
346 {
347 	int master_idx, i, num = 0;
348 
349 	if (!ldev)
350 		return -ENOENT;
351 
352 	master_idx = mlx5_lag_get_master_idx(ldev);
353 
354 	/* If seq 0 is requested and there's a primary PF, return it */
355 	if (master_idx >= 0) {
356 		if (seq == 0)
357 			return master_idx;
358 		num++;
359 	}
360 
361 	mlx5_ldev_for_each(i, 0, ldev) {
362 		/* Skip the primary PF in the loop */
363 		if (i == master_idx)
364 			continue;
365 
366 		if (num == seq)
367 			return i;
368 		num++;
369 	}
370 	return -ENOENT;
371 }
372 
373 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
374  * sequence number in the LAG. Master is always 0, others numbered
375  * sequentially starting from 1.
376  */
377 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
378 {
379 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
380 	int master_idx, i, num = 1;
381 	struct lag_func *pf;
382 
383 	if (!ldev)
384 		return -ENOENT;
385 
386 	master_idx = mlx5_lag_get_master_idx(ldev);
387 	if (master_idx < 0)
388 		return -ENOENT;
389 
390 	pf = mlx5_lag_pf(ldev, master_idx);
391 	if (pf && pf->dev == dev)
392 		return 0;
393 
394 	mlx5_ldev_for_each(i, 0, ldev) {
395 		if (i == master_idx)
396 			continue;
397 		pf = mlx5_lag_pf(ldev, i);
398 		if (pf->dev == dev)
399 			return num;
400 		num++;
401 	}
402 	return -ENOENT;
403 }
404 EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
405 
406 /* Devcom events for LAG master marking */
407 #define LAG_DEVCOM_PAIR		(0)
408 #define LAG_DEVCOM_UNPAIR	(1)
409 
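/* Mark as master the PF whose core device has the lowest device index. */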
410 static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
411 {
412 	int lowest_dev_idx = INT_MAX;
413 	struct lag_func *pf;
414 	int master_xa_idx = -1;
415 	int dev_idx;
416 	int i;
417 
418 	mlx5_ldev_for_each(i, 0, ldev) {
419 		pf = mlx5_lag_pf(ldev, i);
420 		dev_idx = mlx5_get_dev_index(pf->dev);
421 		if (dev_idx < lowest_dev_idx) {
422 			lowest_dev_idx = dev_idx;
423 			master_xa_idx = i;
424 		}
425 	}
426 
427 	if (master_xa_idx >= 0)
428 		xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
429 }
430 
431 static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
432 {
433 	unsigned long idx = 0;
434 	void *entry;
435 
436 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
437 	if (!entry)
438 		return;
439 
440 	xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
441 }
442 
443 /* Devcom event handler to manage LAG master marking */
444 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
445 {
446 	struct mlx5_core_dev *dev = my_data;
447 	struct mlx5_lag *ldev;
448 	int idx;
449 
450 	ldev = mlx5_lag_dev(dev);
451 	if (!ldev)
452 		return 0;
453 
454 	mutex_lock(&ldev->lock);
455 	switch (event) {
456 	case LAG_DEVCOM_PAIR:
457 		/* No need to mark more than once */
458 		idx = mlx5_lag_get_master_idx(ldev);
459 		if (idx >= 0)
460 			break;
461 		/* Check if all LAG ports are now registered */
462 		if (mlx5_lag_num_devs(ldev) == ldev->ports)
463 			mlx5_lag_mark_master(ldev);
464 		break;
465 
466 	case LAG_DEVCOM_UNPAIR:
467 		/* Clear master mark when a device is removed */
468 		mlx5_lag_clear_master(ldev);
469 		break;
470 	}
471 	mutex_unlock(&ldev->lock);
472 	return 0;
473 }
474 
475 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
476 {
477 	int i, num = 0;
478 
479 	if (!ldev)
480 		return 0;
481 
482 	mlx5_ldev_for_each(i, 0, ldev) {
483 		(void)i;
484 		num++;
485 	}
486 	return num;
487 }
488 
489 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
490 {
491 	struct lag_func *pf;
492 	int i, num = 0;
493 
494 	if (!ldev)
495 		return 0;
496 
497 	mlx5_ldev_for_each(i, 0, ldev) {
498 		pf = mlx5_lag_pf(ldev, i);
499 		if (pf->netdev)
500 			num++;
501 	}
502 	return num;
503 }
504 
505 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
506 {
507 	return ldev->mode == MLX5_LAG_MODE_ROCE;
508 }
509 
510 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
511 {
512 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
513 }
514 
515 /* Create a mapping between steering slots and active ports.
516  * As we have ldev->buckets slots per port first assume the native
517  * mapping should be used.
518  * If there are ports that are disabled fill the relevant slots
519  * with mapping that points to active ports.
520  */
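/* For example, with four ports and port 3 down, each of port 3's buckets is
 * remapped to a randomly chosen active port (1, 2 or 4).
 */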
521 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
522 					   struct mlx5_lag *ldev,
523 					   u8 buckets,
524 					   u8 *ports)
525 {
526 	int disabled[MLX5_MAX_PORTS] = {};
527 	int enabled[MLX5_MAX_PORTS] = {};
528 	int disabled_ports_num = 0;
529 	int enabled_ports_num = 0;
530 	int idx;
531 	u32 rand;
532 	int i;
533 	int j;
534 
535 	mlx5_ldev_for_each(i, 0, ldev) {
536 		if (tracker->netdev_state[i].tx_enabled &&
537 		    tracker->netdev_state[i].link_up)
538 			enabled[enabled_ports_num++] = i;
539 		else
540 			disabled[disabled_ports_num++] = i;
541 	}
542 
543 	/* Use native mapping by default where each port's buckets
544 	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
545 	 * ports[] values are 1-indexed device indices for FW.
546 	 */
547 	mlx5_ldev_for_each(i, 0, ldev) {
548 		for (j = 0; j < buckets; j++) {
549 			idx = i * buckets + j;
550 			ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
551 		}
552 	}
553 
554 	/* If all ports are disabled/enabled keep native mapping */
555 	if (enabled_ports_num == ldev->ports ||
556 	    disabled_ports_num == ldev->ports)
557 		return;
558 
559 	/* Go over the disabled ports and for each assign a random active port */
560 	for (i = 0; i < disabled_ports_num; i++) {
561 		for (j = 0; j < buckets; j++) {
562 			int rand_xa_idx;
563 
564 			get_random_bytes(&rand, 4);
565 			rand_xa_idx = enabled[rand % enabled_ports_num];
566 			ports[disabled[i] * buckets + j] =
567 				mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
568 		}
569 	}
570 }
571 
572 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
573 {
574 	struct lag_func *pf;
575 	int i;
576 
577 	mlx5_ldev_for_each(i, 0, ldev) {
578 		pf = mlx5_lag_pf(ldev, i);
579 		if (pf->has_drop)
580 			return true;
581 	}
582 	return false;
583 }
584 
585 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
586 {
587 	struct lag_func *pf;
588 	int i;
589 
590 	mlx5_ldev_for_each(i, 0, ldev) {
591 		pf = mlx5_lag_pf(ldev, i);
592 		if (!pf->has_drop)
593 			continue;
594 
595 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
596 							     MLX5_VPORT_UPLINK);
597 		pf->has_drop = false;
598 	}
599 }
600 
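/* Refresh the ingress drop rules: remove the existing ones, then install a
 * drop rule on the uplink of every inactive port so that a standby port
 * does not deliver traffic.
 */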
601 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
602 				     struct lag_tracker *tracker)
603 {
604 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
605 	struct mlx5_core_dev *dev;
606 	struct lag_func *pf;
607 	int disabled_index;
608 	int num_disabled;
609 	int err;
610 	int i;
611 
612 	/* First delete the current drop rule so there won't be any dropped
613 	 * packets
614 	 */
615 	mlx5_lag_drop_rule_cleanup(ldev);
616 
617 	if (!ldev->tracker.has_inactive)
618 		return;
619 
620 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
621 
622 	for (i = 0; i < num_disabled; i++) {
623 		disabled_index = disabled_ports[i];
624 		pf = mlx5_lag_pf(ldev, disabled_index);
625 		dev = pf->dev;
626 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
627 								  MLX5_VPORT_UPLINK);
628 		if (!err)
629 			pf->has_drop = true;
630 		else
631 			mlx5_core_err(dev,
632 				      "Failed to create lag drop rule, error: %d", err);
633 	}
634 }
635 
636 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
637 {
638 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
639 	void *lag_ctx;
640 
641 	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
642 
643 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
644 	MLX5_SET(modify_lag_in, in, field_select, 0x2);
645 
646 	MLX5_SET(lagc, lag_ctx, active_port, ports);
647 
648 	return mlx5_cmd_exec_in(dev, modify_lag, in);
649 }
650 
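/* Apply a new port map. For hash-based LAG this updates the port selection
 * flow table (and the FW active-port bitmask when bypass is supported);
 * otherwise the map is applied through MODIFY_LAG queue affinity.
 */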
651 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
652 {
653 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
654 	struct mlx5_core_dev *dev0;
655 	u8 active_ports;
656 	int ret;
657 
658 	if (idx < 0)
659 		return -EINVAL;
660 
661 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
662 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
663 		ret = mlx5_lag_port_sel_modify(ldev, ports);
664 		if (ret ||
665 		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
666 			return ret;
667 
668 		active_ports = lag_active_port_bits(ldev);
669 
670 		return mlx5_cmd_modify_active_port(dev0, active_ports);
671 	}
672 	return mlx5_cmd_modify_lag(dev0, ldev, ports);
673 }
674 
675 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
676 {
677 	struct net_device *ndev = NULL;
678 	struct lag_func *pf;
679 	struct mlx5_lag *ldev;
680 	unsigned long flags;
681 	int i, last_idx;
682 
683 	spin_lock_irqsave(&lag_lock, flags);
684 	ldev = mlx5_lag_dev(dev);
685 
686 	if (!ldev)
687 		goto unlock;
688 
689 	mlx5_ldev_for_each(i, 0, ldev) {
690 		pf = mlx5_lag_pf(ldev, i);
691 		if (ldev->tracker.netdev_state[i].tx_enabled)
692 			ndev = pf->netdev;
693 	}
694 	if (!ndev) {
695 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
696 		if (last_idx < 0)
697 			goto unlock;
698 		pf = mlx5_lag_pf(ldev, last_idx);
699 		ndev = pf->netdev;
700 	}
701 
702 	dev_hold(ndev);
703 
704 unlock:
705 	spin_unlock_irqrestore(&lag_lock, flags);
706 
707 	return ndev;
708 }
709 
710 void mlx5_modify_lag(struct mlx5_lag *ldev,
711 		     struct lag_tracker *tracker)
712 {
713 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
714 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
715 	struct mlx5_core_dev *dev0;
716 	int idx;
717 	int err;
718 	int i;
719 	int j;
720 
721 	if (first_idx < 0)
722 		return;
723 
724 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
725 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
726 
727 	mlx5_ldev_for_each(i, 0, ldev) {
728 		for (j = 0; j < ldev->buckets; j++) {
729 			idx = i * ldev->buckets + j;
730 			if (ports[idx] == ldev->v2p_map[idx])
731 				continue;
732 			err = _mlx5_modify_lag(ldev, ports);
733 			if (err) {
734 				mlx5_core_err(dev0,
735 					      "Failed to modify LAG (%d)\n",
736 					      err);
737 				return;
738 			}
739 			memcpy(ldev->v2p_map, ports, sizeof(ports));
740 
741 			mlx5_lag_print_mapping(dev0, ldev, tracker,
742 					       ldev->mode_flags);
743 			break;
744 		}
745 	}
746 
747 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
748 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
749 
750 		if (ldev->mode != MLX5_LAG_MODE_ROCE)
751 			mlx5_lag_drop_rule_setup(ldev, tracker);
752 		/* Only sriov and roce lag should have tracker->tx_type set so
753 		 *  no need to check the mode
754 		 */
755 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
756 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
757 					     ndev);
758 		dev_put(ndev);
759 	}
760 }
761 
762 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
763 				      enum mlx5_lag_mode mode,
764 				      unsigned long *flags)
765 {
766 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
767 	struct mlx5_core_dev *dev0;
768 
769 	if (first_idx < 0)
770 		return -EINVAL;
771 
772 	if (mode == MLX5_LAG_MODE_MPESW ||
773 	    mode == MLX5_LAG_MODE_MULTIPATH)
774 		return 0;
775 
776 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
777 
778 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
779 		if (ldev->ports > 2)
780 			return -EINVAL;
781 		return 0;
782 	}
783 
784 	if (ldev->ports > 2)
785 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
786 
787 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
788 
789 	return 0;
790 }
791 
792 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
793 			      struct lag_tracker *tracker, bool shared_fdb,
794 			      unsigned long *flags)
795 {
796 	*flags = 0;
797 	if (shared_fdb) {
798 		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
799 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
800 	}
801 
802 	if (mode == MLX5_LAG_MODE_MPESW)
803 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
804 
805 	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
806 }
807 
808 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
809 {
810 	int port_sel_mode = get_port_sel_mode(mode, flags);
811 
812 	switch (port_sel_mode) {
813 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
814 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
815 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
816 	default: return "invalid";
817 	}
818 }
819 
820 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
821 {
822 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
823 	struct mlx5_eswitch *master_esw;
824 	struct mlx5_core_dev *dev0;
825 	int i, j;
826 	int err;
827 
828 	if (master_idx < 0)
829 		return -EINVAL;
830 
831 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
832 	master_esw = dev0->priv.eswitch;
833 	mlx5_ldev_for_each(i, 0, ldev) {
834 		struct mlx5_eswitch *slave_esw;
835 
836 		if (i == master_idx)
837 			continue;
838 
839 		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
840 
841 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
842 							       slave_esw, ldev->ports);
843 		if (err)
844 			goto err;
845 	}
846 	return 0;
847 err:
848 	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
849 		if (j == master_idx)
850 			continue;
851 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
852 							 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
853 	}
854 	return err;
855 }
856 
857 static int mlx5_create_lag(struct mlx5_lag *ldev,
858 			   struct lag_tracker *tracker,
859 			   enum mlx5_lag_mode mode,
860 			   unsigned long flags)
861 {
862 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
863 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
864 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
865 	struct mlx5_core_dev *dev0;
866 	int err;
867 
868 	if (first_idx < 0)
869 		return -EINVAL;
870 
871 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
872 	if (tracker)
873 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
874 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
875 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
876 
877 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
878 	if (err) {
879 		mlx5_core_err(dev0,
880 			      "Failed to create LAG (%d)\n",
881 			      err);
882 		return err;
883 	}
884 
885 	if (shared_fdb) {
886 		err = mlx5_lag_create_single_fdb(ldev);
887 		if (err)
888 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
889 		else
890 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
891 	}
892 
893 	if (err) {
894 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
895 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
896 			mlx5_core_err(dev0,
897 				      "Failed to deactivate RoCE LAG; driver restart required\n");
898 	}
899 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
900 
901 	return err;
902 }
903 
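/* Activate hardware LAG in the requested mode: derive the TX affinity map,
 * create the port selection tables for hash-based mode, issue CREATE_LAG
 * and, for active-backup, set up drop rules on inactive ports.
 */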
904 int mlx5_activate_lag(struct mlx5_lag *ldev,
905 		      struct lag_tracker *tracker,
906 		      enum mlx5_lag_mode mode,
907 		      bool shared_fdb)
908 {
909 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
910 	struct mlx5_core_dev *dev0;
911 	unsigned long flags = 0;
912 	int master_idx;
913 	int err;
914 
915 	master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
916 	if (master_idx < 0)
917 		return -EINVAL;
918 
919 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
920 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
921 	if (err)
922 		return err;
923 
924 	if (mode != MLX5_LAG_MODE_MPESW) {
925 		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
926 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
927 			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
928 						       ldev->v2p_map);
929 			if (err) {
930 				mlx5_core_err(dev0,
931 					      "Failed to create LAG port selection(%d)\n",
932 					      err);
933 				return err;
934 			}
935 		}
936 	}
937 
938 	err = mlx5_create_lag(ldev, tracker, mode, flags);
939 	if (err) {
940 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
941 			mlx5_lag_port_sel_destroy(ldev);
942 		if (roce_lag)
943 			mlx5_core_err(dev0,
944 				      "Failed to activate RoCE LAG\n");
945 		else
946 			mlx5_core_err(dev0,
947 				      "Failed to activate VF LAG\n"
948 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
949 		return err;
950 	}
951 
952 	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
953 	    !roce_lag)
954 		mlx5_lag_drop_rule_setup(ldev, tracker);
955 
956 	ldev->mode = mode;
957 	ldev->mode_flags = flags;
958 	return 0;
959 }
960 
961 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
962 {
963 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
964 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
965 	bool roce_lag = __mlx5_lag_is_roce(ldev);
966 	unsigned long flags = ldev->mode_flags;
967 	struct mlx5_eswitch *master_esw;
968 	struct mlx5_core_dev *dev0;
969 	int err;
970 	int i;
971 
972 	if (master_idx < 0)
973 		return -EINVAL;
974 
975 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
976 	master_esw = dev0->priv.eswitch;
977 	ldev->mode = MLX5_LAG_MODE_NONE;
978 	ldev->mode_flags = 0;
979 	mlx5_lag_mp_reset(ldev);
980 
981 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
982 		mlx5_ldev_for_each(i, 0, ldev) {
983 			if (i == master_idx)
984 				continue;
985 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
986 								 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
987 		}
988 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
989 	}
990 
991 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
992 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
993 	if (err) {
994 		if (roce_lag) {
995 			mlx5_core_err(dev0,
996 				      "Failed to deactivate RoCE LAG; driver restart required\n");
997 		} else {
998 			mlx5_core_err(dev0,
999 				      "Failed to deactivate VF LAG; driver restart required\n"
1000 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1001 		}
1002 		return err;
1003 	}
1004 
1005 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1006 		mlx5_lag_port_sel_destroy(ldev);
1007 		ldev->buckets = 1;
1008 	}
1009 	if (mlx5_lag_has_drop_rule(ldev))
1010 		mlx5_lag_drop_rule_cleanup(ldev);
1011 
1012 	return 0;
1013 }
1014 
1015 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
1016 {
1017 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1018 #ifdef CONFIG_MLX5_ESWITCH
1019 	struct mlx5_core_dev *dev;
1020 	u8 mode;
1021 #endif
1022 	struct lag_func *pf;
1023 	bool roce_support;
1024 	int i;
1025 
1026 	if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
1027 		return false;
1028 
1029 #ifdef CONFIG_MLX5_ESWITCH
1030 	mlx5_ldev_for_each(i, 0, ldev) {
1031 		pf = mlx5_lag_pf(ldev, i);
1032 		dev = pf->dev;
1033 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
1034 			return false;
1035 	}
1036 
1037 	pf = mlx5_lag_pf(ldev, master_idx);
1038 	dev = pf->dev;
1039 	mode = mlx5_eswitch_mode(dev);
1040 	mlx5_ldev_for_each(i, 0, ldev) {
1041 		pf = mlx5_lag_pf(ldev, i);
1042 		if (mlx5_eswitch_mode(pf->dev) != mode)
1043 			return false;
1044 	}
1045 
1046 #else
1047 	mlx5_ldev_for_each(i, 0, ldev) {
1048 		pf = mlx5_lag_pf(ldev, i);
1049 		if (mlx5_sriov_is_enabled(pf->dev))
1050 			return false;
1051 	}
1052 #endif
1053 	pf = mlx5_lag_pf(ldev, master_idx);
1054 	roce_support = mlx5_get_roce_state(pf->dev);
1055 	mlx5_ldev_for_each(i, 0, ldev) {
1056 		if (i == master_idx)
1057 			continue;
1058 		pf = mlx5_lag_pf(ldev, i);
1059 		if (mlx5_get_roce_state(pf->dev) != roce_support)
1060 			return false;
1061 	}
1062 
1063 	return true;
1064 }
1065 
1066 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
1067 {
1068 	struct lag_func *pf;
1069 	int i;
1070 
1071 	mlx5_ldev_for_each(i, 0, ldev) {
1072 		pf = mlx5_lag_pf(ldev, i);
1073 		if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1074 			continue;
1075 
1076 		pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1077 		mlx5_rescan_drivers_locked(pf->dev);
1078 	}
1079 }
1080 
1081 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
1082 {
1083 	struct lag_func *pf;
1084 	int i;
1085 
1086 	mlx5_ldev_for_each(i, 0, ldev) {
1087 		pf = mlx5_lag_pf(ldev, i);
1088 		if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1089 			continue;
1090 
1091 		pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1092 		mlx5_rescan_drivers_locked(pf->dev);
1093 	}
1094 }
1095 
1096 void mlx5_disable_lag(struct mlx5_lag *ldev)
1097 {
1098 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1099 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1100 	struct mlx5_core_dev *dev0;
1101 	bool roce_lag;
1102 	int err;
1103 	int i;
1104 
1105 	if (idx < 0)
1106 		return;
1107 
1108 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1109 	roce_lag = __mlx5_lag_is_roce(ldev);
1110 
1111 	if (shared_fdb) {
1112 		mlx5_lag_remove_devices(ldev);
1113 	} else if (roce_lag) {
1114 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
1115 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1116 			mlx5_rescan_drivers_locked(dev0);
1117 		}
1118 		mlx5_ldev_for_each(i, 0, ldev) {
1119 			if (i == idx)
1120 				continue;
1121 			mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
1122 		}
1123 	}
1124 
1125 	err = mlx5_deactivate_lag(ldev);
1126 	if (err)
1127 		return;
1128 
1129 	if (shared_fdb || roce_lag)
1130 		mlx5_lag_add_devices(ldev);
1131 
1132 	if (shared_fdb)
1133 		mlx5_ldev_for_each(i, 0, ldev)
1134 			if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
1135 				mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1136 }
1137 
1138 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
1139 {
1140 	struct mlx5_core_dev *dev;
1141 	bool ret = false;
1142 	int idx;
1143 	int i;
1144 
1145 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1146 	if (idx < 0)
1147 		return false;
1148 
1149 	mlx5_ldev_for_each(i, 0, ldev) {
1150 		if (i == idx)
1151 			continue;
1152 		dev = mlx5_lag_pf(ldev, i)->dev;
1153 		if (is_mdev_switchdev_mode(dev) &&
1154 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1155 		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
1156 		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
1157 		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
1158 		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1159 			continue;
1160 		return false;
1161 	}
1162 
1163 	dev = mlx5_lag_pf(ldev, idx)->dev;
1164 	if (is_mdev_switchdev_mode(dev) &&
1165 	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1166 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
1167 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
1168 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1169 		ret = true;
1170 
1171 	return ret;
1172 }
1173 
1174 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
1175 {
1176 	bool roce_lag = true;
1177 	struct lag_func *pf;
1178 	int i;
1179 
1180 	mlx5_ldev_for_each(i, 0, ldev) {
1181 		pf = mlx5_lag_pf(ldev, i);
1182 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
1183 	}
1184 
1185 #ifdef CONFIG_MLX5_ESWITCH
1186 	mlx5_ldev_for_each(i, 0, ldev) {
1187 		pf = mlx5_lag_pf(ldev, i);
1188 		roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
1189 	}
1190 #endif
1191 
1192 	return roce_lag;
1193 }
1194 
1195 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1196 {
1197 	return do_bond && __mlx5_lag_is_active(ldev) &&
1198 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1199 }
1200 
1201 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1202 {
1203 	return !do_bond && __mlx5_lag_is_active(ldev) &&
1204 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1205 }
1206 
1207 #ifdef CONFIG_MLX5_ESWITCH
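/* Sum the link speed of every LAG member PF using the supplied query callback. */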
1208 static int
1209 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1210 			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
1211 {
1212 	struct mlx5_core_dev *pf_mdev;
1213 	struct lag_func *pf;
1214 	int pf_idx;
1215 	u32 speed;
1216 	int ret;
1217 
1218 	*sum_speed = 0;
1219 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1220 		pf = mlx5_lag_pf(ldev, pf_idx);
1221 		if (!pf)
1222 			continue;
1223 		pf_mdev = pf->dev;
1224 		if (!pf_mdev)
1225 			continue;
1226 
1227 		ret = get_speed(pf_mdev, &speed);
1228 		if (ret) {
1229 			mlx5_core_dbg(pf_mdev,
1230 				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1231 				      get_speed, dev_name(pf_mdev->device),
1232 				      ret);
1233 			return ret;
1234 		}
1235 
1236 		*sum_speed += speed;
1237 	}
1238 
1239 	return 0;
1240 }
1241 
1242 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
1243 {
1244 	return mlx5_lag_sum_devices_speed(ldev, max_speed,
1245 					  mlx5_port_max_linkspeed);
1246 }
1247 
1248 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
1249 					   u32 *oper_speed)
1250 {
1251 	return mlx5_lag_sum_devices_speed(ldev, oper_speed,
1252 					  mlx5_port_oper_linkspeed);
1253 }
1254 
1255 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
1256 						u32 speed)
1257 {
1258 	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
1259 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
1260 	struct mlx5_vport *vport;
1261 	unsigned long i;
1262 	int ret;
1263 
1264 	if (!esw)
1265 		return;
1266 
1267 	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
1268 		return;
1269 
1270 	mlx5_esw_for_each_vport(esw, i, vport) {
1271 		if (!vport)
1272 			continue;
1273 
1274 		if (vport->vport == MLX5_VPORT_UPLINK)
1275 			continue;
1276 
1277 		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
1278 						     vport->vport, true, speed);
1279 		if (ret)
1280 			mlx5_core_dbg(mdev,
1281 				      "Failed to set vport %d speed %d, err=%d\n",
1282 				      vport->vport, speed, ret);
1283 	}
1284 }
1285 
1286 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1287 {
1288 	struct mlx5_core_dev *mdev;
1289 	struct lag_func *pf;
1290 	u32 speed;
1291 	int pf_idx;
1292 
1293 	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1294 		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1295 			return;
1296 	} else {
1297 		speed = ldev->tracker.bond_speed_mbps;
1298 		if (speed == SPEED_UNKNOWN)
1299 			return;
1300 	}
1301 
1302 	/* If speed is not set, use the sum of max speeds of all PFs */
1303 	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1304 		return;
1305 
1306 	speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1307 
1308 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1309 		pf = mlx5_lag_pf(ldev, pf_idx);
1310 		if (!pf)
1311 			continue;
1312 		mdev = pf->dev;
1313 		if (!mdev)
1314 			continue;
1315 
1316 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1317 	}
1318 }
1319 
1320 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1321 {
1322 	struct mlx5_core_dev *mdev;
1323 	struct lag_func *pf;
1324 	u32 speed;
1325 	int pf_idx;
1326 	int ret;
1327 
1328 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1329 		pf = mlx5_lag_pf(ldev, pf_idx);
1330 		if (!pf)
1331 			continue;
1332 		mdev = pf->dev;
1333 		if (!mdev)
1334 			continue;
1335 
1336 		ret = mlx5_port_oper_linkspeed(mdev, &speed);
1337 		if (ret) {
1338 			mlx5_core_dbg(mdev,
1339 				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1340 				      dev_name(mdev->device), ret);
1341 			continue;
1342 		}
1343 
1344 		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1345 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1346 	}
1347 }
1348 #endif
1349 
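/* Evaluate the tracker state and activate, modify or tear down hardware LAG
 * accordingly. Called from the bond work with the devcom component lock and
 * ldev->lock held.
 */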
1350 static void mlx5_do_bond(struct mlx5_lag *ldev)
1351 {
1352 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1353 	struct lag_tracker tracker = { };
1354 	struct mlx5_core_dev *dev0;
1355 	struct net_device *ndev;
1356 	bool do_bond, roce_lag;
1357 	int err;
1358 	int i;
1359 
1360 	if (idx < 0)
1361 		return;
1362 
1363 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1364 	if (!mlx5_lag_is_ready(ldev)) {
1365 		do_bond = false;
1366 	} else {
1367 		/* VF LAG is in multipath mode, ignore bond change requests */
1368 		if (mlx5_lag_is_multipath(dev0))
1369 			return;
1370 
1371 		tracker = ldev->tracker;
1372 
1373 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1374 	}
1375 
1376 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1377 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1378 
1379 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1380 
1381 		if (shared_fdb || roce_lag)
1382 			mlx5_lag_remove_devices(ldev);
1383 
1384 		err = mlx5_activate_lag(ldev, &tracker,
1385 					roce_lag ? MLX5_LAG_MODE_ROCE :
1386 						   MLX5_LAG_MODE_SRIOV,
1387 					shared_fdb);
1388 		if (err) {
1389 			if (shared_fdb || roce_lag)
1390 				mlx5_lag_add_devices(ldev);
1391 			if (shared_fdb) {
1392 				mlx5_ldev_for_each(i, 0, ldev)
1393 					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1394 			}
1395 
1396 			return;
1397 		}
1398 
1399 		if (roce_lag) {
1400 			struct mlx5_core_dev *dev;
1401 
1402 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1403 			mlx5_rescan_drivers_locked(dev0);
1404 			mlx5_ldev_for_each(i, 0, ldev) {
1405 				if (i == idx)
1406 					continue;
1407 				dev = mlx5_lag_pf(ldev, i)->dev;
1408 				if (mlx5_get_roce_state(dev))
1409 					mlx5_nic_vport_enable_roce(dev);
1410 			}
1411 		} else if (shared_fdb) {
1412 			int i;
1413 
1414 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1415 			mlx5_rescan_drivers_locked(dev0);
1416 
1417 			mlx5_ldev_for_each(i, 0, ldev) {
1418 				err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1419 				if (err)
1420 					break;
1421 			}
1422 
1423 			if (err) {
1424 				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1425 				mlx5_rescan_drivers_locked(dev0);
1426 				mlx5_deactivate_lag(ldev);
1427 				mlx5_lag_add_devices(ldev);
1428 				mlx5_ldev_for_each(i, 0, ldev)
1429 					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1430 				mlx5_core_err(dev0, "Failed to enable lag\n");
1431 				return;
1432 			}
1433 		}
1434 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1435 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1436 			/* Only sriov and roce lag should have tracker->tx_type
1437 			 *  set so no need to check the mode
1438 			 */
1439 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1440 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1441 						     ndev);
1442 			dev_put(ndev);
1443 		}
1444 		mlx5_lag_set_vports_agg_speed(ldev);
1445 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1446 		mlx5_modify_lag(ldev, &tracker);
1447 		mlx5_lag_set_vports_agg_speed(ldev);
1448 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1449 		mlx5_lag_reset_vports_speed(ldev);
1450 		mlx5_disable_lag(ldev);
1451 	}
1452 }
1453 
1454 /* The last mdev to unregister will destroy the workqueue before removing the
1455  * devcom component, and as all the mdevs use the same devcom component we are
1456  * guaranteed that the devcom is valid while the calling work is running.
1457  */
1458 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1459 {
1460 	struct mlx5_devcom_comp_dev *devcom = NULL;
1461 	struct lag_func *pf;
1462 	int i;
1463 
1464 	mutex_lock(&ldev->lock);
1465 	i = mlx5_get_next_ldev_func(ldev, 0);
1466 	if (i < MLX5_MAX_PORTS) {
1467 		pf = mlx5_lag_pf(ldev, i);
1468 		devcom = pf->dev->priv.hca_devcom_comp;
1469 	}
1470 	mutex_unlock(&ldev->lock);
1471 	return devcom;
1472 }
1473 
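/* Driver-managed demux: create a flow table in the LAG namespace plus an
 * eswitch flow group that will hold per-vport demux rules (eswitch builds only).
 */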
1474 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
1475 				     struct mlx5_flow_table_attr *ft_attr,
1476 				     struct mlx5_lag *ldev)
1477 {
1478 #ifdef CONFIG_MLX5_ESWITCH
1479 	struct mlx5_flow_namespace *ns;
1480 	struct mlx5_flow_group *fg;
1481 	int err;
1482 
1483 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1484 	if (!ns)
1485 		return 0;
1486 
1487 	ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
1488 	if (IS_ERR(ldev->lag_demux_ft))
1489 		return PTR_ERR(ldev->lag_demux_ft);
1490 
1491 	fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
1492 					  ldev->lag_demux_ft);
1493 	if (IS_ERR(fg)) {
1494 		err = PTR_ERR(fg);
1495 		mlx5_destroy_flow_table(ldev->lag_demux_ft);
1496 		ldev->lag_demux_ft = NULL;
1497 		return err;
1498 	}
1499 
1500 	ldev->lag_demux_fg = fg;
1501 	return 0;
1502 #else
1503 	return -EOPNOTSUPP;
1504 #endif
1505 }
1506 
1507 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
1508 				  struct mlx5_flow_table_attr *ft_attr,
1509 				  struct mlx5_lag *ldev)
1510 {
1511 	struct mlx5_flow_namespace *ns;
1512 	int err;
1513 
1514 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1515 	if (!ns)
1516 		return 0;
1517 
1518 	ldev->lag_demux_fg = NULL;
1519 	ft_attr->max_fte = 1;
1520 	ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
1521 	if (IS_ERR(ldev->lag_demux_ft)) {
1522 		err = PTR_ERR(ldev->lag_demux_ft);
1523 		ldev->lag_demux_ft = NULL;
1524 		return err;
1525 	}
1526 
1527 	return 0;
1528 }
1529 
1530 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
1531 			struct mlx5_flow_table_attr *ft_attr)
1532 {
1533 	struct mlx5_lag *ldev;
1534 
1535 	if (!ft_attr)
1536 		return -EINVAL;
1537 
1538 	ldev = mlx5_lag_dev(dev);
1539 	if (!ldev)
1540 		return -ENODEV;
1541 
1542 	xa_init(&ldev->lag_demux_rules);
1543 
1544 	if (mlx5_get_sd(dev))
1545 		return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
1546 
1547 	return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
1548 }
1549 EXPORT_SYMBOL(mlx5_lag_demux_init);
1550 
1551 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
1552 {
1553 	struct mlx5_flow_handle *rule;
1554 	struct mlx5_lag *ldev;
1555 	unsigned long vport_num;
1556 
1557 	ldev = mlx5_lag_dev(dev);
1558 	if (!ldev)
1559 		return;
1560 
1561 	xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
1562 		mlx5_del_flow_rules(rule);
1563 	xa_destroy(&ldev->lag_demux_rules);
1564 
1565 	if (ldev->lag_demux_fg)
1566 		mlx5_destroy_flow_group(ldev->lag_demux_fg);
1567 	if (ldev->lag_demux_ft)
1568 		mlx5_destroy_flow_table(ldev->lag_demux_ft);
1569 	ldev->lag_demux_fg = NULL;
1570 	ldev->lag_demux_ft = NULL;
1571 }
1572 EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
1573 
1574 int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
1575 			    int index)
1576 {
1577 	struct mlx5_flow_handle *rule;
1578 	struct mlx5_lag *ldev;
1579 	int err;
1580 
1581 	ldev = mlx5_lag_dev(vport_dev);
1582 	if (!ldev || !ldev->lag_demux_fg)
1583 		return 0;
1584 
1585 	if (xa_load(&ldev->lag_demux_rules, index))
1586 		return 0;
1587 
1588 	rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
1589 					      vport_num, ldev->lag_demux_ft);
1590 	if (IS_ERR(rule)) {
1591 		err = PTR_ERR(rule);
1592 		mlx5_core_warn(vport_dev,
1593 			       "Failed to create LAG demux rule for vport %u, err %d\n",
1594 			       vport_num, err);
1595 		return err;
1596 	}
1597 
1598 	err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
1599 			      GFP_KERNEL));
1600 	if (err) {
1601 		mlx5_del_flow_rules(rule);
1602 		mlx5_core_warn(vport_dev,
1603 			       "Failed to store LAG demux rule for vport %u, err %d\n",
1604 			       vport_num, err);
1605 	}
1606 
1607 	return err;
1608 }
1609 EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
1610 
1611 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
1612 {
1613 	struct mlx5_flow_handle *rule;
1614 	struct mlx5_lag *ldev;
1615 
1616 	ldev = mlx5_lag_dev(dev);
1617 	if (!ldev || !ldev->lag_demux_fg)
1618 		return;
1619 
1620 	rule = xa_erase(&ldev->lag_demux_rules, index);
1621 	if (rule)
1622 		mlx5_del_flow_rules(rule);
1623 }
1624 EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
1625 
1626 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1627 {
1628 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1629 }
1630 
1631 static void mlx5_do_bond_work(struct work_struct *work)
1632 {
1633 	struct delayed_work *delayed_work = to_delayed_work(work);
1634 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1635 					     bond_work);
1636 	struct mlx5_devcom_comp_dev *devcom;
1637 	int status;
1638 
1639 	devcom = mlx5_lag_get_devcom_comp(ldev);
1640 	if (!devcom)
1641 		return;
1642 
1643 	status = mlx5_devcom_comp_trylock(devcom);
1644 	if (!status) {
1645 		mlx5_queue_bond_work(ldev, HZ);
1646 		return;
1647 	}
1648 
1649 	mutex_lock(&ldev->lock);
1650 	if (ldev->mode_changes_in_progress) {
1651 		mutex_unlock(&ldev->lock);
1652 		mlx5_devcom_comp_unlock(devcom);
1653 		mlx5_queue_bond_work(ldev, HZ);
1654 		return;
1655 	}
1656 
1657 	mlx5_do_bond(ldev);
1658 	mutex_unlock(&ldev->lock);
1659 	mlx5_devcom_comp_unlock(devcom);
1660 }
1661 
1662 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1663 					 struct lag_tracker *tracker,
1664 					 struct netdev_notifier_changeupper_info *info)
1665 {
1666 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1667 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1668 	bool is_bonded, is_in_lag, mode_supported;
1669 	bool has_inactive = 0;
1670 	struct lag_func *pf;
1671 	struct slave *slave;
1672 	u8 bond_status = 0;
1673 	int num_slaves = 0;
1674 	int changed = 0;
1675 	int i, idx = -1;
1676 
1677 	if (!netif_is_lag_master(upper))
1678 		return 0;
1679 
1680 	if (info->linking)
1681 		lag_upper_info = info->upper_info;
1682 
1683 	/* The event may still be of interest if the slave does not belong to
1684 	 * us, but is enslaved to a master which has one or more of our netdevs
1685 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1686 	 * of our netdevs, we should unbond).
1687 	 */
1688 	rcu_read_lock();
1689 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1690 		mlx5_ldev_for_each(i, 0, ldev) {
1691 			pf = mlx5_lag_pf(ldev, i);
1692 			if (pf->netdev == ndev_tmp) {
1693 				idx++;
1694 				break;
1695 			}
1696 		}
1697 		if (i < MLX5_MAX_PORTS) {
1698 			slave = bond_slave_get_rcu(ndev_tmp);
1699 			if (slave)
1700 				has_inactive |= bond_is_slave_inactive(slave);
1701 			bond_status |= (1 << idx);
1702 		}
1703 
1704 		num_slaves++;
1705 	}
1706 	rcu_read_unlock();
1707 
1708 	/* None of this lagdev's netdevs are slaves of this master. */
1709 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1710 		return 0;
1711 
1712 	if (lag_upper_info) {
1713 		tracker->tx_type = lag_upper_info->tx_type;
1714 		tracker->hash_type = lag_upper_info->hash_type;
1715 	}
1716 
1717 	tracker->has_inactive = has_inactive;
1718 	/* Determine bonding status:
1719 	 * A device is considered bonded if all its physical ports are slaves
1720 	 * of the same lag master, and only them.
1721 	 */
1722 	is_in_lag = num_slaves == ldev->ports &&
1723 		bond_status == GENMASK(ldev->ports - 1, 0);
1724 
1725 	/* Lag mode must be activebackup or hash. */
1726 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1727 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1728 
1729 	is_bonded = is_in_lag && mode_supported;
1730 	if (tracker->is_bonded != is_bonded) {
1731 		tracker->is_bonded = is_bonded;
1732 		changed = 1;
1733 	}
1734 
1735 	if (!is_in_lag)
1736 		return changed;
1737 
1738 	if (!mlx5_lag_is_ready(ldev))
1739 		NL_SET_ERR_MSG_MOD(info->info.extack,
1740 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1741 	else if (!mode_supported)
1742 		NL_SET_ERR_MSG_MOD(info->info.extack,
1743 				   "Can't activate LAG offload, TX type isn't supported");
1744 
1745 	return changed;
1746 }
1747 
1748 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1749 					      struct lag_tracker *tracker,
1750 					      struct net_device *ndev,
1751 					      struct netdev_notifier_changelowerstate_info *info)
1752 {
1753 	struct netdev_lag_lower_state_info *lag_lower_info;
1754 	int idx;
1755 
1756 	if (!netif_is_lag_port(ndev))
1757 		return 0;
1758 
1759 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1760 	if (idx < 0)
1761 		return 0;
1762 
1763 	/* This information is used to determine virtual to physical
1764 	 * port mapping.
1765 	 */
1766 	lag_lower_info = info->lower_state_info;
1767 	if (!lag_lower_info)
1768 		return 0;
1769 
1770 	tracker->netdev_state[idx] = *lag_lower_info;
1771 
1772 	return 1;
1773 }
1774 
1775 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1776 					    struct lag_tracker *tracker,
1777 					    struct net_device *ndev)
1778 {
1779 	struct net_device *ndev_tmp;
1780 	struct slave *slave;
1781 	bool has_inactive = 0;
1782 	int idx;
1783 
1784 	if (!netif_is_lag_master(ndev))
1785 		return 0;
1786 
1787 	rcu_read_lock();
1788 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1789 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1790 		if (idx < 0)
1791 			continue;
1792 
1793 		slave = bond_slave_get_rcu(ndev_tmp);
1794 		if (slave)
1795 			has_inactive |= bond_is_slave_inactive(slave);
1796 	}
1797 	rcu_read_unlock();
1798 
1799 	if (tracker->has_inactive == has_inactive)
1800 		return 0;
1801 
1802 	tracker->has_inactive = has_inactive;
1803 
1804 	return 1;
1805 }
1806 
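/* Cache the bond's aggregate link speed in the tracker: SPEED_UNKNOWN means
 * there is no bond device (or the ethtool query failed), while 0 means the
 * bond exists but currently reports an unknown speed.
 */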
1807 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1808 					  struct net_device *ndev)
1809 {
1810 	struct ethtool_link_ksettings lksettings;
1811 	struct net_device *bond_dev;
1812 	int err;
1813 
1814 	if (netif_is_lag_master(ndev))
1815 		bond_dev = ndev;
1816 	else
1817 		bond_dev = netdev_master_upper_dev_get(ndev);
1818 
1819 	if (!bond_dev) {
1820 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1821 		return;
1822 	}
1823 
1824 	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1825 	if (err) {
1826 		netdev_dbg(bond_dev,
1827 			   "Failed to get speed for bond dev %s, err=%d\n",
1828 			   bond_dev->name, err);
1829 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1830 		return;
1831 	}
1832 
1833 	if (lksettings.base.speed == SPEED_UNKNOWN)
1834 		tracker->bond_speed_mbps = 0;
1835 	else
1836 		tracker->bond_speed_mbps = lksettings.base.speed;
1837 }
1838 
1839 /* Returns speed in Mbps. */
1840 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1841 {
1842 	struct mlx5_lag *ldev;
1843 	unsigned long flags;
1844 	int ret = 0;
1845 
1846 	spin_lock_irqsave(&lag_lock, flags);
1847 	ldev = mlx5_lag_dev(mdev);
1848 	if (!ldev) {
1849 		ret = -ENODEV;
1850 		goto unlock;
1851 	}
1852 
1853 	*speed = ldev->tracker.bond_speed_mbps;
1854 
1855 	if (*speed == SPEED_UNKNOWN) {
1856 		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1857 		ret = -EINVAL;
1858 	}
1859 
1860 unlock:
1861 	spin_unlock_irqrestore(&lag_lock, flags);
1862 	return ret;
1863 }
1864 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
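/* Usage sketch (hypothetical caller, e.g. a ULP interested in the bond rate):
 *
 *	u32 speed;
 *
 *	if (!mlx5_lag_query_bond_speed(mdev, &speed))
 *		mlx5_core_dbg(mdev, "bond speed: %u Mbps\n", speed);
 */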
1865 
1866 /* This handler is always registered to netdev events. */
1867 static int mlx5_lag_netdev_event(struct notifier_block *this,
1868 				 unsigned long event, void *ptr)
1869 {
1870 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1871 	struct lag_tracker tracker;
1872 	struct mlx5_lag *ldev;
1873 	int changed = 0;
1874 
1875 	if (event != NETDEV_CHANGEUPPER &&
1876 	    event != NETDEV_CHANGELOWERSTATE &&
1877 	    event != NETDEV_CHANGEINFODATA)
1878 		return NOTIFY_DONE;
1879 
1880 	ldev    = container_of(this, struct mlx5_lag, nb);
1881 
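	/* Work on a local copy of the tracker and write it back once all
	 * handlers have run, so a single snapshot is published per event.
	 */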
1882 	tracker = ldev->tracker;
1883 
1884 	switch (event) {
1885 	case NETDEV_CHANGEUPPER:
1886 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1887 		break;
1888 	case NETDEV_CHANGELOWERSTATE:
1889 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1890 							     ndev, ptr);
1891 		break;
1892 	case NETDEV_CHANGEINFODATA:
1893 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1894 		break;
1895 	}
1896 
1897 	if (changed)
1898 		mlx5_lag_update_tracker_speed(&tracker, ndev);
1899 
1900 	ldev->tracker = tracker;
1901 
1902 	if (changed)
1903 		mlx5_queue_bond_work(ldev, 0);
1904 
1905 	return NOTIFY_DONE;
1906 }
1907 
1908 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1909 				struct mlx5_core_dev *dev,
1910 				struct net_device *netdev)
1911 {
1912 	struct lag_func *pf;
1913 	unsigned long flags;
1914 	int i;
1915 
1916 	spin_lock_irqsave(&lag_lock, flags);
1917 	/* Find pf entry by matching dev pointer */
1918 	mlx5_ldev_for_each(i, 0, ldev) {
1919 		pf = mlx5_lag_pf(ldev, i);
1920 		if (pf->dev == dev) {
1921 			pf->netdev = netdev;
1922 			ldev->tracker.netdev_state[i].link_up = 0;
1923 			ldev->tracker.netdev_state[i].tx_enabled = 0;
1924 			break;
1925 		}
1926 	}
1927 	spin_unlock_irqrestore(&lag_lock, flags);
1928 }
1929 
1930 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1931 				    struct net_device *netdev)
1932 {
1933 	struct lag_func *pf;
1934 	unsigned long flags;
1935 	int i;
1936 
1937 	spin_lock_irqsave(&lag_lock, flags);
1938 	mlx5_ldev_for_each(i, 0, ldev) {
1939 		pf = mlx5_lag_pf(ldev, i);
1940 		if (pf->netdev == netdev) {
1941 			pf->netdev = NULL;
1942 			break;
1943 		}
1944 	}
1945 	spin_unlock_irqrestore(&lag_lock, flags);
1946 }
1947 
1948 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1949 			      struct mlx5_core_dev *dev)
1950 {
1951 	struct lag_func *pf;
1952 	u32 idx;
1953 	int err;
1954 
1955 	pf = kzalloc_obj(*pf);
1956 	if (!pf)
1957 		return -ENOMEM;
1958 
1959 	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
1960 		       GFP_KERNEL);
1961 	if (err) {
1962 		kfree(pf);
1963 		return err;
1964 	}
1965 
1966 	pf->idx = idx;
1967 	pf->dev = dev;
1968 	dev->priv.lag = ldev;
1969 
1970 	MLX5_NB_INIT(&pf->port_change_nb,
1971 		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1972 	mlx5_eq_notifier_register(dev, &pf->port_change_nb);
1973 
1974 	return 0;
1975 }
1976 
1977 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1978 				  struct mlx5_core_dev *dev)
1979 {
1980 	struct lag_func *pf;
1981 	int i;
1982 
1983 	mlx5_ldev_for_each(i, 0, ldev) {
1984 		pf = mlx5_lag_pf(ldev, i);
1985 		if (pf->dev == dev)
1986 			break;
1987 	}
1988 	if (i >= MLX5_MAX_PORTS)
1989 		return;
1990 
1991 	if (pf->port_change_nb.nb.notifier_call)
1992 		mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
1993 
1994 	pf->dev = NULL;
1995 	dev->priv.lag = NULL;
1996 	xa_erase(&ldev->pfs, pf->idx);
1997 	kfree(pf);
1998 }
1999 
2000 /* Must be called with HCA devcom component lock held */
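/* Returns -EAGAIN while a mode change is in progress on an existing lag
 * device; the caller (mlx5_lag_add_mdev()) retries in that case.
 */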
2001 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
2002 {
2003 	struct mlx5_devcom_comp_dev *pos = NULL;
2004 	struct mlx5_lag *ldev = NULL;
2005 	struct mlx5_core_dev *tmp_dev;
2006 	int err;
2007 
2008 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
2009 	if (tmp_dev)
2010 		ldev = mlx5_lag_dev(tmp_dev);
2011 
2012 	if (!ldev) {
2013 		ldev = mlx5_lag_dev_alloc(dev);
2014 		if (!ldev) {
2015 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
2016 			return 0;
2017 		}
2018 		err = mlx5_ldev_add_mdev(ldev, dev);
2019 		if (err) {
2020 			mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
2021 			mlx5_ldev_put(ldev);
2022 			return 0;
2023 		}
2024 		return 0;
2025 	}
2026 
2027 	mutex_lock(&ldev->lock);
2028 	if (ldev->mode_changes_in_progress) {
2029 		mutex_unlock(&ldev->lock);
2030 		return -EAGAIN;
2031 	}
2032 	mlx5_ldev_get(ldev);
2033 	err = mlx5_ldev_add_mdev(ldev, dev);
2034 	if (err) {
2035 		mlx5_ldev_put(ldev);
2036 		mutex_unlock(&ldev->lock);
2037 		return err;
2038 	}
2039 	mutex_unlock(&ldev->lock);
2040 
2041 	return 0;
2042 }
2043 
2044 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
2045 {
2046 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
2047 	dev->priv.hca_devcom_comp = NULL;
2048 }
2049 
2050 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
2051 {
2052 	struct mlx5_devcom_match_attr attr = {
2053 		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
2054 		.net = mlx5_core_net(dev),
2055 	};
2056 	u8 len __always_unused;
2057 
2058 	mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
2059 
2060 	/* This component is used to sync adding a core_dev to the lag_dev and
2061 	 * to sync changes of mlx5_adev_devices between the LAG layer and other layers.
2062 	 */
2063 	dev->priv.hca_devcom_comp =
2064 		mlx5_devcom_register_component(dev->priv.devc,
2065 					       MLX5_DEVCOM_HCA_PORTS,
2066 					       &attr, mlx5_lag_devcom_event,
2067 					       dev);
2068 	if (!dev->priv.hca_devcom_comp) {
2069 		mlx5_core_err(dev,
2070 			      "Failed to register devcom HCA component.");
2071 		return -EINVAL;
2072 	}
2073 
2074 	return 0;
2075 }
2076 
2077 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
2078 {
2079 	struct mlx5_lag *ldev;
2080 
2081 	ldev = mlx5_lag_dev(dev);
2082 	if (!ldev)
2083 		return;
2084 
2085 	/* mdev is being removed, might as well remove debugfs
2086 	 * as early as possible.
2087 	 */
2088 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
2089 recheck:
2090 	mutex_lock(&ldev->lock);
2091 	if (ldev->mode_changes_in_progress) {
2092 		mutex_unlock(&ldev->lock);
2093 		msleep(100);
2094 		goto recheck;
2095 	}
2096 	mlx5_ldev_remove_mdev(ldev, dev);
2097 	mutex_unlock(&ldev->lock);
2098 	/* Send devcom event to notify peers that a device is being removed */
2099 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2100 			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
2101 	mlx5_lag_unregister_hca_devcom_comp(dev);
2102 	mlx5_ldev_put(ldev);
2103 }
2104 
2105 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
2106 {
2107 	int err;
2108 
2109 	if (!mlx5_lag_is_supported(dev))
2110 		return;
2111 
2112 	if (mlx5_lag_register_hca_devcom_comp(dev))
2113 		return;
2114 
2115 recheck:
2116 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2117 	err = __mlx5_lag_dev_add_mdev(dev);
2118 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2119 
2120 	if (err) {
2121 		msleep(100);
2122 		goto recheck;
2123 	}
2124 	/* Send devcom event to notify peers that a device was added */
2125 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2126 			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
2127 	mlx5_ldev_add_debugfs(dev);
2128 }
2129 
2130 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
2131 			    struct net_device *netdev)
2132 {
2133 	struct mlx5_lag *ldev;
2134 	bool lag_is_active;
2135 
2136 	ldev = mlx5_lag_dev(dev);
2137 	if (!ldev)
2138 		return;
2139 
2140 	mutex_lock(&ldev->lock);
2141 	mlx5_ldev_remove_netdev(ldev, netdev);
2142 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2143 
2144 	lag_is_active = __mlx5_lag_is_active(ldev);
2145 	mutex_unlock(&ldev->lock);
2146 
2147 	if (lag_is_active)
2148 		mlx5_queue_bond_work(ldev, 0);
2149 }
2150 
2151 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
2152 			 struct net_device *netdev)
2153 {
2154 	struct mlx5_lag *ldev;
2155 	int num = 0;
2156 
2157 	ldev = mlx5_lag_dev(dev);
2158 	if (!ldev)
2159 		return;
2160 
2161 	mutex_lock(&ldev->lock);
2162 	mlx5_ldev_add_netdev(ldev, dev, netdev);
2163 	num = mlx5_lag_num_netdevs(ldev);
2164 	if (num >= ldev->ports)
2165 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2166 	mutex_unlock(&ldev->lock);
2167 	mlx5_queue_bond_work(ldev, 0);
2168 }
2169 
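/* The two lookup helpers below return the previous/next index in ldev->pfs
 * that holds a populated lag_func (or -1 / MLX5_MAX_PORTS when none is
 * found); the mlx5_ldev_for_each()-style iteration used throughout this
 * file relies on helpers like these.
 */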
2170 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
2171 {
2172 	struct lag_func *pf;
2173 	int i;
2174 
2175 	for (i = start_idx; i >= end_idx; i--) {
2176 		pf = xa_load(&ldev->pfs, i);
2177 		if (pf && pf->dev)
2178 			return i;
2179 	}
2180 	return -1;
2181 }
2182 
2183 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
2184 {
2185 	struct lag_func *pf;
2186 	unsigned long idx;
2187 
2188 	xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
2189 		if (pf->dev)
2190 			return idx;
2191 	return MLX5_MAX_PORTS;
2192 }
2193 
2194 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
2195 {
2196 	struct mlx5_lag *ldev;
2197 	unsigned long flags;
2198 	bool res;
2199 
2200 	spin_lock_irqsave(&lag_lock, flags);
2201 	ldev = mlx5_lag_dev(dev);
2202 	res  = ldev && __mlx5_lag_is_roce(ldev);
2203 	spin_unlock_irqrestore(&lag_lock, flags);
2204 
2205 	return res;
2206 }
2207 EXPORT_SYMBOL(mlx5_lag_is_roce);
2208 
2209 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
2210 {
2211 	struct mlx5_lag *ldev;
2212 	unsigned long flags;
2213 	bool res;
2214 
2215 	spin_lock_irqsave(&lag_lock, flags);
2216 	ldev = mlx5_lag_dev(dev);
2217 	res  = ldev && __mlx5_lag_is_active(ldev);
2218 	spin_unlock_irqrestore(&lag_lock, flags);
2219 
2220 	return res;
2221 }
2222 EXPORT_SYMBOL(mlx5_lag_is_active);
2223 
2224 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
2225 {
2226 	struct mlx5_lag *ldev;
2227 	unsigned long flags;
2228 	bool res = false;
2229 
2230 	spin_lock_irqsave(&lag_lock, flags);
2231 	ldev = mlx5_lag_dev(dev);
2232 	if (ldev)
2233 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
2234 	spin_unlock_irqrestore(&lag_lock, flags);
2235 
2236 	return res;
2237 }
2238 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
2239 
2240 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
2241 {
2242 	struct mlx5_lag *ldev;
2243 	unsigned long flags;
2244 	struct lag_func *pf;
2245 	bool res = false;
2246 	int idx;
2247 
2248 	spin_lock_irqsave(&lag_lock, flags);
2249 	ldev = mlx5_lag_dev(dev);
2250 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
2251 	if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
2252 		pf = mlx5_lag_pf(ldev, idx);
2253 		res = pf && dev == pf->dev;
2254 	}
2255 	spin_unlock_irqrestore(&lag_lock, flags);
2256 
2257 	return res;
2258 }
2259 EXPORT_SYMBOL(mlx5_lag_is_master);
2260 
2261 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
2262 {
2263 	struct mlx5_lag *ldev;
2264 	unsigned long flags;
2265 	bool res;
2266 
2267 	spin_lock_irqsave(&lag_lock, flags);
2268 	ldev = mlx5_lag_dev(dev);
2269 	res  = ldev && __mlx5_lag_is_sriov(ldev);
2270 	spin_unlock_irqrestore(&lag_lock, flags);
2271 
2272 	return res;
2273 }
2274 EXPORT_SYMBOL(mlx5_lag_is_sriov);
2275 
2276 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
2277 {
2278 	struct mlx5_lag *ldev;
2279 	unsigned long flags;
2280 	bool res;
2281 
2282 	spin_lock_irqsave(&lag_lock, flags);
2283 	ldev = mlx5_lag_dev(dev);
2284 	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
2285 	spin_unlock_irqrestore(&lag_lock, flags);
2286 
2287 	return res;
2288 }
2289 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
2290 
2291 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
2292 {
2293 	struct mlx5_lag *ldev;
2294 
2295 	ldev = mlx5_lag_dev(dev);
2296 	if (!ldev)
2297 		return;
2298 
2299 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2300 	mutex_lock(&ldev->lock);
2301 
2302 	ldev->mode_changes_in_progress++;
2303 	if (__mlx5_lag_is_active(ldev)) {
2304 		if (ldev->mode == MLX5_LAG_MODE_MPESW)
2305 			mlx5_lag_disable_mpesw(ldev);
2306 		else
2307 			mlx5_disable_lag(ldev);
2308 	}
2309 
2310 	mutex_unlock(&ldev->lock);
2311 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2312 }
2313 
2314 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
2315 {
2316 	struct mlx5_lag *ldev;
2317 
2318 	ldev = mlx5_lag_dev(dev);
2319 	if (!ldev)
2320 		return;
2321 
2322 	mutex_lock(&ldev->lock);
2323 	ldev->mode_changes_in_progress--;
2324 	mutex_unlock(&ldev->lock);
2325 	mlx5_queue_bond_work(ldev, 0);
2326 }
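/* The pair above is meant to bracket reconfigurations that must not race
 * with LAG activation, e.g. (sketch):
 *
 *	mlx5_lag_disable_change(dev);
 *	... reconfigure, for instance switch eswitch modes ...
 *	mlx5_lag_enable_change(dev);
 */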
2327 
2328 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
2329 			   struct net_device *slave)
2330 {
2331 	struct mlx5_lag *ldev;
2332 	unsigned long flags;
2333 	struct lag_func *pf;
2334 	u8 port = 0;
2335 	int i;
2336 
2337 	spin_lock_irqsave(&lag_lock, flags);
2338 	ldev = mlx5_lag_dev(dev);
2339 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
2340 		goto unlock;
2341 
2342 	mlx5_ldev_for_each(i, 0, ldev) {
2343 		pf = mlx5_lag_pf(ldev, i);
2344 		if (pf->netdev == slave) {
2345 			port = i;
2346 			break;
2347 		}
2348 	}
2349 
2350 	port = ldev->v2p_map[port * ldev->buckets];
2351 
2352 unlock:
2353 	spin_unlock_irqrestore(&lag_lock, flags);
2354 	return port;
2355 }
2356 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
2357 
2358 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
2359 {
2360 	struct mlx5_lag *ldev;
2361 
2362 	ldev = mlx5_lag_dev(dev);
2363 	if (!ldev)
2364 		return 0;
2365 
2366 	return ldev->ports;
2367 }
2368 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
2369 
2370 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
2371 {
2372 	struct mlx5_core_dev *peer_dev = NULL;
2373 	struct mlx5_lag *ldev;
2374 	unsigned long flags;
2375 	struct lag_func *pf;
2376 	int idx;
2377 
2378 	spin_lock_irqsave(&lag_lock, flags);
2379 	ldev = mlx5_lag_dev(dev);
2380 	if (!ldev)
2381 		goto unlock;
2382 
2383 	if (*i == MLX5_MAX_PORTS)
2384 		goto unlock;
2385 	mlx5_ldev_for_each(idx, *i, ldev) {
2386 		pf = mlx5_lag_pf(ldev, idx);
2387 		if (pf->dev != dev)
2388 			break;
2389 	}
2390 
2391 	if (idx == MLX5_MAX_PORTS) {
2392 		*i = idx;
2393 		goto unlock;
2394 	}
2395 	*i = idx + 1;
2396 
2397 	pf = mlx5_lag_pf(ldev, idx);
2398 	peer_dev = pf->dev;
2399 
2400 unlock:
2401 	spin_unlock_irqrestore(&lag_lock, flags);
2402 	return peer_dev;
2403 }
2404 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
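/* Iteration sketch over the peer devices of a lag; handle_peer() is a
 * hypothetical consumer (upstream also provides the
 * mlx5_lag_for_each_peer_mdev() wrapper around this pattern):
 *
 *	struct mlx5_core_dev *peer;
 *	int i = 0;
 *
 *	while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *		handle_peer(peer);
 */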
2405 
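/* Sum each of the @num_counters congestion counters across all ports of an
 * active lag (or only @dev when no lag is active). @offsets[j] is the byte
 * offset of counter j within the query_cong_statistics_out layout; totals
 * are accumulated into @values[j].
 */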
2406 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
2407 				 u64 *values,
2408 				 int num_counters,
2409 				 size_t *offsets)
2410 {
2411 	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
2412 	struct mlx5_core_dev **mdev;
2413 	int ret = 0, i, j, idx = 0;
2414 	struct mlx5_lag *ldev;
2415 	unsigned long flags;
2416 	struct lag_func *pf;
2417 	int num_ports;
2418 	void *out;
2419 
2420 	out = kvzalloc(outlen, GFP_KERNEL);
2421 	if (!out)
2422 		return -ENOMEM;
2423 
2424 	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
2425 	if (!mdev) {
2426 		ret = -ENOMEM;
2427 		goto free_out;
2428 	}
2429 
2430 	memset(values, 0, sizeof(*values) * num_counters);
2431 
2432 	spin_lock_irqsave(&lag_lock, flags);
2433 	ldev = mlx5_lag_dev(dev);
2434 	if (ldev && __mlx5_lag_is_active(ldev)) {
2435 		num_ports = ldev->ports;
2436 		mlx5_ldev_for_each(i, 0, ldev) {
2437 			pf = mlx5_lag_pf(ldev, i);
2438 			mdev[idx++] = pf->dev;
2439 		}
2440 	} else {
2441 		num_ports = 1;
2442 		mdev[MLX5_LAG_P1] = dev;
2443 	}
2444 	spin_unlock_irqrestore(&lag_lock, flags);
2445 
2446 	for (i = 0; i < num_ports; ++i) {
2447 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
2448 
2449 		MLX5_SET(query_cong_statistics_in, in, opcode,
2450 			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
2451 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
2452 					  out);
2453 		if (ret)
2454 			goto free_mdev;
2455 
2456 		for (j = 0; j < num_counters; ++j)
2457 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
2458 	}
2459 
2460 free_mdev:
2461 	kvfree(mdev);
2462 free_out:
2463 	kvfree(out);
2464 	return ret;
2465 }
2466 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2467