/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

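/* Program the Tx port affinity (the port 1/port 2 remap) into the device
 * via the CREATE_LAG firmware command.
 */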
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

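/* Compute the Tx affinity for both ports from the tracked slave state:
 * default to a 1:1 mapping, and fall back to the peer port when a slave
 * is down or has Tx disabled.
 */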
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	*port1 = 1;
	*port2 = 2;
	if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
	    !tracker->netdev_state[MLX5_LAG_P1].link_up) {
		*port1 = 2;
		return;
	}

	if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
	    !tracker->netdev_state[MLX5_LAG_P2].link_up)
		*port2 = 1;
}

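/* Re-derive the port affinity from the current tracker state and, if it
 * changed, push the new mapping to firmware with MODIFY_LAG.
 */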
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2]);
	if (err)
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
	return err;
}

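/* Create the hardware LAG and mark the requested mode (RoCE or SR-IOV VF)
 * as active in ldev->flags.
 */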
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	return 0;
}

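/* Clear the LAG mode flags and tear down the hardware LAG via DESTROY_LAG. */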
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

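/* A LAG can only be offloaded when both PF devices are present and their
 * eswitch/SR-IOV configuration allows it.
 */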
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			mlx5_add_dev_by_protocol(ldev->pf[i].dev,
						 MLX5_INTERFACE_PROTOCOL_IB);
}

static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
						    MLX5_INTERFACE_PROTOCOL_IB);
}

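/* Core LAG state machine: depending on the tracked bond state, create,
 * update or destroy the hardware LAG, and move the IB devices between
 * per-port and bonded (RoCE) mode as needed.
 */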
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev))
		return;

	spin_lock(&lag_lock);
	tracker = ldev->tracker;
	spin_unlock(&lag_lock);

	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (roce_lag)
			mlx5_lag_remove_ib_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
					MLX5_LAG_FLAG_SRIOV);
		if (err) {
			if (roce_lag)
				mlx5_lag_add_ib_devices(ldev);

			return;
		}

		if (roce_lag) {
			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_enable_roce(dev1);
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		roce_lag = __mlx5_lag_is_roce(ldev);

		if (roce_lag) {
			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_disable_roce(dev1);
		}

		err = mlx5_deactivate_lag(ldev);
		if (err)
			return;

		if (roce_lag)
			mlx5_lag_add_ib_devices(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		/* 1 sec delay. */
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mlx5_dev_list_unlock();
}

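/* Handle NETDEV_CHANGEUPPER: decide whether both of our netdevs (and only
 * them) are enslaved to the same bond with a supported Tx type, and update
 * tracker->is_bonded accordingly. Returns 1 if the tracker changed.
 */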
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

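/* Handle NETDEV_CHANGELOWERSTATE: record the slave's link/Tx state in the
 * tracker so the Tx affinity mapping can be recomputed. Returns 1 if the
 * tracker changed.
 */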
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	spin_lock(&lag_lock);
	ldev->tracker = tracker;
	spin_unlock(&lag_lock);

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
	struct mlx5_lag *ldev;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	return ldev;
}

static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

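/* Register a PF (identified by its PCI function number) with the shared
 * LAG object. Returns the slot index, or -EPERM if the function number is
 * out of range.
 */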
static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev,
			       struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return -EPERM;

	spin_lock(&lag_lock);
	ldev->pf[fn].dev    = dev;
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;

	dev->priv.lag = ldev;

	spin_unlock(&lag_lock);

	return fn;
}

static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
				   struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

	dev->priv.lag = NULL;
	spin_unlock(&lag_lock);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;
	int i, err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager))
		return;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc();
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return;
		}
	}

	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
		return;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		tmp_dev = ldev->pf[i].dev;
		if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) ||
		    MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS)
			break;
	}

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;

	if (!ldev->nb.notifier_call) {
		ldev->nb.notifier_call = mlx5_lag_netdev_event;
		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
			ldev->nb.notifier_call = NULL;
			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
		}
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		return;

	if (__mlx5_lag_is_active(ldev))
		mlx5_deactivate_lag(ldev);

	mlx5_lag_dev_remove_pf(ldev, dev);

	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			break;

	if (i == MLX5_MAX_PORTS) {
		if (ldev->nb.notifier_call) {
			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
			ldev->nb.notifier_call = NULL;
		}
		mlx5_lag_mp_cleanup(ldev);
		cancel_delayed_work_sync(&ldev->bond_work);
		mlx5_lag_dev_free(ldev);
	}
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();
	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		goto unlock;

	mlx5_do_bond(ldev);

unlock:
	mlx5_dev_list_unlock();
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
						 priv);
	struct mlx5_lag *ldev;

	if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
		return true;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev || !__mlx5_lag_is_roce(ldev) ||
	    ldev->pf[MLX5_LAG_P1].dev == dev)
		return true;

	/* If bonded, we do not add an IB device for PF1. */
	return false;
}

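/* Query congestion statistics and accumulate them across both ports when a
 * RoCE LAG is active, or from the given device alone otherwise.
 */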
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (ldev && __mlx5_lag_is_roce(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);