1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/netdevice.h> 34 #include <net/bonding.h> 35 #include <linux/mlx5/driver.h> 36 #include <linux/mlx5/eswitch.h> 37 #include <linux/mlx5/vport.h> 38 #include <linux/mlx5/lag.h> 39 #include "lib/mlx5.h" 40 #include "lib/devcom.h" 41 #include "mlx5_core.h" 42 #include "eswitch.h" 43 #include "esw/acl/ofld.h" 44 #include "lag.h" 45 #include "mp.h" 46 #include "mpesw.h" 47 48 49 /* General purpose, use for short periods of time. 50 * Beware of lock dependencies (preferably, no locks should be acquired 51 * under it). 
*/
static DEFINE_SPINLOCK(lag_lock);

/* Translate (mode, flags) into the FW port-selection mode carried in the
 * lag context: hash-based flag wins, then MPESW, else queue affinity.
 */
static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
    if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
        return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

    if (mode == MLX5_LAG_MODE_MPESW)
        return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

    return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

/* Return a bitmask of the ports the tracker currently allows to transmit. */
static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
    u8 enabled_ports[MLX5_MAX_PORTS] = {};
    u8 active_port = 0;
    int num_enabled;
    int idx;

    mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
                          &num_enabled);
    for (idx = 0; idx < num_enabled; idx++)
        active_port |= BIT_MASK(enabled_ports[idx]);

    return active_port;
}

/* Issue the CREATE_LAG FW command. Requires at least two registered PFs
 * (seq 0 and seq 1); returns -EINVAL otherwise.
 */
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
                               int mode, unsigned long flags)
{
    bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
                                 &flags);
    int port_sel_mode = get_port_sel_mode(mode, flags);
    u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
    u8 *ports = ldev->v2p_map;
    int idx0, idx1;
    void *lag_ctx;

    lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
    MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
    MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
    idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
    idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);

    if (idx0 < 0 || idx1 < 0)
        return -EINVAL;

    switch (port_sel_mode) {
    case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
        break;
    case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
        /* Only program active_port when FW can bypass the port-select FT */
        if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
            break;

        MLX5_SET(lagc, lag_ctx, active_port,
                 lag_active_port_bits(mlx5_lag_dev(dev)));
        break;
    default:
        break;
    }
    MLX5_SET(lagc, lag_ctx, port_select_mode,
             port_sel_mode);

    return mlx5_cmd_exec_in(dev, create_lag, in);
}

/* Issue MODIFY_LAG updating only the tx_remap affinities (field_select 0x1). */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
                               u8 *ports)
{
    u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
    void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
    int idx0, idx1;

    idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
    idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
    if (idx0 < 0 || idx1 < 0)
        return -EINVAL;

    MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
    MLX5_SET(modify_lag_in, in, field_select, 0x1);

    MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
    MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);

    return mlx5_cmd_exec_in(dev, modify_lag, in);
}

/* Issue CREATE_VPORT_LAG for RoCE traffic steering over the LAG. */
int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
    u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

    MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

    return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

/* Counterpart of mlx5_cmd_create_vport_lag(). */
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
    u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

    MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

    return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

/* Collect indices of ports that cannot transmit (tx disabled or link down). */
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
                                   u8 *ports, int *num_disabled)
{
    int i;

    *num_disabled = 0;
    mlx5_ldev_for_each(i, 0, ldev)
        if (!tracker->netdev_state[i].tx_enabled ||
            !tracker->netdev_state[i].link_up)
            ports[(*num_disabled)++] = i;
}

/* Collect indices of ports that can transmit; if none qualify, fall back to
 * the disabled set so callers always get a non-empty list.
 */
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
                           u8 *ports, int *num_enabled)
{
    int i;

    *num_enabled = 0;
    mlx5_ldev_for_each(i, 0, ldev)
        if (tracker->netdev_state[i].tx_enabled &&
            tracker->netdev_state[i].link_up)
            ports[(*num_enabled)++] = i;

    if (*num_enabled == 0)
        mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
}

/* Log the current port/bucket mapping. In hash mode, print the list of
 * active ports; otherwise print the per-bucket v2p map. The scnprintf
 * return-value checks guard against (unexpected) truncation.
 */
static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
                                   struct mlx5_lag *ldev,
                                   struct lag_tracker *tracker,
                                   unsigned long flags)
{
    char buf[MLX5_MAX_PORTS * 10 + 1] = {};
    u8 enabled_ports[MLX5_MAX_PORTS] = {};
    int written = 0;
    int num_enabled;
    int idx;
    int err;
    int i;
    int j;

    if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
        mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
                              &num_enabled);
        for (i = 0; i < num_enabled; i++) {
            err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
            if (err != 3)
                return;
            written += err;
        }
        /* Strip the trailing ", " */
        buf[written - 2] = 0;
        mlx5_core_info(dev, "lag map active ports: %s\n", buf);
    } else {
        mlx5_ldev_for_each(i, 0, ldev) {
            for (j = 0; j < ldev->buckets; j++) {
                idx = i * ldev->buckets + j;
                err = scnprintf(buf + written, 10,
                                " port %d:%d", i + 1, ldev->v2p_map[idx]);
                if (err != 9)
                    return;
                written += err;
            }
        }
        mlx5_core_info(dev, "lag map:%s\n", buf);
    }
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

/* kref release callback: tear down notifiers, per-PF state and the ldev. */
static void mlx5_ldev_free(struct kref *ref)
{
    struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
    struct lag_func *pf;
    struct net *net;
    int i;

    if (ldev->nb.notifier_call) {
        net = read_pnet(&ldev->net);
        unregister_netdevice_notifier_net(net, &ldev->nb);
    }

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->port_change_nb.nb.notifier_call) {
            struct mlx5_nb *nb = &pf->port_change_nb;

            mlx5_eq_notifier_unregister(pf->dev, nb);
        }
        xa_erase(&ldev->pfs, i);
        kfree(pf);
    }

    xa_destroy(&ldev->pfs);

    mlx5_lag_mp_cleanup(ldev);
    cancel_delayed_work_sync(&ldev->bond_work);
    cancel_work_sync(&ldev->speed_update_work);
    destroy_workqueue(ldev->wq);
    mutex_destroy(&ldev->lock);
    kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
    kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
    kref_get(&ldev->ref);
}

/* Allocate and initialize a LAG device object: workqueue, locks, PF xarray,
 * netdev notifier and multipath state. Notifier/multipath init failures are
 * logged but not fatal. Returns NULL only on allocation failure.
 */
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
    struct mlx5_lag *ldev;
    int err;

    ldev = kzalloc_obj(*ldev);
    if (!ldev)
        return NULL;

    ldev->wq = create_singlethread_workqueue("mlx5_lag");
    if (!ldev->wq) {
        kfree(ldev);
        return NULL;
    }

    kref_init(&ldev->ref);
    mutex_init(&ldev->lock);
    xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
    INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
    INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);

    ldev->nb.notifier_call = mlx5_lag_netdev_event;
    write_pnet(&ldev->net, mlx5_core_net(dev));
    if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
        ldev->nb.notifier_call = NULL;
        mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
    }
    ldev->mode = MLX5_LAG_MODE_NONE;

    err = mlx5_lag_mp_init(ldev);
    if (err)
        mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
                      err);

    ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
    ldev->buckets = 1;

    return ldev;
}

/* Find the xarray index of the PF owning @ndev, or -ENOENT. */
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev)
{
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->netdev == ndev)
            return i;
    }

    return -ENOENT;
}

/* Return the xarray index of the PF marked as LAG master, or -ENOENT. */
static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
{
    unsigned long idx = 0;
    void *entry;

    if (!ldev)
        return -ENOENT;

    entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
    if (!entry)
        return -ENOENT;

    return (int)idx;
}

/* Map a logical sequence number to an xarray PF index: the master PF (if
 * marked) is always seq 0, remaining PFs follow in xarray order.
 */
int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
{
    int master_idx, i, num = 0;

    if (!ldev)
        return -ENOENT;

    master_idx = mlx5_lag_get_master_idx(ldev);

    /* If seq 0 is requested and there's a primary PF, return it */
    if (master_idx >= 0) {
        if (seq == 0)
            return master_idx;
        num++;
    }

    mlx5_ldev_for_each(i, 0, ldev) {
        /* Skip the primary PF in the loop */
        if (i == master_idx)
            continue;

        if (num == seq)
            return i;
        num++;
    }
    return -ENOENT;
}

/* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
 * sequence number in the LAG. Master is always 0, others numbered
 * sequentially starting from 1.
 */
int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
{
    struct mlx5_lag *ldev = mlx5_lag_dev(dev);
    int master_idx, i, num = 1;
    struct lag_func *pf;

    if (!ldev)
        return -ENOENT;

    master_idx = mlx5_lag_get_master_idx(ldev);
    if (master_idx < 0)
        return -ENOENT;

    pf = mlx5_lag_pf(ldev, master_idx);
    if (pf && pf->dev == dev)
        return 0;

    mlx5_ldev_for_each(i, 0, ldev) {
        if (i == master_idx)
            continue;
        pf = mlx5_lag_pf(ldev, i);
        if (pf->dev == dev)
            return num;
        num++;
    }
    return -ENOENT;
}
EXPORT_SYMBOL(mlx5_lag_get_dev_seq);

/* Devcom events for LAG master marking */
#define LAG_DEVCOM_PAIR (0)
#define LAG_DEVCOM_UNPAIR (1)

/* Mark the PF with the lowest global device index as the LAG master. */
static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
{
    int lowest_dev_idx = INT_MAX;
    struct lag_func *pf;
    int master_xa_idx = -1;
    int dev_idx;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        dev_idx = mlx5_get_dev_index(pf->dev);
        if (dev_idx < lowest_dev_idx) {
            lowest_dev_idx =
                dev_idx;
            master_xa_idx = i;
        }
    }

    if (master_xa_idx >= 0)
        xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
}

/* Drop the master mark from whichever PF currently carries it, if any. */
static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
{
    unsigned long idx = 0;
    void *entry;

    entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
    if (!entry)
        return;

    xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
}

/* Devcom event handler to manage LAG master marking */
static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
{
    struct mlx5_core_dev *dev = my_data;
    struct mlx5_lag *ldev;
    int idx;

    ldev = mlx5_lag_dev(dev);
    if (!ldev)
        return 0;

    mutex_lock(&ldev->lock);
    switch (event) {
    case LAG_DEVCOM_PAIR:
        /* No need to mark more than once */
        idx = mlx5_lag_get_master_idx(ldev);
        if (idx >= 0)
            break;
        /* Check if all LAG ports are now registered */
        if (mlx5_lag_num_devs(ldev) == ldev->ports)
            mlx5_lag_mark_master(ldev);
        break;

    case LAG_DEVCOM_UNPAIR:
        /* Clear master mark when a device is removed */
        mlx5_lag_clear_master(ldev);
        break;
    }
    mutex_unlock(&ldev->lock);
    return 0;
}

/* Count PFs currently registered in the LAG; 0 for a NULL ldev. */
int mlx5_lag_num_devs(struct mlx5_lag *ldev)
{
    int i, num = 0;

    if (!ldev)
        return 0;

    mlx5_ldev_for_each(i, 0, ldev) {
        (void)i;
        num++;
    }
    return num;
}

/* Count registered PFs that have an associated netdev. */
int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
{
    struct lag_func *pf;
    int i, num = 0;

    if (!ldev)
        return 0;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->netdev)
            num++;
    }
    return num;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
    return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
    return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping
 * between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           struct mlx5_lag *ldev,
                                           u8 buckets,
                                           u8 *ports)
{
    int disabled[MLX5_MAX_PORTS] = {};
    int enabled[MLX5_MAX_PORTS] = {};
    int disabled_ports_num = 0;
    int enabled_ports_num = 0;
    int idx;
    u32 rand;
    int i;
    int j;

    /* Partition ports into enabled (tx allowed and link up) and disabled */
    mlx5_ldev_for_each(i, 0, ldev) {
        if (tracker->netdev_state[i].tx_enabled &&
            tracker->netdev_state[i].link_up)
            enabled[enabled_ports_num++] = i;
        else
            disabled[disabled_ports_num++] = i;
    }

    /* Use native mapping by default where each port's buckets
     * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
     * ports[] values are 1-indexed device indices for FW.
     */
    mlx5_ldev_for_each(i, 0, ldev) {
        for (j = 0; j < buckets; j++) {
            idx = i * buckets + j;
            ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
        }
    }

    /* If all ports are disabled/enabled keep native mapping */
    if (enabled_ports_num == ldev->ports ||
        disabled_ports_num == ldev->ports)
        return;

    /* Go over the disabled ports and for each assign a random active port */
    for (i = 0; i < disabled_ports_num; i++) {
        for (j = 0; j < buckets; j++) {
            int rand_xa_idx;

            get_random_bytes(&rand, 4);
            rand_xa_idx = enabled[rand % enabled_ports_num];
            ports[disabled[i] * buckets + j] =
                mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
        }
    }
}

/* True if any PF currently has an ingress drop rule installed. */
static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->has_drop)
            return true;
    }
    return false;
}

/* Remove every installed uplink ingress drop rule and clear has_drop. */
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (!pf->has_drop)
            continue;

        mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
                                                     MLX5_VPORT_UPLINK);
        pf->has_drop = false;
    }
}

/* Re-install uplink ingress drop rules on all ports the tracker reports
 * as inactive, so traffic received on an inactive port is dropped.
 */
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
                                     struct lag_tracker *tracker)
{
    u8 disabled_ports[MLX5_MAX_PORTS] = {};
    struct mlx5_core_dev *dev;
    struct lag_func *pf;
    int disabled_index;
    int num_disabled;
    int err;
    int i;

    /* First delete the current drop rule so there won't be any dropped
     * packets
     */
    mlx5_lag_drop_rule_cleanup(ldev);

    if (!ldev->tracker.has_inactive)
        return;

    mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);

    for (i = 0; i < num_disabled; i++) {
        disabled_index = disabled_ports[i];
        pf = mlx5_lag_pf(ldev, disabled_index);
        dev = pf->dev;
        err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
                                                          MLX5_VPORT_UPLINK);
        if (!err)
            pf->has_drop = true;
        else
            mlx5_core_err(dev,
                          "Failed to create lag drop rule, error: %d", err);
    }
}

/* Issue MODIFY_LAG updating only the active_port bitmask (field_select 0x2). */
static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
    u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
    void *lag_ctx;

    lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

    MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
    MLX5_SET(modify_lag_in, in, field_select, 0x2);

    MLX5_SET(lagc, lag_ctx, active_port, ports);

    return mlx5_cmd_exec_in(dev, modify_lag, in);
}

/* Apply a new port mapping: via the port-selection flow table in hash mode
 * (plus active_port update when the bypass cap is set), otherwise via the
 * MODIFY_LAG tx_remap affinities.
 */
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
    int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    struct mlx5_core_dev *dev0;
    u8 active_ports;
    int ret;

    if (idx < 0)
        return -EINVAL;

    dev0 = mlx5_lag_pf(ldev, idx)->dev;
    if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
        ret = mlx5_lag_port_sel_modify(ldev, ports);
        if (ret ||
            !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
            return ret;

        active_ports = lag_active_port_bits(ldev);

        return mlx5_cmd_modify_active_port(dev0, active_ports);
    }
    return mlx5_cmd_modify_lag(dev0, ldev, ports);
}

/* Return (with a reference held) the netdev of the last tx-enabled port,
 * falling back to the highest-seq port when none is tx-enabled.
 */
static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
{
    struct net_device *ndev = NULL;
    struct lag_func *pf;
    struct mlx5_lag *ldev;
    unsigned long flags;
    int i, last_idx;

    spin_lock_irqsave(&lag_lock, flags);
    ldev = mlx5_lag_dev(dev);

    if (!ldev)
        goto unlock;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (ldev->tracker.netdev_state[i].tx_enabled)
            ndev = pf->netdev;
    }
    if (!ndev) {
        last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
        if (last_idx < 0)
            goto
                unlock;
        pf = mlx5_lag_pf(ldev, last_idx);
        ndev = pf->netdev;
    }

    dev_hold(ndev);

unlock:
    spin_unlock_irqrestore(&lag_lock, flags);

    return ndev;
}

/* Recompute the tx affinity mapping from the tracker state and, if it
 * changed, push it to FW once and refresh drop rules / notify listeners
 * for active-backup bonds.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
{
    int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
    struct mlx5_core_dev *dev0;
    int idx;
    int err;
    int i;
    int j;

    if (first_idx < 0)
        return;

    dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
    mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);

    /* A single FW modify covers the whole map, so stop at the first
     * slot that differs from the current v2p_map.
     */
    mlx5_ldev_for_each(i, 0, ldev) {
        for (j = 0; j < ldev->buckets; j++) {
            idx = i * ldev->buckets + j;
            if (ports[idx] == ldev->v2p_map[idx])
                continue;
            err = _mlx5_modify_lag(ldev, ports);
            if (err) {
                mlx5_core_err(dev0,
                              "Failed to modify LAG (%d)\n",
                              err);
                return;
            }
            memcpy(ldev->v2p_map, ports, sizeof(ports));

            mlx5_lag_print_mapping(dev0, ldev, tracker,
                                   ldev->mode_flags);
            break;
        }
    }

    if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
        struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);

        if (!(ldev->mode == MLX5_LAG_MODE_ROCE))
            mlx5_lag_drop_rule_setup(ldev, tracker);
        /* Only sriov and roce lag should have tracker->tx_type set so
         * no need to check the mode
         */
        blocking_notifier_call_chain(&dev0->priv.lag_nh,
                                     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
                                     ndev);
        dev_put(ndev);
    }
}

/* Decide the port-selection mode flags for @mode. MPESW/multipath keep
 * queue affinity; otherwise hash-based selection is enabled when the
 * port_select_flow_table cap exists (mandatory for >2 ports).
 */
static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
                                      enum mlx5_lag_mode mode,
                                      unsigned long *flags)
{
    int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    struct mlx5_core_dev *dev0;

    if (first_idx < 0)
        return -EINVAL;

    if (mode == MLX5_LAG_MODE_MPESW ||
        mode == MLX5_LAG_MODE_MULTIPATH)
        return 0;

    dev0 =
        mlx5_lag_pf(ldev, first_idx)->dev;

    if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
        /* Queue-affinity fallback only works for 2-port LAGs */
        if (ldev->ports > 2)
            return -EINVAL;
        return 0;
    }

    if (ldev->ports > 2)
        ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

    set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

    return 0;
}

/* Compute the full mode_flags word for activation: shared-FDB and MPESW
 * force the native FDB selection mode, then the port-selection mode is
 * resolved by mlx5_lag_set_port_sel_mode().
 */
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
                              struct lag_tracker *tracker, bool shared_fdb,
                              unsigned long *flags)
{
    *flags = 0;
    if (shared_fdb) {
        set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
        set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
    }

    if (mode == MLX5_LAG_MODE_MPESW)
        set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

    return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
}

/* Human-readable name of the resolved port-selection mode (for logging). */
char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
    int port_sel_mode = get_port_sel_mode(mode, flags);

    switch (port_sel_mode) {
    case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
    case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
    case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
    default: return "invalid";
    }
}

/* Attach every slave eswitch to the master's FDB; unwinds the already
 * attached slaves on failure.
 */
static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
{
    int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    struct mlx5_eswitch *master_esw;
    struct mlx5_core_dev *dev0;
    int i, j;
    int err;

    if (master_idx < 0)
        return -EINVAL;

    dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
    master_esw = dev0->priv.eswitch;
    mlx5_ldev_for_each(i, 0, ldev) {
        struct mlx5_eswitch *slave_esw;

        if (i == master_idx)
            continue;

        slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;

        err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
                                                       slave_esw, ldev->ports);
        if (err)
            goto err;
    }
    return 0;
err:
    mlx5_ldev_for_each_reverse(j, i,
                               0, ldev) {
        if (j == master_idx)
            continue;
        mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
                                                 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
    }
    return err;
}

/* Create the FW LAG object and, when requested, switch to single-FDB
 * operation. On any failure the FW LAG is destroyed again before
 * returning the error.
 */
static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker,
                           enum mlx5_lag_mode mode,
                           unsigned long flags)
{
    int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
    u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
    struct mlx5_core_dev *dev0;
    int err;

    if (first_idx < 0)
        return -EINVAL;

    dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
    if (tracker)
        mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
    mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
                   shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

    err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
    if (err) {
        mlx5_core_err(dev0,
                      "Failed to create LAG (%d)\n",
                      err);
        return err;
    }

    if (shared_fdb) {
        err = mlx5_lag_create_single_fdb(ldev);
        if (err)
            mlx5_core_err(dev0, "Can't enable single FDB mode\n");
        else
            mlx5_core_info(dev0, "Operation mode is single FDB\n");
    }

    if (err) {
        /* Roll back the FW LAG created above */
        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
            mlx5_core_err(dev0,
                          "Failed to deactivate RoCE LAG; driver restart required\n");
    }
    BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);

    return err;
}

/* Activate LAG in @mode: resolve flags, program the tx affinity map (and
 * port-selection flow table in hash mode), create the FW LAG and install
 * drop rules for inactive ports of non-RoCE active-backup bonds.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
                      enum mlx5_lag_mode mode,
                      bool shared_fdb)
{
    bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
    struct mlx5_core_dev *dev0;
    unsigned long flags = 0;
    int master_idx;
    int err;

    master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    if (master_idx < 0)
        return -EINVAL;

    dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
    err =
        mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
    if (err)
        return err;

    if (mode != MLX5_LAG_MODE_MPESW) {
        mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
        if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
            err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
                                           ldev->v2p_map);
            if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG port selection(%d)\n",
                              err);
                return err;
            }
        }
    }

    err = mlx5_create_lag(ldev, tracker, mode, flags);
    if (err) {
        /* Undo the port-selection table created above */
        if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
            mlx5_lag_port_sel_destroy(ldev);
        if (roce_lag)
            mlx5_core_err(dev0,
                          "Failed to activate RoCE LAG\n");
        else
            mlx5_core_err(dev0,
                          "Failed to activate VF LAG\n"
                          "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
        return err;
    }

    if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
        !roce_lag)
        mlx5_lag_drop_rule_setup(ldev, tracker);

    ldev->mode = mode;
    ldev->mode_flags = flags;
    return 0;
}

/* Tear down the active LAG: detach shared-FDB slaves, destroy the FW LAG
 * object, the port-selection table and any installed drop rules.
 */
int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
    int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
    bool roce_lag = __mlx5_lag_is_roce(ldev);
    unsigned long flags = ldev->mode_flags;
    struct mlx5_eswitch *master_esw;
    struct mlx5_core_dev *dev0;
    int err;
    int i;

    if (master_idx < 0)
        return -EINVAL;

    dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
    master_esw = dev0->priv.eswitch;
    ldev->mode = MLX5_LAG_MODE_NONE;
    ldev->mode_flags = 0;
    mlx5_lag_mp_reset(ldev);

    if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
        mlx5_ldev_for_each(i, 0, ldev) {
            if (i == master_idx)
                continue;
            mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
                                                     mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
        }
        clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
    }

    MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
    err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
    if (err) {
        if (roce_lag) {
            mlx5_core_err(dev0,
                          "Failed to deactivate RoCE LAG; driver restart required\n");
        } else {
            mlx5_core_err(dev0,
                          "Failed to deactivate VF LAG; driver restart required\n"
                          "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
        }
        return err;
    }

    if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
        mlx5_lag_port_sel_destroy(ldev);
        ldev->buckets = 1;
    }
    if (mlx5_lag_has_drop_rule(ldev))
        mlx5_lag_drop_rule_cleanup(ldev);

    return 0;
}

/* Check that LAG may be activated: all ports registered with a marked
 * master, compatible eswitch configuration on every PF, and a matching
 * RoCE state across all slaves.
 */
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
    int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
#ifdef CONFIG_MLX5_ESWITCH
    struct mlx5_core_dev *dev;
    u8 mode;
#endif
    struct lag_func *pf;
    bool roce_support;
    int i;

    if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
        return false;

#ifdef CONFIG_MLX5_ESWITCH
    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        dev = pf->dev;
        if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
            return false;
    }

    /* All PFs must run the same eswitch mode as the master */
    pf = mlx5_lag_pf(ldev, master_idx);
    dev = pf->dev;
    mode = mlx5_eswitch_mode(dev);
    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (mlx5_eswitch_mode(pf->dev) != mode)
            return false;
    }

#else
    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (mlx5_sriov_is_enabled(pf->dev))
            return false;
    }
#endif
    pf = mlx5_lag_pf(ldev, master_idx);
    roce_support = mlx5_get_roce_state(pf->dev);
    mlx5_ldev_for_each(i, 0, ldev) {
        if (i == master_idx)
            continue;
        pf = mlx5_lag_pf(ldev, i);
        if
            (mlx5_get_roce_state(pf->dev) != roce_support)
            return false;
    }

    return true;
}

/* Re-enable IB auxiliary devices on all PFs (skipping fully disabled ones). */
void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
            continue;

        pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
        mlx5_rescan_drivers_locked(pf->dev);
    }
}

/* Disable IB auxiliary devices on all PFs (skipping fully disabled ones). */
void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
            continue;

        pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
        mlx5_rescan_drivers_locked(pf->dev);
    }
}

/* Disable the active LAG: quiesce IB devices as the mode requires,
 * deactivate the FW LAG, then restore IB devices and reload their reps
 * for shared-FDB mode.
 */
void mlx5_disable_lag(struct mlx5_lag *ldev)
{
    bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
    int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    struct mlx5_core_dev *dev0;
    bool roce_lag;
    int err;
    int i;

    if (idx < 0)
        return;

    dev0 = mlx5_lag_pf(ldev, idx)->dev;
    roce_lag = __mlx5_lag_is_roce(ldev);

    if (shared_fdb) {
        mlx5_lag_remove_devices(ldev);
    } else if (roce_lag) {
        if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
            dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
            mlx5_rescan_drivers_locked(dev0);
        }
        mlx5_ldev_for_each(i, 0, ldev) {
            if (i == idx)
                continue;
            mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
        }
    }

    err = mlx5_deactivate_lag(ldev);
    if (err)
        return;

    if (shared_fdb || roce_lag)
        mlx5_lag_add_devices(ldev);

    if (shared_fdb)
        mlx5_ldev_for_each(i, 0, ldev)
            if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
                mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
}

/* True when every PF (slaves and master alike) meets the switchdev,
 * metadata-matching, capability and peer-count requirements for running
 * the LAG with a single shared FDB.
 */
bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
{
    struct mlx5_core_dev *dev;
    bool ret = false;
    int idx;
    int i;

    idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
    if (idx < 0)
        return false;

    mlx5_ldev_for_each(i, 0, ldev) {
        if (i == idx)
            continue;
        dev = mlx5_lag_pf(ldev, i)->dev;
        if (is_mdev_switchdev_mode(dev) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
            MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
            MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
            mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
            MLX5_CAP_GEN(dev, num_lag_ports) - 1)
            continue;
        return false;
    }

    dev = mlx5_lag_pf(ldev, idx)->dev;
    if (is_mdev_switchdev_mode(dev) &&
        mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
        mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
        MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
        mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
        ret = true;

    return ret;
}

/* RoCE LAG requires every PF to have SR-IOV disabled and (with eswitch
 * support compiled in) to be in legacy eswitch mode.
 */
static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
    bool roce_lag = true;
    struct lag_func *pf;
    int i;

    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
    }

#ifdef CONFIG_MLX5_ESWITCH
    mlx5_ldev_for_each(i, 0, ldev) {
        pf = mlx5_lag_pf(ldev, i);
        roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
    }
#endif

    return roce_lag;
}

/* LAG should be modified (remapped) while bonded, except in MPESW mode. */
static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
    return do_bond && __mlx5_lag_is_active(ldev) &&
           ldev->mode != MLX5_LAG_MODE_MPESW;
}

/* LAG should be disabled once un-bonded, except in MPESW mode. */
static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
    return
	       !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

#ifdef CONFIG_MLX5_ESWITCH
/* Sum a per-device link speed (obtained via @get_speed) over all lag
 * PFs into *sum_speed. Fails fast if any device cannot report a speed.
 */
static int
mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
{
	struct mlx5_core_dev *pf_mdev;
	struct lag_func *pf;
	int pf_idx;
	u32 speed;
	int ret;

	*sum_speed = 0;
	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		pf = mlx5_lag_pf(ldev, pf_idx);
		if (!pf)
			continue;
		pf_mdev = pf->dev;
		if (!pf_mdev)
			continue;

		ret = get_speed(pf_mdev, &speed);
		if (ret) {
			mlx5_core_dbg(pf_mdev,
				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
				      get_speed, dev_name(pf_mdev->device),
				      ret);
			return ret;
		}

		*sum_speed += speed;
	}

	return 0;
}

/* Sum of the maximum supported link speeds of all lag PFs. */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	return mlx5_lag_sum_devices_speed(ldev, max_speed,
					  mlx5_port_max_linkspeed);
}

/* Sum of the current operational link speeds of all lag PFs. */
static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
					   u32 *oper_speed)
{
	return mlx5_lag_sum_devices_speed(ldev, oper_speed,
					  mlx5_port_oper_linkspeed);
}

/* Program @speed as the max TX speed of every non-uplink vport on
 * @mdev's eswitch. Silently returns when there is no eswitch or the
 * FW does not support the esw_vport_state_max_tx_speed capability.
 * Per-vport failures are only logged (best effort).
 */
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
						u32 speed)
{
	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_vport *vport;
	unsigned long i;
	int ret;

	if (!esw)
		return;

	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
		return;

	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport)
			continue;

		if (vport->vport == MLX5_VPORT_UPLINK)
			continue;

		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
						     vport->vport, true, speed);
		if (ret)
			mlx5_core_dbg(mdev,
				      "Failed to set vport %d speed %d, err=%d\n",
				      vport->vport, speed, ret);
	}
}

/* Set the aggregated bond speed on the vports of all lag PFs.
 * MPESW uses the sum of operational speeds; other modes use the speed
 * cached from the bond netdev, falling back to the sum of max speeds
 * when the cached value is 0. Speed is scaled by MLX5_MAX_TX_SPEED_UNIT
 * before programming.
 */
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *mdev;
	struct lag_func *pf;
	u32 speed;
	int pf_idx;

	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
			return;
	} else {
		speed = ldev->tracker.bond_speed_mbps;
		if (speed == SPEED_UNKNOWN)
			return;
	}

	/* If speed is not set, use the sum of max speeds of all PFs */
	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
		return;

	speed = speed / MLX5_MAX_TX_SPEED_UNIT;

	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		pf = mlx5_lag_pf(ldev, pf_idx);
		if (!pf)
			continue;
		mdev = pf->dev;
		if (!mdev)
			continue;

		mlx5_lag_modify_device_vports_speed(mdev, speed);
	}
}

/* Restore each PF's vport speeds to that PF's own operational link
 * speed (undo the aggregated setting when the lag is disabled).
 */
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *mdev;
	struct lag_func *pf;
	u32 speed;
	int pf_idx;
	int ret;

	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		pf = mlx5_lag_pf(ldev, pf_idx);
		if (!pf)
			continue;
		mdev = pf->dev;
		if (!mdev)
			continue;

		ret = mlx5_port_oper_linkspeed(mdev, &speed);
		if (ret) {
			mlx5_core_dbg(mdev,
				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
				      dev_name(mdev->device), ret);
			continue;
		}

		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
		mlx5_lag_modify_device_vports_speed(mdev, speed);
	}
}
#endif

/* Central bond state machine, run from the bond work under ldev->lock
 * and the devcom component lock: decides whether to activate, modify or
 * disable the hardware lag based on the netdev tracker state.
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct lag_tracker tracker = { };
	struct mlx5_core_dev *dev0;
	struct net_device *ndev;
	bool do_bond, roce_lag;
	int err;
	int i;

	if (idx < 0)
		return;

	dev0 = mlx5_lag_pf(ldev, idx)->dev;
	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		/* IB devices must be removed before the FW lag is created */
		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ?
					MLX5_LAG_MODE_ROCE :
					MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			/* Activation failed: restore the IB devices removed above */
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);
			if (shared_fdb) {
				mlx5_ldev_for_each(i, 0, ldev)
					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
			}

			return;
		}

		if (roce_lag) {
			struct mlx5_core_dev *dev;

			/* dev0 carries the single IB device of the RoCE lag;
			 * enable RoCE on the secondary ports.
			 */
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_ldev_for_each(i, 0, ldev) {
				if (i == idx)
					continue;
				dev = mlx5_lag_pf(ldev, i)->dev;
				if (mlx5_get_roce_state(dev))
					mlx5_nic_vport_enable_roce(dev);
			}
		} else if (shared_fdb) {
			int i;

			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			mlx5_ldev_for_each(i, 0, ldev) {
				err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
				if (err)
					break;
			}

			/* Rep reload failed: fully unwind the shared-FDB lag */
			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_ldev_for_each(i, 0, ldev)
					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
			ndev = mlx5_lag_active_backup_get_netdev(dev0);
			/* Only sriov and roce lag should have tracker->tx_type
			 * set, so no need to check the mode.
			 */
			blocking_notifier_call_chain(&dev0->priv.lag_nh,
						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
						     ndev);
			dev_put(ndev);
		}
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_lag_reset_vports_speed(ldev);
		mlx5_disable_lag(ldev);
	}
}

/* The last mdev to unregister will destroy the workqueue before removing the
 * devcom component, and as all the mdevs use the same devcom component we are
 * guaranteed that the devcom is valid while the calling work is running.
 */
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
{
	struct mlx5_devcom_comp_dev *devcom = NULL;
	struct lag_func *pf;
	int i;

	mutex_lock(&ldev->lock);
	/* Any registered PF works: they all share the same component */
	i = mlx5_get_next_ldev_func(ldev, 0);
	if (i < MLX5_MAX_PORTS) {
		pf = mlx5_lag_pf(ldev, i);
		devcom = pf->dev->priv.hca_devcom_comp;
	}
	mutex_unlock(&ldev->lock);
	return devcom;
}

/* Create the LAG demux flow table and its eswitch-managed flow group
 * (software-steering variant, used when socket-direct is enabled).
 * Returns 0 when the LAG namespace does not exist, -EOPNOTSUPP without
 * eswitch support.
 */
static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
				     struct mlx5_flow_table_attr *ft_attr,
				     struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_group *fg;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
	if (!ns)
		return 0;

	ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
	if (IS_ERR(ldev->lag_demux_ft))
		return PTR_ERR(ldev->lag_demux_ft);

	fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
					  ldev->lag_demux_ft);
	if (IS_ERR(fg)) {
		err = PTR_ERR(fg);
		mlx5_destroy_flow_table(ldev->lag_demux_ft);
		ldev->lag_demux_ft = NULL;
		return err;
	}

	ldev->lag_demux_fg = fg;
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}

/* Firmware-managed variant of the LAG demux table: a single-FTE table
 * created via the dedicated FW command, with no flow group.
 */
static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
				  struct mlx5_flow_table_attr *ft_attr,
				  struct mlx5_lag *ldev)
{
	struct mlx5_flow_namespace *ns;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
	if (!ns)
		return 0;

	ldev->lag_demux_fg = NULL;
	ft_attr->max_fte = 1;
	ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
	if (IS_ERR(ldev->lag_demux_ft)) {
		err = PTR_ERR(ldev->lag_demux_ft);
		ldev->lag_demux_ft = NULL;
		return err;
	}

	return 0;
}

/* Initialize the LAG demux infrastructure: the per-lag rule xarray plus
 * either the SW-steering (socket-direct) or FW-managed demux table.
 */
int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
			struct mlx5_flow_table_attr *ft_attr)
{
	struct mlx5_lag *ldev;

	if (!ft_attr)
		return -EINVAL;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return -ENODEV;

	xa_init(&ldev->lag_demux_rules);

	if (mlx5_get_sd(dev))
		return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);

	return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
}
EXPORT_SYMBOL(mlx5_lag_demux_init);

/* Destroy all demux rules, then the flow group and table (if present),
 * leaving the lag demux state fully reset.
 */
void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_handle *rule;
	struct mlx5_lag *ldev;
	unsigned long vport_num;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
		mlx5_del_flow_rules(rule);
	xa_destroy(&ldev->lag_demux_rules);

	if (ldev->lag_demux_fg)
		mlx5_destroy_flow_group(ldev->lag_demux_fg);
	if (ldev->lag_demux_ft)
		mlx5_destroy_flow_table(ldev->lag_demux_ft);
	ldev->lag_demux_fg = NULL;
	ldev->lag_demux_ft = NULL;
}
EXPORT_SYMBOL(mlx5_lag_demux_cleanup);

/* Add a demux steering rule for @vport_num at slot @index. No-op (0)
 * when there is no lag, no SW-steering demux group, or a rule already
 * occupies the slot.
 */
int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
			    int index)
{
	struct mlx5_flow_handle *rule;
	struct mlx5_lag *ldev;
	int err;

	ldev = mlx5_lag_dev(vport_dev);
	if (!ldev || !ldev->lag_demux_fg)
		return 0;

	if (xa_load(&ldev->lag_demux_rules, index))
		return 0;

	rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
					      vport_num, ldev->lag_demux_ft);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		mlx5_core_warn(vport_dev,
			       "Failed to create LAG demux rule for vport %u, err %d\n",
			       vport_num, err);
		return err;
	}

	err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
			      GFP_KERNEL));
	if (err) {
		/* Roll back the HW rule if it cannot be tracked */
		mlx5_del_flow_rules(rule);
		mlx5_core_warn(vport_dev,
			       "Failed to store LAG demux rule for vport %u, err %d\n",
			       vport_num, err);
	}

	return err;
}
EXPORT_SYMBOL(mlx5_lag_demux_rule_add);

/* Remove and free the demux rule stored at @index, if any. */
void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
{
	struct mlx5_flow_handle *rule;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev || !ldev->lag_demux_fg)
		return;

	rule = xa_erase(&ldev->lag_demux_rules, index);
	if (rule)
		mlx5_del_flow_rules(rule);
}
EXPORT_SYMBOL(mlx5_lag_demux_rule_del);

/* Schedule (or reschedule) the bond work after @delay jiffies. */
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

/* Bond worker: takes the devcom component lock and ldev->lock, then runs
 * mlx5_do_bond(). If either the devcom trylock fails or a mode change is
 * in progress, it backs off and requeues itself one second later.
 */
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_devcom_comp_dev *devcom;
	int status;

	devcom = mlx5_lag_get_devcom_comp(ldev);
	if (!devcom)
		return;

	status = mlx5_devcom_comp_trylock(devcom);
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_devcom_comp_unlock(devcom);
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_devcom_comp_unlock(devcom);
}

/* NETDEV_CHANGEUPPER handler: updates the tracker's bonding state from
 * the bond master's slave list. Returns 1 when the tracker changed.
 */
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info
			*lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct lag_func *pf;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int i, idx = -1;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		/* idx counts our netdevs in enslavement order; bond_status
		 * gets one bit per matched slave.
		 */
		mlx5_ldev_for_each(i, 0, ldev) {
			pf = mlx5_lag_pf(ldev, i);
			if (pf->netdev == ndev_tmp) {
				idx++;
				break;
			}
		}
		if (i < MLX5_MAX_PORTS) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash.
	 */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	/* Bond is complete but offload cannot be activated: tell userspace why */
	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

/* NETDEV_CHANGELOWERSTATE handler: captures the per-port lower state
 * (link up / tx enabled) of one of our slave netdevs into the tracker.
 * Returns 1 when the tracker was updated.
 */
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

/* NETDEV_CHANGEINFODATA handler: recompute whether any of our slaves is
 * inactive in the bond; returns 1 only when that aggregate state changed.
 */
static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* Cache the bond master's link speed (Mbps) in the tracker.
 * SPEED_UNKNOWN is stored when there is no master or ethtool fails;
 * a reported SPEED_UNKNOWN from ethtool is normalized to 0.
 */
static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
					  struct net_device *ndev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *bond_dev;
	int err;

	if (netif_is_lag_master(ndev))
		bond_dev = ndev;
	else
		bond_dev = netdev_master_upper_dev_get(ndev);

	if (!bond_dev) {
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
	if (err) {
		netdev_dbg(bond_dev,
			   "Failed to get speed for bond dev %s, err=%d\n",
			   bond_dev->name, err);
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	if (lksettings.base.speed == SPEED_UNKNOWN)
		tracker->bond_speed_mbps = 0;
	else
		tracker->bond_speed_mbps = lksettings.base.speed;
}

/* Returns speed in Mbps.
 */
int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(mdev);
	if (!ldev) {
		ret = -ENODEV;
		goto unlock;
	}

	*speed = ldev->tracker.bond_speed_mbps;

	if (*speed == SPEED_UNKNOWN) {
		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
		ret = -EINVAL;
	}

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	/* Work on a local copy; publish it back only after all handlers ran */
	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	if (changed)
		mlx5_lag_update_tracker_speed(&tracker, ndev);

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

/* Attach @netdev to the lag PF entry that owns @dev and reset its
 * tracked lower state. Runs under lag_lock.
 */
static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	struct lag_func *pf;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	/* Find pf entry by matching dev pointer */
	mlx5_ldev_for_each(i, 0, ldev) {
		pf = mlx5_lag_pf(ldev, i);
		if (pf->dev == dev) {
			pf->netdev = netdev;
			ldev->tracker.netdev_state[i].link_up = 0;
			ldev->tracker.netdev_state[i].tx_enabled = 0;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

/* Detach @netdev from whichever lag PF entry currently holds it. */
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	struct lag_func *pf;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	mlx5_ldev_for_each(i, 0, ldev) {
		pf = mlx5_lag_pf(ldev, i);
		if (pf->netdev == netdev) {
			pf->netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

/* Allocate a lag_func slot for @dev in ldev->pfs, link the two objects
 * and register the MPESW port-change EQ notifier.
 */
static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			      struct mlx5_core_dev *dev)
{
	struct lag_func *pf;
	u32 idx;
	int err;

	pf = kzalloc_obj(*pf);
	if (!pf)
		return -ENOMEM;

	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
		       GFP_KERNEL);
	if (err) {
		kfree(pf);
		return err;
	}

	pf->idx = idx;
	pf->dev = dev;
	dev->priv.lag = ldev;

	MLX5_NB_INIT(&pf->port_change_nb,
		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
	mlx5_eq_notifier_register(dev, &pf->port_change_nb);

	return 0;
}

/* Reverse of mlx5_ldev_add_mdev(): unregister the notifier, unlink
 * @dev from the lag and free its lag_func slot. Silently returns if
 * @dev is not found.
 */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	struct lag_func *pf;
	int i;

	mlx5_ldev_for_each(i, 0, ldev) {
		pf = mlx5_lag_pf(ldev, i);
		if (pf->dev == dev)
			break;
	}
	if (i >= MLX5_MAX_PORTS)
		return;

	if (pf->port_change_nb.nb.notifier_call)
		mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);

	pf->dev = NULL;
	dev->priv.lag = NULL;
	xa_erase(&ldev->pfs, pf->idx);
	kfree(pf);
}

/* Must be called with HCA devcom component lock held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_comp_dev *pos = NULL;
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;
	int err;

	/* Reuse a peer's lag dev when one already exists */
	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
	if (tmp_dev)
		ldev = mlx5_lag_dev(tmp_dev);

	if (!ldev) {
		/* First device: allocate a fresh lag dev. Failures here are
		 * logged and swallowed (returning 0) so probe can proceed
		 * without lag support.
		 */
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		err = mlx5_ldev_add_mdev(ldev, dev);
		if (err) {
			mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
			mlx5_ldev_put(ldev);
			return 0;
		}
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		/* Caller retries on -EAGAIN */
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	err = mlx5_ldev_add_mdev(ldev, dev);
	if (err) {
		mlx5_ldev_put(ldev);
		mutex_unlock(&ldev->lock);
		return err;
	}
	mutex_unlock(&ldev->lock);

	return 0;
}

/* Drop the HCA devcom component registration created at add time. */
static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
{
	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
	dev->priv.hca_devcom_comp = NULL;
}

/* Register this device in the HCA-ports devcom component, matching peers
 * by network namespace and the NIC's SW system image GUID.
 */
static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_match_attr attr = {
		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
		.net = mlx5_core_net(dev),
	};
	u8 len __always_unused;

	mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);

	/* This component is used to sync adding core_dev to lag_dev and to sync
	 * changes of mlx5_adev_devices between LAG layer and other layers.
	 */
	dev->priv.hca_devcom_comp =
		mlx5_devcom_register_component(dev->priv.devc,
					       MLX5_DEVCOM_HCA_PORTS,
					       &attr, mlx5_lag_devcom_event,
					       dev);
	if (!dev->priv.hca_devcom_comp) {
		mlx5_core_err(dev,
			      "Failed to register devcom HCA component.");
		return -EINVAL;
	}

	return 0;
}

/* Detach @dev from its lag on teardown: remove debugfs early, wait out
 * any in-flight mode change, unlink the mdev, notify the peers and drop
 * the devcom registration and the lag reference.
 */
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	/* Send devcom event to notify peers that a device is being removed */
	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
	mlx5_lag_unregister_hca_devcom_comp(dev);
	mlx5_ldev_put(ldev);
}

/* Attach @dev to a (possibly shared) lag during probe. Retries while a
 * peer's mode change is in progress, then announces the device to peers
 * and creates its lag debugfs entries.
 */
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!mlx5_lag_is_supported(dev))
		return;

	if (mlx5_lag_register_hca_devcom_comp(dev))
		return;

recheck:
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);

	if (err) {
		msleep(100);
		goto recheck;
	}
	/* Send devcom event to notify peers that a device was added */
	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
	mlx5_ldev_add_debugfs(dev);
}

/* A slave netdev is going away: unlink it, clear NDEVS_READY, and if a
 * lag is currently active kick the bond work to re-evaluate it.
 */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

/* A slave netdev appeared: link it, mark the lag ready once all ports
 * have netdevs, and schedule the bond work.
 */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int num = 0;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);
	num = mlx5_lag_num_netdevs(ldev);
	if (num >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

/* Walk ldev->pfs backwards from @start_idx down to @end_idx and return
 * the first index holding a populated PF, or -1 if none.
 */
int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
{
	struct lag_func *pf;
	int i;

	for (i = start_idx; i >= end_idx; i--) {
		pf = xa_load(&ldev->pfs, i);
		if (pf && pf->dev)
			return i;
	}
	return -1;
}

/* Forward counterpart: first populated PF index at or after @start_idx,
 * or MLX5_MAX_PORTS (acts as an "end" sentinel) if none.
 */
int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
{
	struct lag_func *pf;
	unsigned long idx;

	xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
		if (pf->dev)
			return idx;
	return MLX5_MAX_PORTS;
}

/* True when @dev is part of an active RoCE lag. */
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

/* True when @dev belongs to a lag that is currently active. */
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev &&
	      __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

/* True when the active lag uses hash-based port selection. */
bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

/* True when @dev is the primary (first-sequence) PF of an active lag.
 * NOTE(review): mlx5_lag_get_dev_index_by_seq() is called with a
 * possibly-NULL ldev before the ldev check — presumably it tolerates
 * NULL and returns a negative index; verify against its definition.
 */
bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	bool res = false;
	int idx;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
		pf = mlx5_lag_pf(ldev, idx);
		res = pf && dev == pf->dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

/* True when @dev is part of an active SRIOV (VF) lag. */
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

/* True when the lag runs with a shared FDB across its PFs. */
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

/* Block lag mode changes (and disable any currently-active lag) while a
 * caller reconfigures the device. Paired with mlx5_lag_enable_change().
 * Takes the devcom component lock, then ldev->lock.
 */
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		if (ldev->mode == MLX5_LAG_MODE_MPESW)
			mlx5_lag_disable_mpesw(ldev);
		else
			mlx5_disable_lag(ldev);
	}

	mutex_unlock(&ldev->lock);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
}

/* Re-allow lag mode changes and kick the bond work to re-evaluate. */
void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

/* Map a slave netdev to the physical port currently serving it,
 * according to the virtual-to-physical port map. Returns 0 (and the
 * v2p entry for port 0) when @slave is not found or there is no RoCE lag.
 */
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	mlx5_ldev_for_each(i, 0, ldev) {
		pf = mlx5_lag_pf(ldev, i);
		if (pf->netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

/* Number of ports in @dev's lag, or 0 when the device has no lag. */
u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

/* Iterator over @dev's lag peers: *i is the cursor (start at 0), each
 * call returns the next peer mdev (skipping @dev itself) or NULL when
 * exhausted.
 */
struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	int idx;
	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	if (*i == MLX5_MAX_PORTS)
		goto unlock;
	/* Advance to the first populated slot that is not @dev itself */
	mlx5_ldev_for_each(idx, *i, ldev) {
		pf = mlx5_lag_pf(ldev, idx);
		if (pf->dev != dev)
			break;
	}

	if (idx == MLX5_MAX_PORTS) {
		*i = idx;
		goto unlock;
	}
	*i = idx + 1;

	pf = mlx5_lag_pf(ldev, idx);
	peer_dev = pf->dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);

/* Query congestion statistics summed over all lag ports (or just @dev
 * when no lag is active). @values receives @num_counters accumulated
 * big-endian counters read at @offsets within the FW output buffer.
 * The device list is snapshotted under lag_lock; the FW commands run
 * outside the lock.
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	int ret = 0, i, j, idx = 0;
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	int num_ports;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		mlx5_ldev_for_each(i, 0, ldev) {
			pf = mlx5_lag_pf(ldev, i);
			mdev[idx++] = pf->dev;
		}
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);