/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);
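/* The device supports three TX port-selection modes, resolved by
 * get_port_sel_mode() below:
 * - queue affinity: each send queue is pinned to one physical port
 *   via the tx_remap_affinity_* fields of the lag context;
 * - hash (PORT_SELECT_FT): a port-selection flow table spreads
 *   traffic by packet hash;
 * - mpesw: multi-port E-Switch, no TX remapping by the lag context.
 */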
static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
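/* Example (illustrative): with two tracked ports where port 0 is
 * tx_enabled and link_up but port 1 is down, mlx5_infer_tx_enabled()
 * below fills ports = {0} and num_enabled = 1.  If no port qualifies,
 * it falls back to the disabled list, so callers always get a
 * non-empty set of ports to map.
 */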
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	mlx5_lag_mpesw_cleanup(ldev);
	cancel_work_sync(&ldev->mpesw_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}
/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}
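/* Worked example (illustrative): num_ports = 2, buckets = 2, and
 * port 2 (index 1) is down.  The native mapping {1, 1, 2, 2} is
 * written first, then the two buckets of the disabled port are
 * remapped to a randomly chosen enabled port, giving {1, 1, 1, 1}.
 * With all ports up (or all down) the native mapping is kept as-is.
 */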
static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}
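/* For reference, the messages printed at LAG creation look roughly
 * like this (the exact prefix depends on the device name):
 *
 *   mlx5_core 0000:08:00.0: lag map: port 1:1 port 2:2
 *   mlx5_core 0000:08:00.0: shared_fdb:0 mode:queue_affinity
 *
 * or, in hash mode:
 *
 *   mlx5_core 0000:08:00.0: lag map active ports: 1, 2
 *   mlx5_core 0000:08:00.0: shared_fdb:0 mode:hash
 */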
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}
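/* Typical call paths (see mlx5_do_bond() below): RoCE and SR-IOV
 * bonds are activated with MLX5_LAG_MODE_ROCE / MLX5_LAG_MODE_SRIOV,
 * while the multipath and MPESW code (mp.c / mpesw.c) pass their own
 * modes.  mlx5_deactivate_lag() undoes what mlx5_activate_lag() set
 * up, in reverse order.
 */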
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	dev = ldev->pf[MLX5_LAG_P1].dev;
	if (mlx5_sriov_is_enabled(dev) && !is_mdev_switchdev_mode(dev))
		return false;

	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}
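/* Shared FDB requires both ports in switchdev mode with vport match
 * metadata enabled, an established devcom pairing, and firmware
 * support (native FDB selection, root_ft_on_other_esw,
 * esw_shared_ingress_acl); mlx5_shared_fdb_supported() below checks
 * all of these.
 */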
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}
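/* mlx5_do_bond() resolves the tracker state into one of three
 * transitions: activate a new LAG (RoCE LAG if no PF has SR-IOV
 * enabled and all PFs are in legacy eswitch mode, VF LAG otherwise),
 * modify the port mapping of an existing LAG, or disable it.  MPESW
 * mode is left untouched by modify/disable.
 */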
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}
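/* Example (illustrative): on a 2-port device where both PF netdevs
 * are enslaved to the same bond, the loop below yields
 * bond_status = 0x3, which equals GENMASK(ldev->ports - 1, 0), and
 * num_slaves = 2, so is_in_lag evaluates true.  If a third, foreign
 * netdev joins the same bond, num_slaves becomes 3 and the device is
 * unbonded.
 */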
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}
/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}
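/* The ldev is shared by all PFs of the same physical device: the
 * first PF allocates it with a kref of 1 (mlx5_lag_dev_alloc), each
 * additional PF takes a reference in __mlx5_lag_dev_add_mdev(), and
 * mlx5_lag_remove_mdev() below drops one; the last put frees the
 * ldev via mlx5_ldev_free().
 */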
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) &&
	      test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		/* Compare each tracked netdev against the slave; the flattened
		 * text compared pf[MLX5_LAG_P1] on every iteration, which made
		 * the loop a no-op.
		 */
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);
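/* Note: mlx5_lag_get_peer_mdev() below returns the other PF's mdev
 * under lag_lock but takes no reference on it; callers are expected
 * to ensure the peer device remains valid for the duration of use.
 */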
struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);