/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

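/* Firmware command wrappers. CREATE_LAG and MODIFY_LAG carry the LAG
 * context whose tx_remap_affinity_1/2 fields hold the logical-to-physical
 * port mapping computed by mlx5_infer_tx_affinity_mapping() below.
 */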
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

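/* Compute the logical-to-physical port mapping from the bond state in
 * @tracker: each port maps to itself while it is up and TX-enabled,
 * otherwise its traffic is steered to the other port.
 */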
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	*port1 = 1;
	*port2 = 2;
	if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
	    !tracker->netdev_state[MLX5_LAG_P1].link_up) {
		*port1 = 2;
		return;
	}

	if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
	    !tracker->netdev_state[MLX5_LAG_P2].link_up)
		*port2 = 1;
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2]);
	if (err)
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
	return err;
}

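/* Create the hardware LAG and record the requested mode (RoCE or SR-IOV)
 * in ldev->flags. The flags are set only after the firmware command has
 * succeeded, so the LAG is never reported active while half-created.
 */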
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	return 0;
}

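/* Clear the LAG mode flags and issue DESTROY_LAG. The flags are cleared
 * up front so the LAG is reported inactive even if the firmware command
 * fails; in that case only a driver restart can recover.
 */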
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

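/* A bond can be offloaded only when both PFs are present and the eswitch
 * (or, without CONFIG_MLX5_ESWITCH, the SR-IOV state) of both devices
 * permits it.
 */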
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

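/* Reconcile the hardware LAG state with the bond state captured by the
 * netdev notifier: activate the LAG (in RoCE or SR-IOV mode) when a valid
 * bond appears, update the port mapping while it exists, and deactivate it
 * when the bond goes away. For RoCE LAG the IB auxiliary devices are
 * removed before activation and only dev0's is re-added afterwards, so a
 * single IB device is exposed for the bond.
 */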
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev))
		return;

	spin_lock(&lag_lock);
	tracker = ldev->tracker;
	spin_unlock(&lag_lock);

	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
					MLX5_LAG_FLAG_SRIOV);
		if (err) {
			if (roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		}

		if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		roce_lag = __mlx5_lag_is_roce(ldev);

		if (roce_lag) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_disable_roce(dev1);
		}

		err = mlx5_deactivate_lag(ldev);
		if (err)
			return;

		if (roce_lag)
			mlx5_lag_add_devices(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		/* 1 sec delay. */
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mlx5_dev_list_unlock();
}

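/* Handle NETDEV_CHANGEUPPER: decide whether both of our netdevs (and only
 * them) are now enslaved to the same bond master in a supported TX mode,
 * and update tracker->is_bonded accordingly. Returns 1 if the bonding
 * state changed and the bond work should run, 0 otherwise.
 */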
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

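/* Handle NETDEV_CHANGELOWERSTATE for one of our slave netdevs: record its
 * link and TX-enable state in the tracker, where it is later used by
 * mlx5_infer_tx_affinity_mapping(). Returns 1 if the tracker was updated,
 * 0 if the event is not relevant.
 */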
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	spin_lock(&lag_lock);
	ldev->tracker = tracker;
	spin_unlock(&lag_lock);

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
	struct mlx5_lag *ldev;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	return ldev;
}

static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

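/* Register a PF with the shared lag device. The PCI function number is
 * used as the port index, so only functions 0 and 1 are accepted. Returns
 * the index on success or -EPERM for other functions.
 */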
static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev,
			       struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return -EPERM;

	spin_lock(&lag_lock);
	ldev->pf[fn].dev    = dev;
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;

	dev->priv.lag = ldev;

	spin_unlock(&lag_lock);

	return fn;
}

static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
				   struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

	dev->priv.lag = NULL;
	spin_unlock(&lag_lock);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;
	int i, err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc();
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return;
		}
	}

	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
		return;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;

	if (!ldev->nb.notifier_call) {
		ldev->nb.notifier_call = mlx5_lag_netdev_event;
		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
			ldev->nb.notifier_call = NULL;
			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
		}
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		return;

	if (__mlx5_lag_is_active(ldev))
		mlx5_deactivate_lag(ldev);

	mlx5_lag_dev_remove_pf(ldev, dev);

	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			break;

	if (i == MLX5_MAX_PORTS) {
		if (ldev->nb.notifier_call) {
			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
			ldev->nb.notifier_call = NULL;
		}
		mlx5_lag_mp_cleanup(ldev);
		cancel_delayed_work_sync(&ldev->bond_work);
		mlx5_lag_dev_free(ldev);
	}
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();
	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		goto unlock;

	mlx5_do_bond(ldev);

unlock:
	mlx5_dev_list_unlock();
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

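/* Sum the congestion counters of all LAG member devices (or of @dev alone
 * when no LAG is active) into @values, using @offsets to locate each
 * counter in the QUERY_CONG_STATISTICS output.
 */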
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);