// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set tx port affinity for a multipath LAG
 * @ldev: lag device
 * @port: requested affinity:
 *        0 (MLX5_LAG_NORMAL_AFFINITY) - normal affinity, both ports enabled.
 *        1 (MLX5_LAG_P1_AFFINITY) - affinity to port 1 only.
 *        2 (MLX5_LAG_P2_AFFINITY) - affinity to port 2 only.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

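	/* Notify each PF whose port carries traffic under the new affinity */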
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

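/* Flush any FIB event work still queued on the multipath workqueue. */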
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

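/* Track a single route: a single-nexthop route through one of the LAG ports
 * pins tx affinity to that port, while a two-nexthop route spanning both
 * ports of the HCA activates multipath LAG with normal (dual-port) affinity.
 */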
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Ignore a different route unless its priority (metric) value is
	 * lower than the tracked route's.
	 */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				return;

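			/* netdev index is zero based, port affinity is one based */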
			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the tracked route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

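/* Allocate and initialize a FIB event work item. Called from the FIB
 * notifier callback, hence the GFP_ATOMIC allocation.
 */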
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

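/* FIB notifier callback: filter the events of interest and defer the actual
 * handling to the multipath workqueue, where RTNL can be taken.
 */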
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh) /* routes using nexthop objects are not offloaded */
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take a reference on fib_info to prevent it from being
		 * freed while the work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}