// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

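/* Multipath LAG is possible only when both PF devices are present and the
 * eswitch configuration on both ports allows it.
 */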
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set the TX port affinity of a multipath LAG
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	/* Notify a PF only if its TX stays enabled under the new affinity */
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

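/* Flush callback passed to register_fib_notifier(); waits for all queued
 * FIB work on the multipath workqueue to finish.
 */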
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

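/* FIB event deferred from the notifier callback; processed by
 * mlx5_lag_fib_update() on the multipath workqueue.
 */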
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

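/* Track a single route: a two-nexthop route spanning both ports of the HCA
 * activates multipath LAG, while a single-nexthop route pins TX affinity to
 * the port backing that nexthop.
 */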
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				i = MLX5_LAG_NORMAL_AFFINITY;
			else
				++i;	/* port affinity values are 1-based */

			mlx5_lag_set_port_affinity(ldev, i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

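/* React to nexthop changes on the tracked route: losing one nexthop moves
 * TX affinity to the peer port; regaining the second nexthop restores
 * normal (dual-port) affinity.
 */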
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

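/* Work item handler; runs in process context and takes the RTNL lock to
 * serialize against netdev and LAG state changes.
 */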
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

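/* Allocate and initialize a deferred FIB work item. GFP_ATOMIC is used
 * because the FIB notifier may be invoked in atomic context.
 */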
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

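/* FIB notifier callback: filter events relevant to the LAG netdevs, copy
 * the notifier info, take a reference on the fib_info and defer the rest
 * of the processing to the workqueue.
 */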
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh) {
			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
			return notifier_from_errno(-EINVAL);
		}
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev)
			return NOTIFY_DONE;
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

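/* Create the multipath workqueue and register the FIB notifier. Safe to
 * call more than once; subsequent calls are no-ops.
 */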
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

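/* Unregister the FIB notifier and destroy the workqueue. A no-op if
 * mlx5_lag_mp_init() never completed successfully.
 */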
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
}