// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set the LAG traffic affinity to a port
 * @ldev: lag device
 * @port: affinity to set:
 *	  0 - normal affinity (traffic distributed over both ports).
 *	  1 - affinity to port 1.
 *	  2 - affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

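	/* Notify only the port(s) that remain TX-enabled, so listeners
	 * can react to the new affinity before the LAG port mapping is
	 * modified below.
	 */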
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route if it is the one we follow */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

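			/* The netdev index is 0-based while affinity
			 * values are 1-based, so shift by one; a negative
			 * index (foreign netdev) falls back to normal
			 * affinity.
			 */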
			if (i < 0)
				i = MLX5_LAG_NORMAL_AFFINITY;
			else
				++i;

			mlx5_lag_set_port_affinity(ldev, i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same hca */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the tracked route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

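		/* i is a 0-based port index; (i + 1) % 2 + 1 maps it to
		 * the peer's 1-based affinity value (0 -> 2, 1 -> 1).
		 */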
		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

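	/* The FIB notifier chain may be called in atomic context,
	 * hence GFP_ATOMIC for the deferred work allocation.
	 */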
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh) {
			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
			return notifier_from_errno(-EINVAL);
		}
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

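	/* Handling the event requires the RTNL lock, so defer it to
	 * the workqueue instead of processing it inline.
	 */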
	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	if (mp->fib_nb.notifier_call)
		return 0;

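	/* A single-threaded workqueue keeps the FIB events ordered
	 * with respect to one another.
	 */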
	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}


void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
}