/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $FreeBSD$
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

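/*
 * All GID table maintenance is funneled through a single ordered
 * workqueue, so add, delete and rescan jobs execute one at a time and
 * in the order they were queued.
 */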
static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;
};

struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

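/*
 * Return a bitmask of the GID types supported by the given port.
 * Ports that do not speak RoCE only support the plain IB GID type;
 * RoCE ports get one bit per supported encapsulation (RoCE v1 and/or
 * RoCE v2).
 */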
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

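/*
 * Apply a single GID add or delete to every GID type the port
 * supports, so the cache stays consistent across RoCE versions.
 */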
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, &gid_attr);
				break;
			}
		}
	}
}

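/*
 * Filter callbacks for the ib_enum_roce_netdev() family: the first
 * matches only the netdev passed in "cookie", the second matches any
 * attached netdev.
 */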
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	struct net_device *ndev = (struct net_device *)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

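/*
 * (Re-)install the default GIDs for "idev" on the given port and
 * return the number of reserved default entries, which is also the
 * index of the first non-default slot in the GID table.
 */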
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct net_device *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

#define ETH_IPOIB_DRV_NAME	"ib"

/* Return non-zero if "dev" is an IPoIB interface. */
static inline int
is_eth_ipoib_intf(struct net_device *dev)
{
	return (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME) == 0);
}

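/*
 * Bring the GID table for (device, port) in sync with the addresses
 * configured on "ndev" and its VLAN children:
 *
 * 1) make sure the default GIDs are present,
 * 2) snapshot all IPv4/IPv6 interface addresses and add any GID that
 *    is still missing,
 * 3) walk the non-default GID entries and delete those whose address
 *    is no longer configured.
 */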
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry)	entry;
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct net_device *ndev;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	CURVNET_SET(ndev->if_vnet);
	IFNET_RLOCK();
	TAILQ_FOREACH(idev, &V_ifnet, if_link) {
		if (idev != ndev) {
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		IF_ADDR_RLOCK(idev);
#if defined(INET)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			entry->ndev = idev;
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
			entry->ndev = idev;

			/*
			 * Strip the kernel-embedded IPv6 scope ID so
			 * the GID matches the on-the-wire address.
			 */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		IF_ADDR_RUNLOCK(idev);
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* stop if this GID is already cached */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		/* a cached entry was found for some GID type */
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/*
		 * Skip entries without a netdev. For the rest, drop the
		 * reference taken by ib_get_cached_gid(); only the
		 * pointer value is compared below.
		 */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero the whole union so the memcmp() below is exact */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* the address is still configured; keep this GID */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

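/*
 * Worker: rescan addresses on every RDMA device port that is attached
 * to the netdev recorded in the work item, then drop the netdev
 * reference taken when the work was queued.
 */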
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

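/*
 * Queue an address rescan for "ndev". VLAN interfaces are resolved to
 * their parent Ethernet interface first; IPoIB and other non-Ethernet
 * interfaces are ignored.
 */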
static void
roce_gid_queue_scan_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	if (is_eth_ipoib_intf(ndev))
		return;

	if (ndev->if_type != IFT_ETHER) {
		if (ndev->if_type == IFT_L2VLAN) {
			ndev = rdma_vlan_dev_real_dev(ndev);
			if (ndev != NULL)
				goto retry;
		}
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

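/*
 * Worker: purge every GID associated with the given netdev from all
 * device GID tables, then drop the netdev reference.
 */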
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_delete_all_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure the job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

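/*
 * Notifier callback shared by the inetaddr and netdevice notifier
 * chains: interface unregistration purges all of the interface's
 * GIDs, while registration and address changes trigger a rescan.
 */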
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *ndev = ptr;

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

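/*
 * Worker: rescan addresses on every netdev attached to any port of
 * the given device.
 */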
static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

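/*
 * Minimal usage sketch (hypothetical caller, not taken from any
 * in-tree driver): a driver would typically request a rescan once its
 * ports are up, e.g.
 *
 *	error = roce_rescan_device(ib_dev);
 *	if (error != 0)
 *		return (error);
 *
 * and flush the workqueue before removing the ib_device, as noted
 * above.
 */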
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. The same notifier block is reused for
	 * both chains; register with the netdevice chain last to make
	 * sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	return 0;
}

void __exit roce_gid_mgmt_cleanup(void)
{
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all GID deletion tasks complete before we go down,
	 * to avoid any reference to freed memory. By the time ib-core
	 * is removed, all physical devices have been removed, so there
	 * is no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}