/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "core_priv.h"
#include <sys/eventhandler.h>

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

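/*
 * All GID table updates are funneled through this single ordered
 * workqueue, so add, delete and rescan jobs execute one at a time.
 */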
static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	if_t ndev;
};

struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

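/*
 * Return a bitmask with one bit set per GID type the given port
 * supports; bit positions correspond to enum ib_gid_type values.
 * Ports which do not speak RoCE only support the plain IB GID type.
 */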
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

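/*
 * Add or delete the given GID on a port, once for every GID type the
 * port supports, so that e.g. RoCEv1 and RoCEv2 table entries stay in
 * sync with the underlying network device.
 */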
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, if_t ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
						 gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
						 gid, &gid_attr);
				break;
			}
		}
	}
}

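/*
 * Filter callback for ib_enum_all_roce_netdevs(): match only ports
 * currently bound to the network device passed in "cookie".
 */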
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if_t ndev = (if_t)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

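/*
 * Filter callback matching every port that has a network device
 * attached; used when rescanning a whole ib_device.
 */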
static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    if_t idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

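/*
 * Install the default GIDs for a port and return the number of table
 * slots they occupy, one per supported GID type.
 */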
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, if_t idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

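/*
 * Snapshot of one interface address, IPv4 or IPv6, taken while walking
 * the interface lists; "sa" aliases the address as a generic sockaddr.
 */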
struct ipx_entry {
	STAILQ_ENTRY(ipx_entry)	entry;
	union ipx_addr {
		struct sockaddr sa[0];
		struct sockaddr_in v4;
		struct sockaddr_in6 v6;
	} ipx_addr;
	if_t ndev;
};

STAILQ_HEAD(ipx_queue, ipx_entry);

#ifdef INET
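/*
 * if_foreach_addr_type() callback: queue a copy of one IPv4 interface
 * address for the GID reconciliation pass below.
 */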
static u_int
roce_gid_update_addr_ifa4_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_callback: "
		    "couldn't allocate entry for IPv4 update\n");
		return (0);
	}
	entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;
	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

#ifdef INET6
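/*
 * if_foreach_addr_type() callback: queue a copy of one IPv6 interface
 * address, with its scope ID stripped, for the reconciliation pass.
 */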
static u_int
roce_gid_update_addr_ifa6_cb(void *arg, struct ifaddr *ifa, u_int count)
{
	struct ipx_queue *ipx_head = arg;
	struct ipx_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		pr_warn("roce_gid_update_addr_callback: "
		    "couldn't allocate entry for IPv6 update\n");
		return (0);
	}
	entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
	entry->ndev = ifa->ifa_ifp;

	/* trash IPv6 scope ID */
	sa6_recoverscope(&entry->ipx_addr.v6);
	entry->ipx_addr.v6.sin6_scope_id = 0;

	STAILQ_INSERT_TAIL(ipx_head, entry, entry);

	return (1);
}
#endif

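/*
 * Reconcile the GID table of one port with the current set of
 * interface addresses: snapshot the addresses of the bound network
 * device and of any VLAN stacked on it, across all VNETs, then add
 * GIDs missing from the cache and delete cached GIDs whose address
 * has gone away.
 */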
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    if_t ndev, void *cookie)
{
	struct epoch_tracker et;
	struct if_iter iter;
	struct ipx_entry *entry;
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	if_t ifp;
	int default_gids;
	u16 index_num;
	int i;

	struct ipx_queue ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
	    CURVNET_SET(vnet_iter);
	    NET_EPOCH_ENTER(et);
	    for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) {
		/* accept the bound device itself and VLANs stacked on it */
		if (ifp != ndev) {
			if (if_gettype(ifp) != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(ifp))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
#if defined(INET)
		if_foreach_addr_type(ifp, AF_INET, roce_gid_update_addr_ifa4_cb, &ipx_head);
#endif
#if defined(INET6)
		if_foreach_addr_type(ifp, AF_INET6, roce_gid_update_addr_ifa6_cb, &ipx_head);
#endif
	    }
	    NET_EPOCH_EXIT(et);
	    CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		/* release the reference taken by ib_get_cached_gid() */
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* free the snapshot list */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

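/*
 * Deferred work: rescan the ports bound to one network device and
 * bring their GID tables up to date.
 */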
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

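/*
 * Schedule a GID table rescan for the given network device. VLAN
 * devices are resolved to their real Ethernet device first; anything
 * that is not Ethernet-based is ignored.
 */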
static void
roce_gid_queue_scan_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

retry:
	switch (if_gettype(ndev)) {
	case IFT_ETHER:
		break;
	case IFT_L2VLAN:
		ndev = rdma_vlan_dev_real_dev(ndev);
		if (ndev != NULL)
			goto retry;
		/* FALLTHROUGH */
	default:
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

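/*
 * Deferred work: drop every cached GID that references the given
 * network device.
 */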
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

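/*
 * Queue deletion of all GIDs belonging to a network device and wait
 * for it to finish, so the device can safely go away afterwards.
 */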
static void
roce_gid_delete_all_event(if_t ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

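/*
 * Notifier callback registered on both the inetaddr and netdevice
 * chains: translate device and address events into GID table updates.
 */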
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	if_t ndev = netdev_notifier_info_to_ifp(ptr);

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static eventhandler_tag eh_ifnet_event;

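/*
 * ifnet eventhandler: when the PCP of a non-VLAN interface changes,
 * flush its GID entries and rescan so the table is rebuilt.
 */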
static void
roce_ifnet_event(void *arg, if_t ifp, int event)
{
	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
		return;

	/* make sure GID table is reloaded */
	roce_gid_delete_all_event(ifp);
	roce_gid_queue_scan_event(ifp);
}

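/*
 * Deferred work: enumerate all ports of one ib_device and refresh
 * their GID tables.
 */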
static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/* Caller must flush roce_gid_mgmt_wq before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

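/*
 * Module init: create the ordered workqueue and hook up the address,
 * netdevice and ifnet event notifiers.
 */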
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

	return 0;
}


void __exit roce_gid_mgmt_cleanup(void)
{

	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all GID deletion tasks complete before we go down,
	 * to avoid any reference to freed memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so there is no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}