1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
5  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
6  * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
7  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <linux/mutex.h>
42 #include <linux/slab.h>
43 #include <linux/workqueue.h>
44 #include <linux/module.h>
45 #include <net/if_llatbl.h>
46 #include <net/route.h>
47 #include <net/route/nhop.h>
48 #include <net/netevent.h>
49 #include <rdma/ib_addr.h>
50 #include <rdma/ib.h>
51 
52 #include <netinet/in_fib.h>
53 #include <netinet/if_ether.h>
54 #include <netinet/ip_var.h>
55 #include <netinet6/scope6_var.h>
56 #include <netinet6/in6_pcb.h>
57 #include <netinet6/in6_fib.h>
58 
59 #include "core_priv.h"
60 
/*
 * One queued address resolution request.
 *
 * Requests are allocated by rdma_resolve_ip(), linked on "req_list" and
 * completed (callback invoked, then freed) by process_req().
 */
struct addr_req {
	/* linkage on req_list, later on the local done list in process_req() */
	struct list_head list;
	/* private copy of the source address; may be updated by resolution */
	struct sockaddr_storage src_addr;
	/* private copy of the destination address */
	struct sockaddr_storage dst_addr;
	/* caller-provided result; also the key matched by rdma_addr_cancel() */
	struct rdma_dev_addr *addr;
	/* client whose refcount is held for the lifetime of the request */
	struct rdma_addr_client *client;
	/* opaque pointer handed back to the callback */
	void *context;
	/* completion callback, invoked from process_req() */
	void (*callback)(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context);
	/* absolute deadline in jiffies */
	int timeout;
	/* 0 or negative errno; -ENODATA means resolution is still pending */
	int status;
};
73 
/* Delayed-work handler that retries and completes queued requests. */
static void process_req(struct work_struct *work);

/* Held around every access to req_list in this file. */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() inserts them ordered by timeout. */
static LIST_HEAD(req_list);
/* Delayed work item running process_req(); (re)armed by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue used for "work"; allocated in addr_init(). */
static struct workqueue_struct *addr_wq;
80 
81 int rdma_addr_size(struct sockaddr *addr)
82 {
83 	switch (addr->sa_family) {
84 	case AF_INET:
85 		return sizeof(struct sockaddr_in);
86 	case AF_INET6:
87 		return sizeof(struct sockaddr_in6);
88 	case AF_IB:
89 		return sizeof(struct sockaddr_ib);
90 	default:
91 		return 0;
92 	}
93 }
94 EXPORT_SYMBOL(rdma_addr_size);
95 
/*
 * Variant of rdma_addr_size() for an address held in a
 * struct sockaddr_in6: yields zero when the size implied by the address
 * family would not fit into that structure.
 */
int rdma_addr_size_in6(struct sockaddr_in6 *addr)
{
	int size;

	size = rdma_addr_size((struct sockaddr *)addr);
	if (size > sizeof(*addr))
		return 0;

	return size;
}
EXPORT_SYMBOL(rdma_addr_size_in6);
103 
/*
 * Variant of rdma_addr_size() for an address held in a
 * struct sockaddr_storage: yields zero when the size implied by the
 * address family would not fit into that structure.
 */
int rdma_addr_size_kss(struct sockaddr_storage *addr)
{
	int size;

	size = rdma_addr_size((struct sockaddr *)addr);
	if (size > sizeof(*addr))
		return 0;

	return size;
}
EXPORT_SYMBOL(rdma_addr_size_kss);
111 
/*
 * Client used for resolutions issued by this file itself, see
 * rdma_addr_find_l2_eth_by_grh(); registered in addr_init() and
 * unregistered in addr_cleanup().
 */
static struct rdma_addr_client self;
113 
114 void rdma_addr_register_client(struct rdma_addr_client *client)
115 {
116 	atomic_set(&client->refcount, 1);
117 	init_completion(&client->comp);
118 }
119 EXPORT_SYMBOL(rdma_addr_register_client);
120 
121 static inline void put_client(struct rdma_addr_client *client)
122 {
123 	if (atomic_dec_and_test(&client->refcount))
124 		complete(&client->comp);
125 }
126 
127 void rdma_addr_unregister_client(struct rdma_addr_client *client)
128 {
129 	put_client(client);
130 	wait_for_completion(&client->comp);
131 }
132 EXPORT_SYMBOL(rdma_addr_unregister_client);
133 
134 static inline void
135 rdma_copy_addr_sub(u8 *dst, const u8 *src, unsigned min, unsigned max)
136 {
137 	if (min > max)
138 		min = max;
139 	memcpy(dst, src, min);
140 	memset(dst + min, 0, max - min);
141 }
142 
143 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, if_t dev,
144 		     const unsigned char *dst_dev_addr)
145 {
146 	int dev_type = if_gettype(dev);
147 
148 	/* check for loopback device */
149 	if (if_getflags(dev) & IFF_LOOPBACK) {
150 		dev_addr->dev_type = ARPHRD_ETHER;
151 		memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
152 		memset(dev_addr->broadcast, 0, MAX_ADDR_LEN);
153 		memset(dev_addr->dst_dev_addr, 0, MAX_ADDR_LEN);
154 		dev_addr->bound_dev_if = if_getindex(dev);
155 		return (0);
156 	} else if (dev_type == IFT_INFINIBAND)
157 		dev_addr->dev_type = ARPHRD_INFINIBAND;
158 	else if (dev_type == IFT_ETHER || dev_type == IFT_L2VLAN)
159 		dev_addr->dev_type = ARPHRD_ETHER;
160 	else
161 		dev_addr->dev_type = 0;
162 	rdma_copy_addr_sub(dev_addr->src_dev_addr, if_getlladdr(dev),
163 			   if_getaddrlen(dev), MAX_ADDR_LEN);
164 	rdma_copy_addr_sub(dev_addr->broadcast, if_getbroadcastaddr(dev),
165 			   if_getaddrlen(dev), MAX_ADDR_LEN);
166 	if (dst_dev_addr != NULL) {
167 		rdma_copy_addr_sub(dev_addr->dst_dev_addr, dst_dev_addr,
168 				   if_getaddrlen(dev), MAX_ADDR_LEN);
169 	}
170 	dev_addr->bound_dev_if = if_getindex(dev);
171 	return 0;
172 }
173 EXPORT_SYMBOL(rdma_copy_addr);
174 
175 int rdma_translate_ip(const struct sockaddr *addr,
176 		      struct rdma_dev_addr *dev_addr)
177 {
178 	if_t dev;
179 	int ret;
180 
181 	if (dev_addr->bound_dev_if) {
182 		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
183 	} else switch (addr->sa_family) {
184 #ifdef INET
185 	case AF_INET:
186 		dev = ip_ifp_find(dev_addr->net,
187 			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
188 		break;
189 #endif
190 #ifdef INET6
191 	case AF_INET6:
192 		dev = ip6_ifp_find(dev_addr->net,
193 			((const struct sockaddr_in6 *)addr)->sin6_addr, 0);
194 		break;
195 #endif
196 	default:
197 		dev = NULL;
198 		break;
199 	}
200 
201 	if (dev != NULL) {
202 		/* disallow connections through 127.0.0.1 itself */
203 		if (if_getflags(dev) & IFF_LOOPBACK)
204 			ret = -EINVAL;
205 		else
206 			ret = rdma_copy_addr(dev_addr, dev, NULL);
207 		dev_put(dev);
208 	} else {
209 		ret = -ENODEV;
210 	}
211 	return ret;
212 }
213 EXPORT_SYMBOL(rdma_translate_ip);
214 
215 static void set_timeout(int time)
216 {
217 	int delay;	/* under FreeBSD ticks are 32-bit */
218 
219 	delay = time - jiffies;
220 	if (delay <= 0)
221 		delay = 1;
222 	else if (delay > hz)
223 		delay = hz;
224 
225 	mod_delayed_work(addr_wq, &work, delay);
226 }
227 
228 static void queue_req(struct addr_req *req)
229 {
230 	struct addr_req *temp_req;
231 
232 	mutex_lock(&lock);
233 	list_for_each_entry_reverse(temp_req, &req_list, list) {
234 		if (time_after_eq(req->timeout, temp_req->timeout))
235 			break;
236 	}
237 
238 	list_add(&req->list, &temp_req->list);
239 
240 	if (req_list.next == &req->list)
241 		set_timeout(req->timeout);
242 	mutex_unlock(&lock);
243 }
244 
#if defined(INET) || defined(INET6)
/*
 * Ask the interface "ifp" for the link-layer address that belongs to
 * the multicast address "dst_in" and store it, zero padded to
 * MAX_ADDR_LEN bytes, in "edst". Returns the error code from
 * if_resolvemulti(); "edst" is only written on success.
 */
static int addr_resolve_multi(u8 *edst, if_t ifp, struct sockaddr *dst_in)
{
	struct sockaddr_dl sdl;
	struct sockaddr *llsa = (struct sockaddr *)&sdl;
	int error;

	sdl.sdl_len = sizeof(sdl);

	error = if_resolvemulti(ifp, &llsa, dst_in);
	if (error != 0)
		return (error);

	rdma_copy_addr_sub(edst, LLADDR((struct sockaddr_dl *)llsa),
	    if_getaddrlen(ifp), MAX_ADDR_LEN);
	return (0);
}
#endif
263 
#ifdef INET
/*
 * Resolve an IPv4 destination to an outgoing interface and a
 * destination link-layer address.
 *
 * src_in: source address. When it is INADDR_ANY it is overwritten with
 *         an address taken from the route (or with the destination for
 *         a loopback route); the port number is preserved.
 * dst_in: destination address. INADDR_ANY is rejected.
 * addr:   supplies the VNET and the optional bound_dev_if; hoplimit is
 *         always set and network is set for gateway routes.
 * edst:   receives the destination link-layer address, MAX_ADDR_LEN
 *         bytes.
 * ifpp:   on success receives the outgoing interface; the reference on
 *         it is released by the caller, addr_resolve(), via dev_put().
 *
 * Must be called inside the network epoch. Returns 0 or a negative
 * errno; EWOULDBLOCK and EAGAIN are reported as -ENODATA.
 */
static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 u8 *edst,
			 if_t *ifpp)
{
	/* bit flags describing which of the two addresses are wildcards */
	enum {
		ADDR_VALID = 0,
		ADDR_SRC_ANY = 1,
		ADDR_DST_ANY = 2,
	};
	/* writable copy of the destination, used for all lookups below */
	struct sockaddr_in dst_tmp = *dst_in;
	in_port_t src_port;
	/* replacement source address, only set when the source is a wildcard */
	struct sockaddr *saddr = NULL;
	struct nhop_object *nh;
	if_t ifp;
	int error;
	int type;

	NET_EPOCH_ASSERT();

	/* set VNET, if any */
	CURVNET_SET(addr->net);

	/* set default TTL limit */
	addr->hoplimit = V_ip_defttl;

	type = ADDR_VALID;
	if (src_in->sin_addr.s_addr == INADDR_ANY)
		type |= ADDR_SRC_ANY;
	if (dst_tmp.sin_addr.s_addr == INADDR_ANY)
		type |= ADDR_DST_ANY;

	/*
	 * Make sure the socket address length field is set.
	 */
	dst_tmp.sin_len = sizeof(dst_tmp);

	/* Step 1 - lookup destination route if any */
	switch (type) {
	case ADDR_VALID:
	case ADDR_SRC_ANY:
		/* regular destination route lookup */
		nh = fib4_lookup(RT_DEFAULT_FIB, dst_tmp.sin_addr,0,NHR_NONE,0);
		if (nh == NULL) {
			error = EHOSTUNREACH;
			goto done;
		}
		break;
	default:
		/* wildcard destination, with or without a source address */
		error = ENETUNREACH;
		goto done;
	}

	/* Step 2 - find outgoing network interface */
	switch (type) {
	case ADDR_VALID:
		/* get source interface */
		if (addr->bound_dev_if != 0) {
			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ifp = ip_ifp_find(addr->net, src_in->sin_addr.s_addr);
		}

		/* check source interface */
		if (ifp == NULL) {
			error = ENETUNREACH;
			goto done;
		} else if (if_getflags(ifp) & IFF_LOOPBACK) {
			/*
			 * Source address cannot be a loopback device.
			 */
			error = EHOSTUNREACH;
			goto error_put_ifp;
		} else if (if_getflags(nh->nh_ifp) & IFF_LOOPBACK) {
			if (memcmp(&src_in->sin_addr, &dst_in->sin_addr,
			    sizeof(src_in->sin_addr))) {
				/*
				 * Destination is loopback, but source
				 * and destination address is not the
				 * same.
				 */
				error = EHOSTUNREACH;
				goto error_put_ifp;
			}
			/* get destination network interface from route */
			/* swap the reference from the source to the route interface */
			dev_put(ifp);
			ifp = nh->nh_ifp;
			dev_hold(ifp);
		} else if (ifp != nh->nh_ifp) {
			/*
			 * Source and destination interfaces are
			 * different.
			 */
			error = ENETUNREACH;
			goto error_put_ifp;
		}
		break;
	case ADDR_SRC_ANY:
		/* check for loopback device */
		/* pick the address that replaces the wildcard source in step 4 */
		if (if_getflags(nh->nh_ifp) & IFF_LOOPBACK)
			saddr = (struct sockaddr *)&dst_tmp;
		else
			saddr = nh->nh_ifa->ifa_addr;

		/* get destination network interface from route */
		ifp = nh->nh_ifp;
		dev_hold(ifp);
		break;
	default:
		/* not reached: step 1 already bailed out for all other types */
		break;
	}

	/*
	 * Step 3 - resolve destination MAC address
	 */
	if (dst_tmp.sin_addr.s_addr == INADDR_BROADCAST) {
		/* limited broadcast: use the interface's broadcast address */
		rdma_copy_addr_sub(edst, if_getbroadcastaddr(ifp),
		    if_getaddrlen(ifp), MAX_ADDR_LEN);
		error = 0;
	} else if (IN_MULTICAST(ntohl(dst_tmp.sin_addr.s_addr))) {
		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
		error = addr_resolve_multi(edst, ifp, (struct sockaddr *)&dst_tmp);
		if (error != 0)
			goto error_put_ifp;
		else if (is_gw)
			addr->network = RDMA_NETWORK_IPV4;
	} else if (if_getflags(ifp) & IFF_LOOPBACK) {
		/* loopback: report an all-zero link-layer address */
		memset(edst, 0, MAX_ADDR_LEN);
		error = 0;
	} else {
		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
		memset(edst, 0, MAX_ADDR_LEN);
#ifdef INET6
		/* IPv4 route through an IPv6 gateway: resolve the gateway via ND */
		if (is_gw && nh->gw_sa.sa_family == AF_INET6)
			error = nd6_resolve(ifp, LLE_SF(AF_INET, is_gw), NULL,
			    &nh->gw_sa, edst, NULL, NULL);
		else
#endif
			/* resolve the gateway if there is one, else the destination */
			error = arpresolve(ifp, is_gw, NULL, is_gw ?
			    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
			    edst, NULL, NULL);

		if (error != 0)
			goto error_put_ifp;
		else if (is_gw)
			addr->network = RDMA_NETWORK_IPV4;
	}

	/*
	 * Step 4 - update source address, if any
	 */
	if (saddr != NULL) {
		src_port = src_in->sin_port;
		memcpy(src_in, saddr, rdma_addr_size(saddr));
		src_in->sin_port = src_port;	/* preserve port number */
	}

	/* success: the interface reference is handed over to the caller */
	*ifpp = ifp;

	goto done;

error_put_ifp:
	dev_put(ifp);
done:
	CURVNET_RESTORE();

	/* "try again later" style errors are reported as -ENODATA */
	if (error == EWOULDBLOCK || error == EAGAIN)
		error = ENODATA;
	return (-error);
}
#else
/* Built without INET: IPv4 resolution always fails with -EADDRNOTAVAIL. */
static int addr4_resolve(struct sockaddr_in *src_in,
			 const struct sockaddr_in *dst_in,
			 struct rdma_dev_addr *addr,
			 u8 *edst,
			 if_t *ifpp)
{
	return -EADDRNOTAVAIL;
}
#endif
446 
#ifdef INET6
/*
 * Resolve an IPv6 destination to an outgoing interface and a
 * destination link-layer address.
 *
 * src_in: source address. When it is the unspecified address it is
 *         overwritten with an address taken from the route (or with the
 *         scope-embedded destination for a loopback route); the port
 *         number is preserved.
 * dst_in: destination address. The unspecified address is rejected.
 * addr:   supplies the VNET and bound_dev_if (also used as scope ID);
 *         hoplimit is always set and network is set for gateway routes.
 * edst:   receives the destination link-layer address, MAX_ADDR_LEN
 *         bytes.
 * ifpp:   on success receives the outgoing interface; the reference on
 *         it is released by the caller, addr_resolve(), via dev_put().
 *
 * Must be called inside the network epoch. Returns 0 or a negative
 * errno; EWOULDBLOCK and EAGAIN are reported as -ENODATA.
 */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 u8 *edst,
			 if_t *ifpp)
{
	/* bit flags describing which of the two addresses are wildcards */
	enum {
		ADDR_VALID = 0,
		ADDR_SRC_ANY = 1,
		ADDR_DST_ANY = 2,
	};
	/* writable copy of the destination; gets the scope ID embedded below */
	struct sockaddr_in6 dst_tmp = *dst_in;
	in_port_t src_port;
	/* replacement source address, only set when the source is a wildcard */
	struct sockaddr *saddr = NULL;
	struct nhop_object *nh;
	if_t ifp;
	int error;
	int type;

	NET_EPOCH_ASSERT();

	/* set VNET, if any */
	CURVNET_SET(addr->net);

	/* set default TTL limit */
	/*
	 * NOTE(review): this is the same variable the IPv4 path uses;
	 * confirm whether an IPv6 specific default hop limit was intended.
	 */
	addr->hoplimit = V_ip_defttl;

	type = ADDR_VALID;
	if (ipv6_addr_any(&src_in->sin6_addr))
		type |= ADDR_SRC_ANY;
	if (ipv6_addr_any(&dst_tmp.sin6_addr))
		type |= ADDR_DST_ANY;

	/*
	 * Make sure the socket address length field is set.
	 */
	dst_tmp.sin6_len = sizeof(dst_tmp);

	/*
	 * Make sure the scope ID gets embedded, else nd6_resolve() will
	 * not find the record.
	 */
	dst_tmp.sin6_scope_id = addr->bound_dev_if;
	sa6_embedscope(&dst_tmp, 0);

	/* Step 1 - lookup destination route if any */
	switch (type) {
	case ADDR_VALID:
		/* sanity check for IPv4 addresses */
		if (ipv6_addr_v4mapped(&src_in->sin6_addr) !=
		    ipv6_addr_v4mapped(&dst_tmp.sin6_addr)) {
			error = EAFNOSUPPORT;
			goto done;
		}
		/* FALLTHROUGH */
	case ADDR_SRC_ANY:
		/* regular destination route lookup */
		/*
		 * The lookup uses the caller's address (dst_in), not the
		 * scope-embedded copy, and passes bound_dev_if separately.
		 */
		nh = fib6_lookup(RT_DEFAULT_FIB, &dst_in->sin6_addr,
		    addr->bound_dev_if, NHR_NONE, 0);
		if (nh == NULL) {
			error = EHOSTUNREACH;
			goto done;
		}
		break;
	default:
		/* wildcard destination, with or without a source address */
		error = ENETUNREACH;
		goto done;
	}

	/* Step 2 - find outgoing network interface */
	switch (type) {
	case ADDR_VALID:
		/* get source interface */
		if (addr->bound_dev_if != 0) {
			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
		} else {
			ifp = ip6_ifp_find(addr->net, src_in->sin6_addr, 0);
		}

		/* check source interface */
		if (ifp == NULL) {
			error = ENETUNREACH;
			goto done;
		} else if (if_getflags(ifp) & IFF_LOOPBACK) {
			/*
			 * Source address cannot be a loopback device.
			 */
			error = EHOSTUNREACH;
			goto error_put_ifp;
		} else if (if_getflags(nh->nh_ifp) & IFF_LOOPBACK) {
			if (memcmp(&src_in->sin6_addr, &dst_in->sin6_addr,
			    sizeof(src_in->sin6_addr))) {
				/*
				 * Destination is loopback, but source
				 * and destination address is not the
				 * same.
				 */
				error = EHOSTUNREACH;
				goto error_put_ifp;
			}
			/* get destination network interface from route */
			/* swap the reference from the source to the route interface */
			dev_put(ifp);
			ifp = nh->nh_ifp;
			dev_hold(ifp);
		} else if (ifp != nh->nh_ifp) {
			/*
			 * Source and destination interfaces are
			 * different.
			 */
			error = ENETUNREACH;
			goto error_put_ifp;
		}
		break;
	case ADDR_SRC_ANY:
		/* check for loopback device */
		/* pick the address that replaces the wildcard source in step 4 */
		if (if_getflags(nh->nh_ifp) & IFF_LOOPBACK)
			saddr = (struct sockaddr *)&dst_tmp;
		else
			saddr = nh->nh_ifa->ifa_addr;

		/* get destination network interface from route */
		ifp = nh->nh_ifp;
		dev_hold(ifp);
		break;
	default:
		/* not reached: step 1 already bailed out for all other types */
		break;
	}

	/*
	 * Step 3 - resolve destination MAC address
	 */
	if (IN6_IS_ADDR_MULTICAST(&dst_tmp.sin6_addr)) {
		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
		error = addr_resolve_multi(edst, ifp,
		    (struct sockaddr *)&dst_tmp);
		if (error != 0)
			goto error_put_ifp;
		else if (is_gw)
			addr->network = RDMA_NETWORK_IPV6;
	} else if (if_getflags(nh->nh_ifp) & IFF_LOOPBACK) {
		/* loopback route: report an all-zero link-layer address */
		memset(edst, 0, MAX_ADDR_LEN);
		error = 0;
	} else {
		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
		memset(edst, 0, MAX_ADDR_LEN);
		/* resolve the gateway if there is one, else the destination */
		error = nd6_resolve(ifp, LLE_SF(AF_INET6, is_gw), NULL,
		    is_gw ? &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
		    edst, NULL, NULL);
		if (error != 0)
			goto error_put_ifp;
		else if (is_gw)
			addr->network = RDMA_NETWORK_IPV6;
	}

	/*
	 * Step 4 - update source address, if any
	 */
	if (saddr != NULL) {
		src_port = src_in->sin6_port;
		memcpy(src_in, saddr, rdma_addr_size(saddr));
		src_in->sin6_port = src_port;	/* preserve port number */
	}

	/* success: the interface reference is handed over to the caller */
	*ifpp = ifp;

	goto done;

error_put_ifp:
	dev_put(ifp);
done:
	CURVNET_RESTORE();

	/* "try again later" style errors are reported as -ENODATA */
	if (error == EWOULDBLOCK || error == EAGAIN)
		error = ENODATA;
	return (-error);
}
#else
/* Built without INET6: IPv6 resolution always fails with -EADDRNOTAVAIL. */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 u8 *edst,
			 if_t *ifpp)
{
	return -EADDRNOTAVAIL;
}
#endif
634 
635 static int addr_resolve_neigh(if_t dev,
636 			      const struct sockaddr *dst_in,
637 			      u8 *edst,
638 			      struct rdma_dev_addr *addr)
639 {
640 	if (if_getflags(dev) & IFF_LOOPBACK) {
641 		int ret;
642 
643 		/*
644 		 * Binding to a loopback device is not allowed. Make
645 		 * sure the destination device address is global by
646 		 * clearing the bound device interface:
647 		 */
648 		if (addr->bound_dev_if == if_getindex(dev))
649 			addr->bound_dev_if = 0;
650 
651 		ret = rdma_translate_ip(dst_in, addr);
652 		if (ret == 0) {
653 			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
654 			       MAX_ADDR_LEN);
655 		}
656 		return ret;
657 	}
658 
659 	/* If the device doesn't do ARP internally */
660 	if (!(if_getflags(dev) & IFF_NOARP))
661 		return rdma_copy_addr(addr, dev, edst);
662 
663 	return rdma_copy_addr(addr, dev, NULL);
664 }
665 
666 static int addr_resolve(struct sockaddr *src_in,
667 			const struct sockaddr *dst_in,
668 			struct rdma_dev_addr *addr)
669 {
670 	struct epoch_tracker et;
671 	if_t ndev = NULL;
672 	u8 edst[MAX_ADDR_LEN];
673 	int ret;
674 
675 	if (dst_in->sa_family != src_in->sa_family)
676 		return -EINVAL;
677 
678 	NET_EPOCH_ENTER(et);
679 	switch (src_in->sa_family) {
680 	case AF_INET:
681 		ret = addr4_resolve((struct sockaddr_in *)src_in,
682 				    (const struct sockaddr_in *)dst_in,
683 				    addr, edst, &ndev);
684 		break;
685 	case AF_INET6:
686 		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
687 				    (const struct sockaddr_in6 *)dst_in, addr,
688 				    edst, &ndev);
689 		break;
690 	default:
691 		ret = -EADDRNOTAVAIL;
692 		break;
693 	}
694 	NET_EPOCH_EXIT(et);
695 
696 	/* check for error */
697 	if (ret != 0)
698 		return ret;
699 
700 	/* store MAC addresses and check for loopback */
701 	ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
702 
703 	/* set belonging VNET, if any */
704 	addr->net = dev_net(ndev);
705 	dev_put(ndev);
706 
707 	return ret;
708 }
709 
710 static void process_req(struct work_struct *work)
711 {
712 	struct addr_req *req, *temp_req;
713 	struct sockaddr *src_in, *dst_in;
714 	struct list_head done_list;
715 
716 	INIT_LIST_HEAD(&done_list);
717 
718 	mutex_lock(&lock);
719 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
720 		if (req->status == -ENODATA) {
721 			src_in = (struct sockaddr *) &req->src_addr;
722 			dst_in = (struct sockaddr *) &req->dst_addr;
723 			req->status = addr_resolve(src_in, dst_in, req->addr);
724 			if (req->status && time_after_eq(jiffies, req->timeout))
725 				req->status = -ETIMEDOUT;
726 			else if (req->status == -ENODATA)
727 				continue;
728 		}
729 		list_move_tail(&req->list, &done_list);
730 	}
731 
732 	if (!list_empty(&req_list)) {
733 		req = list_entry(req_list.next, struct addr_req, list);
734 		set_timeout(req->timeout);
735 	}
736 	mutex_unlock(&lock);
737 
738 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
739 		list_del(&req->list);
740 		req->callback(req->status, (struct sockaddr *) &req->src_addr,
741 			req->addr, req->context);
742 		put_client(req->client);
743 		kfree(req);
744 	}
745 }
746 
747 int rdma_resolve_ip(struct rdma_addr_client *client,
748 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
749 		    struct rdma_dev_addr *addr, int timeout_ms,
750 		    void (*callback)(int status, struct sockaddr *src_addr,
751 				     struct rdma_dev_addr *addr, void *context),
752 		    void *context)
753 {
754 	struct sockaddr *src_in, *dst_in;
755 	struct addr_req *req;
756 	int ret = 0;
757 
758 	req = kzalloc(sizeof *req, GFP_KERNEL);
759 	if (!req)
760 		return -ENOMEM;
761 
762 	src_in = (struct sockaddr *) &req->src_addr;
763 	dst_in = (struct sockaddr *) &req->dst_addr;
764 
765 	if (src_addr) {
766 		if (src_addr->sa_family != dst_addr->sa_family) {
767 			ret = -EINVAL;
768 			goto err;
769 		}
770 
771 		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
772 	} else {
773 		src_in->sa_family = dst_addr->sa_family;
774 	}
775 
776 	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
777 	req->addr = addr;
778 	req->callback = callback;
779 	req->context = context;
780 	req->client = client;
781 	atomic_inc(&client->refcount);
782 
783 	req->status = addr_resolve(src_in, dst_in, addr);
784 	switch (req->status) {
785 	case 0:
786 		req->timeout = jiffies;
787 		queue_req(req);
788 		break;
789 	case -ENODATA:
790 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
791 		queue_req(req);
792 		break;
793 	default:
794 		ret = req->status;
795 		atomic_dec(&client->refcount);
796 		goto err;
797 	}
798 	return ret;
799 err:
800 	kfree(req);
801 	return ret;
802 }
803 EXPORT_SYMBOL(rdma_resolve_ip);
804 
805 int rdma_resolve_ip_route(struct sockaddr *src_addr,
806 			  const struct sockaddr *dst_addr,
807 			  struct rdma_dev_addr *addr)
808 {
809 	struct sockaddr_storage ssrc_addr = {};
810 	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
811 
812 	if (src_addr) {
813 		if (src_addr->sa_family != dst_addr->sa_family)
814 			return -EINVAL;
815 
816 		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
817 	} else {
818 		src_in->sa_family = dst_addr->sa_family;
819 	}
820 
821 	return addr_resolve(src_in, dst_addr, addr);
822 }
823 EXPORT_SYMBOL(rdma_resolve_ip_route);
824 
825 void rdma_addr_cancel(struct rdma_dev_addr *addr)
826 {
827 	struct addr_req *req, *temp_req;
828 
829 	mutex_lock(&lock);
830 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
831 		if (req->addr == addr) {
832 			req->status = -ECANCELED;
833 			req->timeout = jiffies;
834 			list_move(&req->list, &req_list);
835 			set_timeout(req->timeout);
836 			break;
837 		}
838 	}
839 	mutex_unlock(&lock);
840 }
841 EXPORT_SYMBOL(rdma_addr_cancel);
842 
/*
 * Context shared between rdma_addr_find_l2_eth_by_grh() and its
 * completion callback resolve_cb().
 */
struct resolve_cb_context {
	/* where resolve_cb() stores the resolved address on success */
	struct rdma_dev_addr *addr;
	/* signalled by resolve_cb() once the request has finished */
	struct completion comp;
	/* final status reported to resolve_cb() */
	int status;
};
848 
849 static void resolve_cb(int status, struct sockaddr *src_addr,
850 	     struct rdma_dev_addr *addr, void *context)
851 {
852 	if (!status)
853 		memcpy(((struct resolve_cb_context *)context)->addr,
854 		       addr, sizeof(struct rdma_dev_addr));
855 	((struct resolve_cb_context *)context)->status = status;
856 	complete(&((struct resolve_cb_context *)context)->comp);
857 }
858 
859 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
860 				 const union ib_gid *dgid,
861 				 u8 *dmac, if_t dev,
862 				 int *hoplimit)
863 {
864 	int ret = 0;
865 	struct rdma_dev_addr dev_addr;
866 	struct resolve_cb_context ctx;
867 
868 	union rdma_sockaddr sgid_addr, dgid_addr;
869 
870 	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
871 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
872 
873 	memset(&dev_addr, 0, sizeof(dev_addr));
874 
875 	dev_addr.bound_dev_if = if_getindex(dev);
876 	dev_addr.net = dev_net(dev);
877 
878 	ctx.addr = &dev_addr;
879 	init_completion(&ctx.comp);
880 	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
881 			&dev_addr, 1000, resolve_cb, &ctx);
882 	if (ret)
883 		return ret;
884 
885 	wait_for_completion(&ctx.comp);
886 
887 	ret = ctx.status;
888 	if (ret)
889 		return ret;
890 
891 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
892 	if (hoplimit)
893 		*hoplimit = dev_addr.hoplimit;
894 	return ret;
895 }
896 EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
897 
898 int addr_init(void)
899 {
900 	addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
901 	if (!addr_wq)
902 		return -ENOMEM;
903 
904 	rdma_addr_register_client(&self);
905 
906 	return 0;
907 }
908 
/*
 * Module tear-down, the reverse of addr_init(): unregister the
 * file-internal client and then destroy the workqueue.
 */
void addr_cleanup(void)
{
	/* returns only after the client's completion has been signalled */
	rdma_addr_unregister_client(&self);
	destroy_workqueue(addr_wq);
}
914