1 /*
2  * Copyright (c) 2015-2017 Intel Corporation. All rights reserved.
3  * Copyright (c) 2017, Cisco Systems, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include "config.h"
35 
36 #include <arpa/inet.h>
37 #include <ctype.h>
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <sys/socket.h>
41 #include <sys/types.h>
42 #include <netdb.h>
43 #include <netinet/in.h>
44 #include <inttypes.h>
45 
46 #if HAVE_GETIFADDRS
47 #include <net/if.h>
48 #include <ifaddrs.h>
49 #endif
50 
51 #include <ofi_util.h>
52 
53 
/* Internal constants of the utility AV implementation. */
enum {
	/* NOTE(review): not referenced in the visible part of this file;
	 * presumably a "no entry" sentinel -- confirm against other users. */
	UTIL_NO_ENTRY = -1,
	/* AV size used by util_av_init() when neither the AV attributes nor
	 * the "universe_size" parameter provide a non-zero count. */
	UTIL_DEFAULT_AV_SIZE = 1024,
};
58 
fi_get_src_sockaddr(const struct sockaddr * dest_addr,size_t dest_addrlen,struct sockaddr ** src_addr,size_t * src_addrlen)59 static int fi_get_src_sockaddr(const struct sockaddr *dest_addr, size_t dest_addrlen,
60 			       struct sockaddr **src_addr, size_t *src_addrlen)
61 {
62 	socklen_t len; /* needed for OS compatability */
63 	int sock, ret;
64 
65 	sock = socket(dest_addr->sa_family, SOCK_DGRAM, 0);
66 	if (sock < 0)
67 		return -errno;
68 
69 	ret = connect(sock, dest_addr, dest_addrlen);
70 	if (ret)
71 		goto out;
72 
73 	*src_addr = calloc(dest_addrlen, 1);
74 	if (!*src_addr) {
75 		ret = -FI_ENOMEM;
76 		goto out;
77 	}
78 
79 	len = (socklen_t) dest_addrlen;
80 	ret = getsockname(sock, *src_addr, &len);
81 	if (ret) {
82 		ret = -errno;
83 		goto out;
84 	}
85 	*src_addrlen = len;
86 
87 	switch ((*src_addr)->sa_family) {
88 	case AF_INET:
89 		((struct sockaddr_in *) (*src_addr))->sin_port = 0;
90 		break;
91 	case AF_INET6:
92 		((struct sockaddr_in6 *) (*src_addr))->sin6_port = 0;
93 		break;
94 	default:
95 		ret = -FI_ENOSYS;
96 		break;
97 	}
98 
99 out:
100 	ofi_close_socket(sock);
101 	return ret;
102 
103 }
104 
/*
 * Fill buf with a name or address string identifying the local node.
 *
 * Candidates are tried in this order:
 *   1. the host name, if getaddrinfo() can resolve it for the requested
 *      address family (AF_INET when sa_family is 0);
 *   2. when built with HAVE_GETIFADDRS, the numeric address of the first
 *      interface that is up, matches sa_family (IPv4 or IPv6 when sa_family
 *      is 0) and can be converted by getnameinfo();
 *   3. the IPv4 loopback address "127.0.0.1".
 *
 * buf must be non-NULL and buflen positive (asserted).  The last byte of buf
 * is always forced to NUL, so the result is a valid string even when it had
 * to be truncated.
 */
void ofi_getnodename(uint16_t sa_family, char *buf, int buflen)
{
	int ret;
	struct addrinfo ai, *rai = NULL;
#if HAVE_GETIFADDRS
	/* Only needed (and only fully declared) for the interface walk */
	struct ifaddrs *ifaddrs, *ifa;
#endif

	assert(buf && buflen > 0);
	ret = gethostname(buf, buflen);
	buf[buflen - 1] = '\0';
	if (ret == 0) {
		memset(&ai, 0, sizeof(ai));
		ai.ai_family = sa_family  ? sa_family : AF_INET;
		ret = getaddrinfo(buf, NULL, &ai, &rai);
		if (!ret) {
			/* The host name resolves: keep it as the node name */
			freeaddrinfo(rai);
			return;
		}
	}

#if HAVE_GETIFADDRS
	ret = ofi_getifaddrs(&ifaddrs);
	if (!ret) {
		for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr == NULL || !(ifa->ifa_flags & IFF_UP))
				continue;

			if (sa_family) {
				if (ifa->ifa_addr->sa_family != sa_family)
					continue;
			} else if ((ifa->ifa_addr->sa_family != AF_INET) &&
				   (ifa->ifa_addr->sa_family != AF_INET6)) {
				continue;
			}

			ret = getnameinfo(ifa->ifa_addr, ofi_sizeofaddr(ifa->ifa_addr),
				  	  buf, buflen, NULL, 0, NI_NUMERICHOST);
			buf[buflen - 1] = '\0';
			if (ret == 0) {
				freeifaddrs(ifaddrs);
				return;
			}
		}
		freeifaddrs(ifaddrs);
	}
#endif
	/* no reasonable address found, use ipv4 loopback */
	strncpy(buf, "127.0.0.1", buflen);
	buf[buflen - 1] = '\0';
}
154 
ofi_get_src_addr(uint32_t addr_format,const void * dest_addr,size_t dest_addrlen,void ** src_addr,size_t * src_addrlen)155 int ofi_get_src_addr(uint32_t addr_format,
156 		    const void *dest_addr, size_t dest_addrlen,
157 		    void **src_addr, size_t *src_addrlen)
158 {
159 	switch (addr_format) {
160 	case FI_SOCKADDR:
161 	case FI_SOCKADDR_IN:
162 	case FI_SOCKADDR_IN6:
163 		return fi_get_src_sockaddr(dest_addr, dest_addrlen,
164 					   (struct sockaddr **) src_addr,
165 					   src_addrlen);
166 	default:
167 		return -FI_ENOSYS;
168 	}
169 }
170 
/*
 * Resolve node/service into a socket address with getaddrinfo().
 *
 * *sa_family is the requested address family on input and is replaced by the
 * family of the resolved address on success.  FI_SOURCE in flags requests a
 * passive (AI_PASSIVE) lookup.  Only the first result is used.
 *
 * Returns 0 and stores a copy of the address in *addr with its length in
 * *addrlen, -FI_ENODATA if the lookup fails, or -FI_ENOMEM if the copy
 * cannot be allocated.
 */
static int fi_get_sockaddr(int *sa_family, uint64_t flags,
			   const char *node, const char *service,
			   struct sockaddr **addr, size_t *addrlen)
{
	struct addrinfo hints, *res;
	int status = 0;

	memset(&hints, 0, sizeof hints);
	hints.ai_family = *sa_family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = (flags & FI_SOURCE) ? AI_PASSIVE : 0;

	if (getaddrinfo(node, service, &hints, &res))
		return -FI_ENODATA;

	*addr = mem_dup(res->ai_addr, res->ai_addrlen);
	if (*addr) {
		*sa_family = res->ai_family;
		*addrlen = res->ai_addrlen;
	} else {
		status = -FI_ENOMEM;
	}

	freeaddrinfo(res);
	return status;
}
200 
/*
 * Duplicate a string-format address.
 *
 * node is treated as a string address only if it contains "://".  In that
 * case *addr receives a heap copy of node (the caller must free() it) and
 * *addrlen its size including the terminating NUL.
 *
 * If node is NULL or is not in that form, *addr and *addrlen are left
 * untouched.  If the copy cannot be allocated, *addr is set to NULL and
 * *addrlen to 0 so that the two outputs never disagree.  service is unused.
 */
void ofi_get_str_addr(const char *node, const char *service,
		      char **addr, size_t *addrlen)
{
	size_t len;
	char *copy;

	if (!node || !strstr(node, "://"))
		return;

	len = strlen(node) + 1;
	copy = malloc(len);
	if (!copy) {
		*addr = NULL;
		*addrlen = 0;
		return;
	}

	memcpy(copy, node, len);
	*addr = copy;
	*addrlen = len;
}
210 
ofi_get_addr(uint32_t * addr_format,uint64_t flags,const char * node,const char * service,void ** addr,size_t * addrlen)211 int ofi_get_addr(uint32_t *addr_format, uint64_t flags,
212 		const char *node, const char *service,
213 		void **addr, size_t *addrlen)
214 {
215 	int sa_family, ret;
216 
217 	switch (*addr_format) {
218 	case FI_SOCKADDR:
219 		sa_family = 0;
220 		ret = fi_get_sockaddr(&sa_family, flags, node, service,
221 				      (struct sockaddr **) addr, addrlen);
222 		if (ret)
223 			return ret;
224 		*addr_format = sa_family == AF_INET ?
225 			       FI_SOCKADDR_IN : FI_SOCKADDR_IN6;
226 		return 0;
227 	case FI_SOCKADDR_IN:
228 		sa_family = AF_INET;
229 		return fi_get_sockaddr(&sa_family, flags, node, service,
230 				       (struct sockaddr **) addr, addrlen);
231 	case FI_SOCKADDR_IN6:
232 		sa_family = AF_INET6;
233 		return fi_get_sockaddr(&sa_family, flags, node, service,
234 				       (struct sockaddr **) addr, addrlen);
235 	case FI_ADDR_STR:
236 		ofi_get_str_addr(node, service, (char **) addr, addrlen);
237 		return 0;
238 	default:
239 		return -FI_ENOSYS;
240 	}
241 }
242 
/*
 * Return a pointer to the address stored in the AV entry at index fi_addr.
 * The entry returned by the buffer pool is dereferenced without a check.
 */
void *ofi_av_get_addr(struct util_av *av, fi_addr_t fi_addr)
{
	struct util_av_entry *av_entry =
		ofi_bufpool_get_ibuf(av->av_entry_pool, fi_addr);

	return av_entry->addr;
}
250 
ofi_verify_av_insert(struct util_av * av,uint64_t flags)251 int ofi_verify_av_insert(struct util_av *av, uint64_t flags)
252 {
253 	if ((av->flags & FI_EVENT) && !av->eq) {
254 		FI_WARN(av->prov, FI_LOG_AV, "no EQ bound to AV\n");
255 		return -FI_ENOEQ;
256 	}
257 
258 	if (flags & ~(FI_MORE)) {
259 		FI_WARN(av->prov, FI_LOG_AV, "unsupported flags\n");
260 		return -FI_ENOEQ;
261 	}
262 
263 	return 0;
264 }
265 
266 /*
267  * Must hold AV lock
268  */
ofi_av_insert_addr(struct util_av * av,const void * addr,fi_addr_t * fi_addr)269 int ofi_av_insert_addr(struct util_av *av, const void *addr, fi_addr_t *fi_addr)
270 {
271 	struct util_av_entry *entry = NULL;
272 
273 	HASH_FIND(hh, av->hash, addr, av->addrlen, entry);
274 	if (entry) {
275 		if (fi_addr)
276 			*fi_addr = ofi_buf_index(entry);
277 		ofi_atomic_inc32(&entry->use_cnt);
278 		return 0;
279 	} else {
280 		entry = ofi_ibuf_alloc(av->av_entry_pool);
281 		if (!entry)
282 			return -FI_ENOMEM;
283 		if (fi_addr)
284 			*fi_addr = ofi_buf_index(entry);
285 		memcpy(entry->addr, addr, av->addrlen);
286 		ofi_atomic_initialize32(&entry->use_cnt, 1);
287 		HASH_ADD(hh, av->hash, addr, av->addrlen, entry);
288 	}
289 	return 0;
290 }
291 
/*
 * Call apply() for every entry in the AV hash.
 *
 * apply receives the AV, the entry's stored address, the entry's buffer
 * index and arg.  Iteration stops at the first non-zero return from apply,
 * and that value is returned; otherwise 0 is returned.
 */
int ofi_av_elements_iter(struct util_av *av, ofi_av_apply_func apply, void *arg)
{
	struct util_av_entry *cur = NULL, *next = NULL;
	int rc;

	HASH_ITER(hh, av->hash, cur, next) {
		rc = apply(av, cur->addr, ofi_buf_index(cur), arg);
		if (OFI_UNLIKELY(rc))
			return rc;
	}

	return 0;
}
305 
306 /*
307  * Must hold AV lock
308  */
ofi_av_remove_addr(struct util_av * av,fi_addr_t fi_addr)309 int ofi_av_remove_addr(struct util_av *av, fi_addr_t fi_addr)
310 {
311 	struct util_av_entry *av_entry;
312 
313 	av_entry = ofi_bufpool_get_ibuf(av->av_entry_pool, fi_addr);
314 	if (!av_entry)
315 		return -FI_ENOENT;
316 
317 	if (ofi_atomic_dec32(&av_entry->use_cnt))
318 		return FI_SUCCESS;
319 
320 	HASH_DELETE(hh, av->hash, av_entry);
321 	ofi_ibuf_free(av_entry);
322 	return 0;
323 }
324 
ofi_av_lookup_fi_addr_unsafe(struct util_av * av,const void * addr)325 fi_addr_t ofi_av_lookup_fi_addr_unsafe(struct util_av *av, const void *addr)
326 {
327 	struct util_av_entry *entry = NULL;
328 
329 	HASH_FIND(hh, av->hash, addr, av->addrlen, entry);
330 	return entry ? ofi_buf_index(entry) : FI_ADDR_NOTAVAIL;
331 }
332 
ofi_av_lookup_fi_addr(struct util_av * av,const void * addr)333 fi_addr_t ofi_av_lookup_fi_addr(struct util_av *av, const void *addr)
334 {
335 	fi_addr_t fi_addr;
336 	fastlock_acquire(&av->lock);
337 	fi_addr = ofi_av_lookup_fi_addr_unsafe(av, addr);
338 	fastlock_release(&av->lock);
339 	return fi_addr;
340 }
341 
342 static void *
ofi_av_lookup_addr(struct util_av * av,fi_addr_t fi_addr,size_t * addrlen)343 ofi_av_lookup_addr(struct util_av *av, fi_addr_t fi_addr, size_t *addrlen)
344 {
345 	*addrlen = av->addrlen;
346 	return ofi_av_get_addr(av, fi_addr);
347 }
348 
ofi_av_bind(struct fid * av_fid,struct fid * eq_fid,uint64_t flags)349 int ofi_av_bind(struct fid *av_fid, struct fid *eq_fid, uint64_t flags)
350 {
351 	struct util_av *av;
352 	struct util_eq *eq;
353 
354 	av = container_of(av_fid, struct util_av, av_fid.fid);
355 	if (eq_fid->fclass != FI_CLASS_EQ) {
356 		FI_WARN(av->prov, FI_LOG_AV, "invalid fid class\n");
357 		return -FI_EINVAL;
358 	}
359 
360 	if (!(av->flags & FI_EVENT)) {
361 		FI_WARN(av->prov, FI_LOG_AV, "cannot bind EQ to an AV that was "
362 			"configured for synchronous operation: FI_EVENT flag was"
363 			" not specified in fi_av_attr when AV was opened\n");
364 		return -FI_EINVAL;
365 	}
366 
367 	if (flags) {
368 		FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n");
369 		return -FI_EINVAL;
370 	}
371 
372 	eq = container_of(eq_fid, struct util_eq, eq_fid.fid);
373 	av->eq = eq;
374 	ofi_atomic_inc32(&eq->ref);
375 	return 0;
376 }
377 
/*
 * Release the address storage of an AV: clear the address hash, then destroy
 * the buffer pool that the AV entries were allocated from.
 */
static void util_av_close(struct util_av *av)
{
	/* Clear the hash before the pool holding its entries goes away */
	HASH_CLEAR(hh, av->hash);
	ofi_bufpool_destroy(av->av_entry_pool);
}
383 
ofi_av_close_lightweight(struct util_av * av)384 int ofi_av_close_lightweight(struct util_av *av)
385 {
386 	if (ofi_atomic_get32(&av->ref)) {
387 		FI_WARN(av->prov, FI_LOG_AV, "AV is busy\n");
388 		return -FI_EBUSY;
389 	}
390 
391 	if (av->eq)
392 		ofi_atomic_dec32(&av->eq->ref);
393 
394 	fastlock_destroy(&av->ep_list_lock);
395 
396 	ofi_atomic_dec32(&av->domain->ref);
397 	fastlock_destroy(&av->lock);
398 
399 	return 0;
400 }
401 
/*
 * Fully close an AV: run the lightweight close and, only if that succeeds,
 * release the address storage.  Returns the lightweight close result.
 */
int ofi_av_close(struct util_av *av)
{
	int err;

	err = ofi_av_close_lightweight(av);
	if (!err)
		util_av_close(av);

	return err;
}
410 
util_verify_av_util_attr(struct util_domain * domain,const struct util_av_attr * util_attr)411 static int util_verify_av_util_attr(struct util_domain *domain,
412 				    const struct util_av_attr *util_attr)
413 {
414 	if (util_attr->flags) {
415 		FI_WARN(domain->prov, FI_LOG_AV, "invalid internal flags\n");
416 		return -FI_EINVAL;
417 	}
418 
419 	return 0;
420 }
421 
util_av_init(struct util_av * av,const struct fi_av_attr * attr,const struct util_av_attr * util_attr)422 static int util_av_init(struct util_av *av, const struct fi_av_attr *attr,
423 			const struct util_av_attr *util_attr)
424 {
425 	int ret = 0;
426 	size_t max_count;
427 	struct ofi_bufpool_attr pool_attr = {
428 		.size		= util_attr->addrlen +
429 				  sizeof(struct util_av_entry),
430 		.alignment	= 16,
431 		.max_cnt	= 0,
432 		/* Don't use track of buffer, because user can close
433 		 * the AV without prior deletion of addresses */
434 		.flags		= OFI_BUFPOOL_NO_TRACK | OFI_BUFPOOL_INDEXED |
435 				  OFI_BUFPOOL_HUGEPAGES,
436 	};
437 
438 	/* TODO: Handle FI_READ */
439 	/* TODO: Handle mmap - shared AV */
440 
441 	ret = util_verify_av_util_attr(av->domain, util_attr);
442 	if (ret)
443 		return ret;
444 
445 	if (attr->count) {
446 		max_count = attr->count;
447 	} else {
448 		if (fi_param_get_size_t(NULL, "universe_size", &max_count))
449 			max_count = UTIL_DEFAULT_AV_SIZE;
450 	}
451 
452 	av->count = roundup_power_of_two(max_count ?
453 					 max_count :
454 					 UTIL_DEFAULT_AV_SIZE);
455 	FI_INFO(av->prov, FI_LOG_AV, "AV size %zu\n", av->count);
456 
457 	av->addrlen = util_attr->addrlen;
458 	av->flags = util_attr->flags | attr->flags;
459 	av->hash = NULL;
460 
461 	pool_attr.chunk_cnt = av->count;
462 	return ofi_bufpool_create_attr(&pool_attr, &av->av_entry_pool);
463 }
464 
util_verify_av_attr(struct util_domain * domain,const struct fi_av_attr * attr)465 static int util_verify_av_attr(struct util_domain *domain,
466 			       const struct fi_av_attr *attr)
467 {
468 	switch (attr->type) {
469 	case FI_AV_MAP:
470 	case FI_AV_TABLE:
471 		if ((domain->av_type != FI_AV_UNSPEC) &&
472 		    (attr->type != domain->av_type)) {
473 			FI_INFO(domain->prov, FI_LOG_AV, "Invalid AV type\n");
474 		   	return -FI_EINVAL;
475 		}
476 		break;
477 	default:
478 		FI_WARN(domain->prov, FI_LOG_AV, "invalid av type\n");
479 		return -FI_EINVAL;
480 	}
481 
482 	if (attr->name) {
483 		FI_WARN(domain->prov, FI_LOG_AV, "Shared AV is unsupported\n");
484 		return -FI_ENOSYS;
485 	}
486 
487 	if (attr->flags & ~(FI_EVENT | FI_READ | FI_SYMMETRIC)) {
488 		FI_WARN(domain->prov, FI_LOG_AV, "invalid flags\n");
489 		return -FI_EINVAL;
490 	}
491 
492 	return 0;
493 }
494 
ofi_av_init_lightweight(struct util_domain * domain,const struct fi_av_attr * attr,struct util_av * av,void * context)495 int ofi_av_init_lightweight(struct util_domain *domain, const struct fi_av_attr *attr,
496 			    struct util_av *av, void *context)
497 {
498 	int ret;
499 
500 	ret = util_verify_av_attr(domain, attr);
501 	if (ret)
502 		return ret;
503 
504 	av->prov = domain->prov;
505 	ofi_atomic_initialize32(&av->ref, 0);
506 	fastlock_init(&av->lock);
507 	av->av_fid.fid.fclass = FI_CLASS_AV;
508 	/*
509 	 * ops set by provider
510 	 * av->av_fid.fid.ops = &prov_av_fi_ops;
511 	 * av->av_fid.ops = &prov_av_ops;
512 	 */
513 	av->context = context;
514 	av->domain = domain;
515 	fastlock_init(&av->ep_list_lock);
516 	dlist_init(&av->ep_list);
517 	ofi_atomic_inc32(&domain->ref);
518 	return 0;
519 }
520 
ofi_av_init(struct util_domain * domain,const struct fi_av_attr * attr,const struct util_av_attr * util_attr,struct util_av * av,void * context)521 int ofi_av_init(struct util_domain *domain, const struct fi_av_attr *attr,
522 		const struct util_av_attr *util_attr,
523 		struct util_av *av, void *context)
524 {
525 	int ret = ofi_av_init_lightweight(domain, attr, av, context);
526 	if (ret)
527 		return ret;
528 
529 	ret = util_av_init(av, attr, util_attr);
530 	if (ret)
531 		return ret;
532 	return ret;
533 }
534 
ofi_av_write_event(struct util_av * av,uint64_t data,int err,void * context)535 void ofi_av_write_event(struct util_av *av, uint64_t data,
536 			int err, void *context)
537 {
538 	struct fi_eq_err_entry entry = { 0 };
539 	size_t size;
540 	ssize_t ret;
541 	uint64_t flags;
542 
543 	entry.fid = &av->av_fid.fid;
544 	entry.context = context;
545 	entry.data = data;
546 
547 	if (err) {
548 		FI_INFO(av->prov, FI_LOG_AV, "writing error entry to EQ\n");
549 		entry.err = err;
550 		size = sizeof(struct fi_eq_err_entry);
551 		flags = UTIL_FLAG_ERROR;
552 	} else {
553 		FI_DBG(av->prov, FI_LOG_AV, "writing entry to EQ\n");
554 		size = sizeof(struct fi_eq_entry);
555 		flags = 0;
556 	}
557 
558 	ret = fi_eq_write(&av->eq->eq_fid, FI_AV_COMPLETE, &entry,
559 			  size, flags);
560 	if (ret != size)
561 		FI_WARN(av->prov, FI_LOG_AV, "error writing to EQ\n");
562 }
563 
564 /*************************************************************************
565  *
566  * AV for IP addressing
567  *
568  *************************************************************************/
569 
ofi_ip_av_get_fi_addr(struct util_av * av,const void * addr)570 fi_addr_t ofi_ip_av_get_fi_addr(struct util_av *av, const void *addr)
571 {
572 	return ofi_av_lookup_fi_addr(av, addr);
573 }
574 
/*
 * Check whether addr is a usable IP peer address.
 *
 * An AF_INET or AF_INET6 address is valid if both its port and its address
 * are non-zero (for IPv6: not the "any" address).  Every other family is
 * invalid.  Returns 1 if valid, 0 otherwise.  av is unused.
 */
static int ip_av_valid_addr(struct util_av *av, const void *addr)
{
	const struct sockaddr_in *v4 = addr;
	const struct sockaddr_in6 *v6 = addr;

	if (v4->sin_family == AF_INET)
		return v4->sin_port != 0 && v4->sin_addr.s_addr != 0;

	if (v4->sin_family == AF_INET6)
		return v6->sin6_port != 0 &&
		       memcmp(&v6->sin6_addr, &in6addr_any,
			      sizeof(in6addr_any)) != 0;

	return 0;
}
590 
/*
 * Validate and insert a single IP address into the AV.
 *
 * An invalid address is rejected with -FI_EADDRNOTAVAIL; otherwise the
 * address is inserted under the AV lock.  If fi_addr is non-NULL it receives
 * the assigned fi_addr on success and FI_ADDR_NOTAVAIL on failure.  context
 * is unused.
 */
static int ip_av_insert_addr(struct util_av *av, const void *addr,
			     fi_addr_t *fi_addr, void *context)
{
	fi_addr_t assigned = FI_ADDR_NOTAVAIL;
	int err;

	if (!ip_av_valid_addr(av, addr)) {
		err = -FI_EADDRNOTAVAIL;
		FI_WARN(av->prov, FI_LOG_AV, "invalid address\n");
	} else {
		fastlock_acquire(&av->lock);
		err = ofi_av_insert_addr(av, addr, &assigned);
		fastlock_release(&av->lock);
	}

	if (fi_addr) {
		if (err)
			*fi_addr = FI_ADDR_NOTAVAIL;
		else
			*fi_addr = assigned;
	}

	ofi_straddr_dbg(av->prov, FI_LOG_AV, "av_insert addr", addr);
	if (fi_addr)
		FI_DBG(av->prov, FI_LOG_AV, "av_insert fi_addr: %" PRIu64 "\n",
		       *fi_addr);

	return err;
}
616 
/*
 * Insert count addresses, laid out back to back with a stride of addrlen
 * bytes starting at addr.
 *
 * fi_addr, if non-NULL, receives one result per input address.  When an EQ
 * is bound, every failed insert produces an error event carrying the index
 * of the address, a final completion event carries the number of successful
 * inserts, and 0 is returned.  Without an EQ the number of successful
 * inserts is returned.
 */
int ofi_ip_av_insertv(struct util_av *av, const void *addr, size_t addrlen,
		      size_t count, fi_addr_t *fi_addr, void *context)
{
	const char *cursor = addr;
	int inserted = 0, err;
	size_t i;

	FI_DBG(av->prov, FI_LOG_AV, "inserting %zu addresses\n", count);
	for (i = 0; i < count; i++, cursor += addrlen) {
		err = ip_av_insert_addr(av, cursor,
					fi_addr ? &fi_addr[i] : NULL, context);
		if (err) {
			if (av->eq)
				ofi_av_write_event(av, i, -err, context);
			continue;
		}
		inserted++;
	}

	FI_DBG(av->prov, FI_LOG_AV, "%d addresses successful\n", inserted);
	if (!av->eq)
		return inserted;

	ofi_av_write_event(av, inserted, 0, context);
	return 0;
}
642 
/*
 * fi_av_insert() implementation for IP AVs.
 *
 * After validating the call with ofi_verify_av_insert(), the addresses are
 * inserted with ofi_ip_av_insertv().  The size of the first address, as
 * reported by ofi_sizeofaddr(), is used as the stride for all of them.
 */
int ofi_ip_av_insert(struct fid_av *av_fid, const void *addr, size_t count,
		     fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct util_av *av = container_of(av_fid, struct util_av, av_fid);
	int err = ofi_verify_av_insert(av, flags);

	if (err)
		return err;

	return ofi_ip_av_insertv(av, addr, ofi_sizeofaddr(addr),
				 count, fi_addr, context);
}
657 
/*
 * fi_av_insertsvc() implementation: a symmetric insert of exactly one node
 * and one service.
 */
static int ip_av_insertsvc(struct fid_av *av, const char *node,
			   const char *service, fi_addr_t *fi_addr,
			   uint64_t flags, void *context)
{
	const size_t single = 1;

	return fi_av_insertsym(av, node, single, service, single,
			       fi_addr, flags, context);
}
664 
665 /* Caller should free *addr */
666 static int
ip_av_ip4sym_getaddr(struct util_av * av,struct in_addr ip,size_t ipcnt,uint16_t port,size_t portcnt,void ** addr,size_t * addrlen)667 ip_av_ip4sym_getaddr(struct util_av *av, struct in_addr ip, size_t ipcnt,
668 		     uint16_t port, size_t portcnt, void **addr, size_t *addrlen)
669 {
670 	struct sockaddr_in *sin;
671 	int count = ipcnt * portcnt;
672 	size_t i, p, k;
673 
674 	*addrlen = sizeof(*sin);
675 	sin = calloc(count, *addrlen);
676 	if (!sin)
677 		return -FI_ENOMEM;
678 
679 	for (i = 0, k = 0; i < ipcnt; i++) {
680 		for (p = 0; p < portcnt; p++, k++) {
681 			sin[k].sin_family = AF_INET;
682 			/* TODO: should we skip addresses x.x.x.0 and x.x.x.255? */
683 			sin[k].sin_addr.s_addr = htonl(ntohl(ip.s_addr) + i);
684 			sin[k].sin_port = htons(port + p);
685 		}
686 	}
687 	*addr = sin;
688 	return count;
689 }
690 
691 /* Caller should free *addr */
692 static int
ip_av_ip6sym_getaddr(struct util_av * av,struct in6_addr ip,size_t ipcnt,uint16_t port,size_t portcnt,void ** addr,size_t * addrlen)693 ip_av_ip6sym_getaddr(struct util_av *av, struct in6_addr ip, size_t ipcnt,
694 		     uint16_t port, size_t portcnt, void **addr, size_t *addrlen)
695 {
696 	struct sockaddr_in6 *sin6, sin6_temp;
697 	int j, count = ipcnt * portcnt;
698 	size_t i, p, k;
699 
700 	*addrlen = sizeof(*sin6);
701 	sin6 = calloc(count, *addrlen);
702 	if (!sin6)
703 		return -FI_ENOMEM;
704 
705 	sin6_temp.sin6_addr = ip;
706 
707 	for (i = 0, k = 0; i < ipcnt; i++) {
708 		for (p = 0; p < portcnt; p++, k++) {
709 			sin6[k].sin6_family = AF_INET6;
710 			sin6[k].sin6_addr = sin6_temp.sin6_addr;
711 			sin6[k].sin6_port = htons(port + p);
712 		}
713 		/* TODO: should we skip addresses x::0 and x::255? */
714 		for (j = 15; j >= 0; j--) {
715 			if (++sin6_temp.sin6_addr.s6_addr[j] < 255)
716 				break;
717 		}
718 	}
719 	*addr = sin6;
720 	return count;
721 }
722 
/*
 * Build the address list for a symmetric insert by host name.
 *
 * With nodecnt > 1, node is split into a base name and a trailing decimal
 * index, and the names base<index>, base<index+1>, ... are resolved.  With
 * svccnt > 1 the services atoi(service), atoi(service)+1, ... are used.
 * With a count of 1 the node or service string is used unchanged.  Names
 * vary slowest, services fastest.  The address family follows the domain's
 * address format.
 *
 * node and service must be shorter than FI_NAME_MAX; the visible caller,
 * ofi_ip_av_sym_getaddr(), checks this.
 *
 * Returns the number of addresses stored in the newly allocated array *addr
 * (each *addrlen bytes).  Returns 0 with *addr set to NULL when nodecnt or
 * svccnt is 0.  On error a negative code is returned and no array is left
 * allocated: -FI_EINVAL for an unknown address format or a count that does
 * not fit the int return value, -FI_ENOMEM on allocation failure, and
 * -FI_ENODATA if a name cannot be resolved.
 *
 * Caller should free *addr
 */
static int ip_av_nodesym_getaddr(struct util_av *av, const char *node,
				 size_t nodecnt, const char *service,
				 size_t svccnt, void **addr, size_t *addrlen)
{
	struct addrinfo hints, *ai;
	void *addr_temp;
	char name[FI_NAME_MAX];
	char svc[FI_NAME_MAX];
	size_t name_len, total, n, s;
	int ret, name_index, svc_index, count;

	memset(&hints, 0, sizeof hints);

	hints.ai_socktype = SOCK_DGRAM;
	switch (av->domain->addr_format) {
	case FI_SOCKADDR_IN:
		hints.ai_family = AF_INET;
		*addrlen = sizeof(struct sockaddr_in);
		break;
	case FI_SOCKADDR_IN6:
		hints.ai_family = AF_INET6;
		*addrlen = sizeof(struct sockaddr_in6);
		break;
	default:
		FI_INFO(av->prov, FI_LOG_AV, "Unknown address format!\n");
		return -FI_EINVAL;
	}

	/* Reject products that overflow size_t or the int return value */
	total = nodecnt * svccnt;
	count = (int) total;
	if ((svccnt && total / svccnt != nodecnt) ||
	    count < 0 || (size_t) count != total) {
		FI_WARN(av->prov, FI_LOG_AV, "too many addresses requested\n");
		return -FI_EINVAL;
	}

	if (!count) {
		/* Nothing to allocate, so nothing for the caller to free */
		*addr = NULL;
		return 0;
	}

	*addr = calloc(total, *addrlen);
	if (!*addr)
		return -FI_ENOMEM;

	addr_temp = *addr;

	/* Split node into its base name and trailing decimal index.  The
	 * length check keeps an all-digit or empty node from being read
	 * before its first character. */
	name_len = strlen(node);
	while (name_len > 0 && isdigit((unsigned char) node[name_len - 1]))
		name_len--;

	memcpy(name, node, name_len);
	name[name_len] = '\0';
	name_index = atoi(node + name_len);
	svc_index = atoi(service);

	for (n = 0; n < nodecnt; n++) {
		if (nodecnt == 1) {
			strncpy(name, node, sizeof(name) - 1);
			name[FI_NAME_MAX - 1] = '\0';
		} else {
			snprintf(name + name_len, sizeof(name) - name_len - 1,
				 "%zu", name_index + n);
		}

		for (s = 0; s < svccnt; s++) {
			if (svccnt == 1) {
				strncpy(svc, service, sizeof(svc) - 1);
				svc[FI_NAME_MAX - 1] = '\0';
			} else {
				snprintf(svc, sizeof(svc) - 1,
					 "%zu", svc_index + s);
			}
			FI_INFO(av->prov, FI_LOG_AV, "resolving %s:%s for AV "
				"insert\n", name, svc);

			/* Resolve the generated name and service; using the
			 * original strings would give every slot the same
			 * address. */
			ret = getaddrinfo(name, svc, &hints, &ai);
			if (ret) {
				/* EAI_* codes are not fabric error codes and
				 * are positive on some platforms, where they
				 * would be mistaken for an address count. */
				ret = -FI_ENODATA;
				goto err;
			}

			memcpy(addr_temp, ai->ai_addr, *addrlen);
			addr_temp = (char *)addr_temp + *addrlen;
			freeaddrinfo(ai);
		}
	}
	return count;
err:
	free(*addr);
	*addr = NULL;
	return ret;
}
799 
800 /* Caller should free *addr */
ofi_ip_av_sym_getaddr(struct util_av * av,const char * node,size_t nodecnt,const char * service,size_t svccnt,void ** addr,size_t * addrlen)801 int ofi_ip_av_sym_getaddr(struct util_av *av, const char *node,
802 			  size_t nodecnt, const char *service,
803 			  size_t svccnt, void **addr, size_t *addrlen)
804 {
805 	struct in6_addr ip6;
806 	struct in_addr ip4;
807 	int ret;
808 
809 	if (strlen(node) >= FI_NAME_MAX || strlen(service) >= FI_NAME_MAX) {
810 		FI_WARN(av->prov, FI_LOG_AV,
811 			"node or service name is too long\n");
812 		return -FI_ENOSYS;
813 	}
814 
815 	ret = inet_pton(AF_INET, node, &ip4);
816 	if (ret == 1) {
817 		FI_INFO(av->prov, FI_LOG_AV, "insert symmetric IPv4\n");
818 		return ip_av_ip4sym_getaddr(av, ip4, nodecnt,
819 					  (uint16_t) strtol(service, NULL, 0),
820 					  svccnt, addr, addrlen);
821 	}
822 
823 	ret = inet_pton(AF_INET6, node, &ip6);
824 	if (ret == 1) {
825 		FI_INFO(av->prov, FI_LOG_AV, "insert symmetric IPv6\n");
826 		return ip_av_ip6sym_getaddr(av, ip6, nodecnt,
827 					  (uint16_t) strtol(service, NULL, 0),
828 					  svccnt, addr, addrlen);
829 	}
830 
831 	FI_INFO(av->prov, FI_LOG_AV, "insert symmetric host names\n");
832 	return ip_av_nodesym_getaddr(av, node, nodecnt, service,
833 				     svccnt, addr, addrlen);
834 }
835 
/*
 * fi_av_insertsym() implementation for IP AVs: expand the node/service
 * ranges into an address array, insert it, and free the array.
 *
 * Returns the error from ofi_verify_av_insert(), a non-positive result of
 * ofi_ip_av_sym_getaddr() as is, or the result of ofi_ip_av_insertv().
 */
static int ip_av_insertsym(struct fid_av *av_fid, const char *node,
			   size_t nodecnt, const char *service, size_t svccnt,
			   fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct util_av *av = container_of(av_fid, struct util_av, av_fid);
	size_t addrlen;
	void *addrs;
	int n, rc;

	rc = ofi_verify_av_insert(av, flags);
	if (rc)
		return rc;

	n = ofi_ip_av_sym_getaddr(av, node, nodecnt, service,
				  svccnt, &addrs, &addrlen);
	if (n > 0) {
		rc = ofi_ip_av_insertv(av, addrs, addrlen, n,
				       fi_addr, context);
		free(addrs);
		return rc;
	}

	return n;
}
860 
/*
 * fi_av_remove() implementation for IP AVs.
 *
 * Removes the count entries listed in fi_addr, taking the AV lock for each
 * removal.  Individual failures are logged but do not affect the result.
 * Returns -FI_EINVAL if any flags are given, 0 otherwise.
 */
int ofi_ip_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr,
		     size_t count, uint64_t flags)
{
	struct util_av *av;
	size_t i;
	int ret;

	av = container_of(av_fid, struct util_av, av_fid);
	if (flags) {
		FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n");
		return -FI_EINVAL;
	}

	/*
	 * It's more efficient to remove addresses from high to low index.
	 * We assume that addresses are removed in the same order that they were
	 * added -- i.e. fi_addr passed in here was also passed into insert.
	 * Thus, we walk through the array backwards.
	 */
	/* The index stays a size_t so that a large count is not truncated;
	 * i is decremented before use and the loop ends after index 0. */
	for (i = count; i-- > 0; ) {
		fastlock_acquire(&av->lock);
		ret = ofi_av_remove_addr(av, fi_addr[i]);
		fastlock_release(&av->lock);
		if (ret) {
			FI_WARN(av->prov, FI_LOG_AV,
				"removal of fi_addr %"PRIu64" failed\n",
				fi_addr[i]);
		}
	}
	return 0;
}
891 
/*
 * fi_av_lookup() implementation for IP AVs.
 *
 * Copies the address stored for fi_addr into addr, limited to the smaller of
 * the caller's buffer size (*addrlen on input) and the AV's address length.
 * On return *addrlen holds the AV's address length, so the caller can detect
 * truncation.  Always returns 0.
 */
int ofi_ip_av_lookup(struct fid_av *av_fid, fi_addr_t fi_addr,
		     void *addr, size_t *addrlen)
{
	struct util_av *av =
		container_of(av_fid, struct util_av, av_fid);
	size_t stored_len, copy_len;
	void *stored;

	stored = ofi_av_lookup_addr(av, fi_addr, &stored_len);

	copy_len = *addrlen;
	if (stored_len < copy_len)
		copy_len = stored_len;

	memcpy(addr, stored, copy_len);
	*addrlen = av->addrlen;

	return 0;
}
905 
906 const char *
ofi_ip_av_straddr(struct fid_av * av,const void * addr,char * buf,size_t * len)907 ofi_ip_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len)
908 {
909 	return ofi_straddr(buf, len, FI_SOCKADDR, addr);
910 }
911 
/*
 * AV operations of the IP address vector; installed on every AV created by
 * ofi_ip_av_create_flags().
 */
static struct fi_ops_av ip_av_ops = {
	.size = sizeof(struct fi_ops_av),
	.insert = ofi_ip_av_insert,
	.insertsvc = ip_av_insertsvc,
	.insertsym = ip_av_insertsym,
	.remove = ofi_ip_av_remove,
	.lookup = ofi_ip_av_lookup,
	.straddr = ofi_ip_av_straddr,
};
921 
ip_av_close(struct fid * av_fid)922 static int ip_av_close(struct fid *av_fid)
923 {
924 	struct util_av *av;
925 	int ret;
926 
927 	av = container_of(av_fid, struct util_av, av_fid.fid);
928 	ret = ofi_av_close(av);
929 	if (ret)
930 		return ret;
931 	free(av);
932 	return 0;
933 }
934 
/*
 * Generic fid operations of the IP address vector; installed on every AV
 * created by ofi_ip_av_create_flags().  control and ops_open are not
 * supported.
 */
static struct fi_ops ip_av_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = ip_av_close,
	.bind = ofi_av_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};
942 
ofi_ip_av_create_flags(struct fid_domain * domain_fid,struct fi_av_attr * attr,struct fid_av ** av,void * context,int flags)943 int ofi_ip_av_create_flags(struct fid_domain *domain_fid, struct fi_av_attr *attr,
944 			   struct fid_av **av, void *context, int flags)
945 {
946 	struct util_domain *domain;
947 	struct util_av_attr util_attr;
948 	struct util_av *util_av;
949 	int ret;
950 
951 	domain = container_of(domain_fid, struct util_domain, domain_fid);
952 	if (domain->addr_format == FI_SOCKADDR_IN)
953 		util_attr.addrlen = sizeof(struct sockaddr_in);
954 	else
955 		util_attr.addrlen = sizeof(struct sockaddr_in6);
956 
957 	util_attr.flags = flags;
958 
959 	if (attr->type == FI_AV_UNSPEC)
960 		attr->type = FI_AV_MAP;
961 
962 	util_av = calloc(1, sizeof(*util_av));
963 	if (!util_av)
964 		return -FI_ENOMEM;
965 
966 	ret = ofi_av_init(domain, attr, &util_attr, util_av, context);
967 	if (ret) {
968 		free(util_av);
969 		return ret;
970 	}
971 
972 	*av = &util_av->av_fid;
973 	(*av)->fid.ops = &ip_av_fi_ops;
974 	(*av)->ops = &ip_av_ops;
975 	return 0;
976 }
977 
/*
 * Create an IP address vector without any provider-internal flags; see
 * ofi_ip_av_create_flags().
 */
int ofi_ip_av_create(struct fid_domain *domain_fid, struct fi_av_attr *attr,
		     struct fid_av **av, void *context)
{
	const int no_flags = 0;

	return ofi_ip_av_create_flags(domain_fid, attr, av, context, no_flags);
}
983