1 /*
2 * Copyright (c) 2015-2017 Intel Corporation. All rights reserved.
3 * Copyright (c) 2017, Cisco Systems, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #include "config.h"
35
36 #include <arpa/inet.h>
37 #include <ctype.h>
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <sys/socket.h>
41 #include <sys/types.h>
42 #include <netdb.h>
43 #include <netinet/in.h>
44 #include <inttypes.h>
45
46 #if HAVE_GETIFADDRS
47 #include <net/if.h>
48 #include <ifaddrs.h>
49 #endif
50
51 #include <ofi_util.h>
52
53
/* File-local constants for the utility address vector. */
enum {
	/* Not referenced in the visible part of this file. */
	UTIL_NO_ENTRY		= -1,
	/* AV size used by util_av_init() when no count is otherwise known. */
	UTIL_DEFAULT_AV_SIZE	= 1024,
};
58
fi_get_src_sockaddr(const struct sockaddr * dest_addr,size_t dest_addrlen,struct sockaddr ** src_addr,size_t * src_addrlen)59 static int fi_get_src_sockaddr(const struct sockaddr *dest_addr, size_t dest_addrlen,
60 struct sockaddr **src_addr, size_t *src_addrlen)
61 {
62 socklen_t len; /* needed for OS compatability */
63 int sock, ret;
64
65 sock = socket(dest_addr->sa_family, SOCK_DGRAM, 0);
66 if (sock < 0)
67 return -errno;
68
69 ret = connect(sock, dest_addr, dest_addrlen);
70 if (ret)
71 goto out;
72
73 *src_addr = calloc(dest_addrlen, 1);
74 if (!*src_addr) {
75 ret = -FI_ENOMEM;
76 goto out;
77 }
78
79 len = (socklen_t) dest_addrlen;
80 ret = getsockname(sock, *src_addr, &len);
81 if (ret) {
82 ret = -errno;
83 goto out;
84 }
85 *src_addrlen = len;
86
87 switch ((*src_addr)->sa_family) {
88 case AF_INET:
89 ((struct sockaddr_in *) (*src_addr))->sin_port = 0;
90 break;
91 case AF_INET6:
92 ((struct sockaddr_in6 *) (*src_addr))->sin6_port = 0;
93 break;
94 default:
95 ret = -FI_ENOSYS;
96 break;
97 }
98
99 out:
100 ofi_close_socket(sock);
101 return ret;
102
103 }
104
/*
 * Fill buf (buflen bytes, always NUL-terminated) with a name or address
 * for the local node.
 *
 * Order of preference:
 *  1. the gethostname() result, if getaddrinfo() can resolve it for
 *     sa_family (AF_INET when sa_family is 0);
 *  2. when built with HAVE_GETIFADDRS, the numeric address of the first
 *     interface that is up and matches sa_family (IPv4 or IPv6 when
 *     sa_family is 0);
 *  3. the IPv4 loopback string "127.0.0.1".
 */
void ofi_getnodename(uint16_t sa_family, char *buf, int buflen)
{
	int ret;
	struct addrinfo ai, *rai = NULL;
#if HAVE_GETIFADDRS
	/* only needed (and only fully declared) when getifaddrs is available */
	struct ifaddrs *ifaddrs, *ifa;
#endif

	assert(buf && buflen > 0);
	ret = gethostname(buf, buflen);
	buf[buflen - 1] = '\0';
	if (ret == 0) {
		memset(&ai, 0, sizeof(ai));
		ai.ai_family = sa_family ? sa_family : AF_INET;
		ret = getaddrinfo(buf, NULL, &ai, &rai);
		if (!ret) {
			/* host name is resolvable: keep it in buf */
			freeaddrinfo(rai);
			return;
		}
	}

#if HAVE_GETIFADDRS
	ret = ofi_getifaddrs(&ifaddrs);
	if (!ret) {
		for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_addr == NULL || !(ifa->ifa_flags & IFF_UP))
				continue;

			if (sa_family) {
				if (ifa->ifa_addr->sa_family != sa_family)
					continue;
			} else if ((ifa->ifa_addr->sa_family != AF_INET) &&
				   (ifa->ifa_addr->sa_family != AF_INET6)) {
				continue;
			}

			ret = getnameinfo(ifa->ifa_addr, ofi_sizeofaddr(ifa->ifa_addr),
					  buf, buflen, NULL, 0, NI_NUMERICHOST);
			buf[buflen - 1] = '\0';
			if (ret == 0) {
				freeifaddrs(ifaddrs);
				return;
			}
		}
		freeifaddrs(ifaddrs);
	}
#endif
	/* no reasonable address found, use ipv4 loopback */
	strncpy(buf, "127.0.0.1", buflen);
	buf[buflen - 1] = '\0';
}
154
ofi_get_src_addr(uint32_t addr_format,const void * dest_addr,size_t dest_addrlen,void ** src_addr,size_t * src_addrlen)155 int ofi_get_src_addr(uint32_t addr_format,
156 const void *dest_addr, size_t dest_addrlen,
157 void **src_addr, size_t *src_addrlen)
158 {
159 switch (addr_format) {
160 case FI_SOCKADDR:
161 case FI_SOCKADDR_IN:
162 case FI_SOCKADDR_IN6:
163 return fi_get_src_sockaddr(dest_addr, dest_addrlen,
164 (struct sockaddr **) src_addr,
165 src_addrlen);
166 default:
167 return -FI_ENOSYS;
168 }
169 }
170
/*
 * Resolve node/service with getaddrinfo() and return a copy of the first
 * result.
 *
 * *sa_family is the requested family on input and is overwritten with the
 * family of the resolved address.  AI_PASSIVE is requested when flags
 * contains FI_SOURCE.
 *
 * Returns 0 with *addr (allocated through mem_dup) and *addrlen set,
 * -FI_ENODATA when resolution fails, or -FI_ENOMEM when the copy fails.
 */
static int fi_get_sockaddr(int *sa_family, uint64_t flags,
			   const char *node, const char *service,
			   struct sockaddr **addr, size_t *addrlen)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int status = 0;

	memset(&hints, 0, sizeof hints);
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = *sa_family;
	if (flags & FI_SOURCE)
		hints.ai_flags = AI_PASSIVE;

	if (getaddrinfo(node, service, &hints, &res))
		return -FI_ENODATA;

	*addr = mem_dup(res->ai_addr, res->ai_addrlen);
	if (*addr) {
		*sa_family = res->ai_family;
		*addrlen = res->ai_addrlen;
	} else {
		status = -FI_ENOMEM;
	}

	freeaddrinfo(res);
	return status;
}
200
/*
 * Produce a string-format address from node.
 *
 * node is only treated as a string address when it contains "://".  In
 * that case *addr receives a heap copy of node (the caller frees it) and
 * *addrlen its size including the terminating NUL.  If the copy cannot be
 * allocated, *addr is set to NULL and *addrlen to 0 so a length is never
 * reported for a missing buffer.
 *
 * When node is NULL or has no "://", both outputs are left untouched.
 * service is not used.
 */
void ofi_get_str_addr(const char *node, const char *service,
		      char **addr, size_t *addrlen)
{
	size_t len;
	char *copy;

	(void) service;

	if (!node || !strstr(node, "://"))
		return;

	len = strlen(node) + 1;
	copy = malloc(len);
	if (!copy) {
		*addr = NULL;
		*addrlen = 0;
		return;
	}

	memcpy(copy, node, len);
	*addr = copy;
	*addrlen = len;
}
210
ofi_get_addr(uint32_t * addr_format,uint64_t flags,const char * node,const char * service,void ** addr,size_t * addrlen)211 int ofi_get_addr(uint32_t *addr_format, uint64_t flags,
212 const char *node, const char *service,
213 void **addr, size_t *addrlen)
214 {
215 int sa_family, ret;
216
217 switch (*addr_format) {
218 case FI_SOCKADDR:
219 sa_family = 0;
220 ret = fi_get_sockaddr(&sa_family, flags, node, service,
221 (struct sockaddr **) addr, addrlen);
222 if (ret)
223 return ret;
224 *addr_format = sa_family == AF_INET ?
225 FI_SOCKADDR_IN : FI_SOCKADDR_IN6;
226 return 0;
227 case FI_SOCKADDR_IN:
228 sa_family = AF_INET;
229 return fi_get_sockaddr(&sa_family, flags, node, service,
230 (struct sockaddr **) addr, addrlen);
231 case FI_SOCKADDR_IN6:
232 sa_family = AF_INET6;
233 return fi_get_sockaddr(&sa_family, flags, node, service,
234 (struct sockaddr **) addr, addrlen);
235 case FI_ADDR_STR:
236 ofi_get_str_addr(node, service, (char **) addr, addrlen);
237 return 0;
238 default:
239 return -FI_ENOSYS;
240 }
241 }
242
/*
 * Return the address stored in the AV entry indexed by fi_addr.
 * The entry returned by the pool lookup is dereferenced without a NULL
 * check.
 */
void *ofi_av_get_addr(struct util_av *av, fi_addr_t fi_addr)
{
	struct util_av_entry *av_entry =
		ofi_bufpool_get_ibuf(av->av_entry_pool, fi_addr);

	return av_entry->addr;
}
250
ofi_verify_av_insert(struct util_av * av,uint64_t flags)251 int ofi_verify_av_insert(struct util_av *av, uint64_t flags)
252 {
253 if ((av->flags & FI_EVENT) && !av->eq) {
254 FI_WARN(av->prov, FI_LOG_AV, "no EQ bound to AV\n");
255 return -FI_ENOEQ;
256 }
257
258 if (flags & ~(FI_MORE)) {
259 FI_WARN(av->prov, FI_LOG_AV, "unsupported flags\n");
260 return -FI_ENOEQ;
261 }
262
263 return 0;
264 }
265
266 /*
267 * Must hold AV lock
268 */
ofi_av_insert_addr(struct util_av * av,const void * addr,fi_addr_t * fi_addr)269 int ofi_av_insert_addr(struct util_av *av, const void *addr, fi_addr_t *fi_addr)
270 {
271 struct util_av_entry *entry = NULL;
272
273 HASH_FIND(hh, av->hash, addr, av->addrlen, entry);
274 if (entry) {
275 if (fi_addr)
276 *fi_addr = ofi_buf_index(entry);
277 ofi_atomic_inc32(&entry->use_cnt);
278 return 0;
279 } else {
280 entry = ofi_ibuf_alloc(av->av_entry_pool);
281 if (!entry)
282 return -FI_ENOMEM;
283 if (fi_addr)
284 *fi_addr = ofi_buf_index(entry);
285 memcpy(entry->addr, addr, av->addrlen);
286 ofi_atomic_initialize32(&entry->use_cnt, 1);
287 HASH_ADD(hh, av->hash, addr, av->addrlen, entry);
288 }
289 return 0;
290 }
291
/*
 * Invoke apply(av, address, index, arg) on every entry in the AV hash.
 * Iteration stops at the first non-zero result, which is returned;
 * otherwise 0 is returned.  This function does not take the AV lock
 * itself.
 */
int ofi_av_elements_iter(struct util_av *av, ofi_av_apply_func apply, void *arg)
{
	struct util_av_entry *cur = NULL, *next = NULL;
	int rc;

	HASH_ITER(hh, av->hash, cur, next) {
		rc = apply(av, cur->addr, ofi_buf_index(cur), arg);
		if (OFI_UNLIKELY(rc))
			return rc;
	}

	return 0;
}
305
306 /*
307 * Must hold AV lock
308 */
ofi_av_remove_addr(struct util_av * av,fi_addr_t fi_addr)309 int ofi_av_remove_addr(struct util_av *av, fi_addr_t fi_addr)
310 {
311 struct util_av_entry *av_entry;
312
313 av_entry = ofi_bufpool_get_ibuf(av->av_entry_pool, fi_addr);
314 if (!av_entry)
315 return -FI_ENOENT;
316
317 if (ofi_atomic_dec32(&av_entry->use_cnt))
318 return FI_SUCCESS;
319
320 HASH_DELETE(hh, av->hash, av_entry);
321 ofi_ibuf_free(av_entry);
322 return 0;
323 }
324
ofi_av_lookup_fi_addr_unsafe(struct util_av * av,const void * addr)325 fi_addr_t ofi_av_lookup_fi_addr_unsafe(struct util_av *av, const void *addr)
326 {
327 struct util_av_entry *entry = NULL;
328
329 HASH_FIND(hh, av->hash, addr, av->addrlen, entry);
330 return entry ? ofi_buf_index(entry) : FI_ADDR_NOTAVAIL;
331 }
332
ofi_av_lookup_fi_addr(struct util_av * av,const void * addr)333 fi_addr_t ofi_av_lookup_fi_addr(struct util_av *av, const void *addr)
334 {
335 fi_addr_t fi_addr;
336 fastlock_acquire(&av->lock);
337 fi_addr = ofi_av_lookup_fi_addr_unsafe(av, addr);
338 fastlock_release(&av->lock);
339 return fi_addr;
340 }
341
342 static void *
ofi_av_lookup_addr(struct util_av * av,fi_addr_t fi_addr,size_t * addrlen)343 ofi_av_lookup_addr(struct util_av *av, fi_addr_t fi_addr, size_t *addrlen)
344 {
345 *addrlen = av->addrlen;
346 return ofi_av_get_addr(av, fi_addr);
347 }
348
ofi_av_bind(struct fid * av_fid,struct fid * eq_fid,uint64_t flags)349 int ofi_av_bind(struct fid *av_fid, struct fid *eq_fid, uint64_t flags)
350 {
351 struct util_av *av;
352 struct util_eq *eq;
353
354 av = container_of(av_fid, struct util_av, av_fid.fid);
355 if (eq_fid->fclass != FI_CLASS_EQ) {
356 FI_WARN(av->prov, FI_LOG_AV, "invalid fid class\n");
357 return -FI_EINVAL;
358 }
359
360 if (!(av->flags & FI_EVENT)) {
361 FI_WARN(av->prov, FI_LOG_AV, "cannot bind EQ to an AV that was "
362 "configured for synchronous operation: FI_EVENT flag was"
363 " not specified in fi_av_attr when AV was opened\n");
364 return -FI_EINVAL;
365 }
366
367 if (flags) {
368 FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n");
369 return -FI_EINVAL;
370 }
371
372 eq = container_of(eq_fid, struct util_eq, eq_fid.fid);
373 av->eq = eq;
374 ofi_atomic_inc32(&eq->ref);
375 return 0;
376 }
377
util_av_close(struct util_av * av)378 static void util_av_close(struct util_av *av)
379 {
380 HASH_CLEAR(hh, av->hash);
381 ofi_bufpool_destroy(av->av_entry_pool);
382 }
383
ofi_av_close_lightweight(struct util_av * av)384 int ofi_av_close_lightweight(struct util_av *av)
385 {
386 if (ofi_atomic_get32(&av->ref)) {
387 FI_WARN(av->prov, FI_LOG_AV, "AV is busy\n");
388 return -FI_EBUSY;
389 }
390
391 if (av->eq)
392 ofi_atomic_dec32(&av->eq->ref);
393
394 fastlock_destroy(&av->ep_list_lock);
395
396 ofi_atomic_dec32(&av->domain->ref);
397 fastlock_destroy(&av->lock);
398
399 return 0;
400 }
401
/*
 * Close an AV created with ofi_av_init(): the lightweight state is torn
 * down first and, only if that succeeds, the hash and entry pool are
 * released.  Returns the result of the lightweight close.
 */
int ofi_av_close(struct util_av *av)
{
	int rc;

	rc = ofi_av_close_lightweight(av);
	if (!rc)
		util_av_close(av);

	return rc;
}
410
util_verify_av_util_attr(struct util_domain * domain,const struct util_av_attr * util_attr)411 static int util_verify_av_util_attr(struct util_domain *domain,
412 const struct util_av_attr *util_attr)
413 {
414 if (util_attr->flags) {
415 FI_WARN(domain->prov, FI_LOG_AV, "invalid internal flags\n");
416 return -FI_EINVAL;
417 }
418
419 return 0;
420 }
421
util_av_init(struct util_av * av,const struct fi_av_attr * attr,const struct util_av_attr * util_attr)422 static int util_av_init(struct util_av *av, const struct fi_av_attr *attr,
423 const struct util_av_attr *util_attr)
424 {
425 int ret = 0;
426 size_t max_count;
427 struct ofi_bufpool_attr pool_attr = {
428 .size = util_attr->addrlen +
429 sizeof(struct util_av_entry),
430 .alignment = 16,
431 .max_cnt = 0,
432 /* Don't use track of buffer, because user can close
433 * the AV without prior deletion of addresses */
434 .flags = OFI_BUFPOOL_NO_TRACK | OFI_BUFPOOL_INDEXED |
435 OFI_BUFPOOL_HUGEPAGES,
436 };
437
438 /* TODO: Handle FI_READ */
439 /* TODO: Handle mmap - shared AV */
440
441 ret = util_verify_av_util_attr(av->domain, util_attr);
442 if (ret)
443 return ret;
444
445 if (attr->count) {
446 max_count = attr->count;
447 } else {
448 if (fi_param_get_size_t(NULL, "universe_size", &max_count))
449 max_count = UTIL_DEFAULT_AV_SIZE;
450 }
451
452 av->count = roundup_power_of_two(max_count ?
453 max_count :
454 UTIL_DEFAULT_AV_SIZE);
455 FI_INFO(av->prov, FI_LOG_AV, "AV size %zu\n", av->count);
456
457 av->addrlen = util_attr->addrlen;
458 av->flags = util_attr->flags | attr->flags;
459 av->hash = NULL;
460
461 pool_attr.chunk_cnt = av->count;
462 return ofi_bufpool_create_attr(&pool_attr, &av->av_entry_pool);
463 }
464
util_verify_av_attr(struct util_domain * domain,const struct fi_av_attr * attr)465 static int util_verify_av_attr(struct util_domain *domain,
466 const struct fi_av_attr *attr)
467 {
468 switch (attr->type) {
469 case FI_AV_MAP:
470 case FI_AV_TABLE:
471 if ((domain->av_type != FI_AV_UNSPEC) &&
472 (attr->type != domain->av_type)) {
473 FI_INFO(domain->prov, FI_LOG_AV, "Invalid AV type\n");
474 return -FI_EINVAL;
475 }
476 break;
477 default:
478 FI_WARN(domain->prov, FI_LOG_AV, "invalid av type\n");
479 return -FI_EINVAL;
480 }
481
482 if (attr->name) {
483 FI_WARN(domain->prov, FI_LOG_AV, "Shared AV is unsupported\n");
484 return -FI_ENOSYS;
485 }
486
487 if (attr->flags & ~(FI_EVENT | FI_READ | FI_SYMMETRIC)) {
488 FI_WARN(domain->prov, FI_LOG_AV, "invalid flags\n");
489 return -FI_EINVAL;
490 }
491
492 return 0;
493 }
494
ofi_av_init_lightweight(struct util_domain * domain,const struct fi_av_attr * attr,struct util_av * av,void * context)495 int ofi_av_init_lightweight(struct util_domain *domain, const struct fi_av_attr *attr,
496 struct util_av *av, void *context)
497 {
498 int ret;
499
500 ret = util_verify_av_attr(domain, attr);
501 if (ret)
502 return ret;
503
504 av->prov = domain->prov;
505 ofi_atomic_initialize32(&av->ref, 0);
506 fastlock_init(&av->lock);
507 av->av_fid.fid.fclass = FI_CLASS_AV;
508 /*
509 * ops set by provider
510 * av->av_fid.fid.ops = &prov_av_fi_ops;
511 * av->av_fid.ops = &prov_av_ops;
512 */
513 av->context = context;
514 av->domain = domain;
515 fastlock_init(&av->ep_list_lock);
516 dlist_init(&av->ep_list);
517 ofi_atomic_inc32(&domain->ref);
518 return 0;
519 }
520
ofi_av_init(struct util_domain * domain,const struct fi_av_attr * attr,const struct util_av_attr * util_attr,struct util_av * av,void * context)521 int ofi_av_init(struct util_domain *domain, const struct fi_av_attr *attr,
522 const struct util_av_attr *util_attr,
523 struct util_av *av, void *context)
524 {
525 int ret = ofi_av_init_lightweight(domain, attr, av, context);
526 if (ret)
527 return ret;
528
529 ret = util_av_init(av, attr, util_attr);
530 if (ret)
531 return ret;
532 return ret;
533 }
534
ofi_av_write_event(struct util_av * av,uint64_t data,int err,void * context)535 void ofi_av_write_event(struct util_av *av, uint64_t data,
536 int err, void *context)
537 {
538 struct fi_eq_err_entry entry = { 0 };
539 size_t size;
540 ssize_t ret;
541 uint64_t flags;
542
543 entry.fid = &av->av_fid.fid;
544 entry.context = context;
545 entry.data = data;
546
547 if (err) {
548 FI_INFO(av->prov, FI_LOG_AV, "writing error entry to EQ\n");
549 entry.err = err;
550 size = sizeof(struct fi_eq_err_entry);
551 flags = UTIL_FLAG_ERROR;
552 } else {
553 FI_DBG(av->prov, FI_LOG_AV, "writing entry to EQ\n");
554 size = sizeof(struct fi_eq_entry);
555 flags = 0;
556 }
557
558 ret = fi_eq_write(&av->eq->eq_fid, FI_AV_COMPLETE, &entry,
559 size, flags);
560 if (ret != size)
561 FI_WARN(av->prov, FI_LOG_AV, "error writing to EQ\n");
562 }
563
564 /*************************************************************************
565 *
566 * AV for IP addressing
567 *
568 *************************************************************************/
569
ofi_ip_av_get_fi_addr(struct util_av * av,const void * addr)570 fi_addr_t ofi_ip_av_get_fi_addr(struct util_av *av, const void *addr)
571 {
572 return ofi_av_lookup_fi_addr(av, addr);
573 }
574
/*
 * Decide whether addr is an insertable IP address.
 *
 * An IPv4 address needs a non-zero port and a non-zero address; an IPv6
 * address needs a non-zero port and an address different from
 * in6addr_any.  Every other family is rejected.  av is not used.
 *
 * Returns non-zero when valid, 0 otherwise.
 */
static int ip_av_valid_addr(struct util_av *av, const void *addr)
{
	const struct sockaddr_in *in4 = addr;
	const struct sockaddr_in6 *in6 = addr;

	if (in4->sin_family == AF_INET)
		return in4->sin_port && in4->sin_addr.s_addr;

	if (in4->sin_family == AF_INET6)
		return in6->sin6_port &&
		       memcmp(&in6->sin6_addr, &in6addr_any,
			      sizeof(in6addr_any));

	return 0;
}
590
/*
 * Validate and insert one IP address into the AV.
 *
 * A valid address is inserted under av->lock.  An invalid one is rejected
 * with -FI_EADDRNOTAVAIL.  When fi_addr is non-NULL it receives the entry
 * index on success and FI_ADDR_NOTAVAIL on failure.  context is not used.
 *
 * Returns 0 on success or a negative error code.
 */
static int ip_av_insert_addr(struct util_av *av, const void *addr,
			     fi_addr_t *fi_addr, void *context)
{
	fi_addr_t index;
	int rc;

	if (!ip_av_valid_addr(av, addr)) {
		rc = -FI_EADDRNOTAVAIL;
		FI_WARN(av->prov, FI_LOG_AV, "invalid address\n");
	} else {
		fastlock_acquire(&av->lock);
		rc = ofi_av_insert_addr(av, addr, &index);
		fastlock_release(&av->lock);
	}

	/* index is only meaningful (and only read) when the insert worked */
	if (fi_addr)
		*fi_addr = rc ? FI_ADDR_NOTAVAIL : index;

	ofi_straddr_dbg(av->prov, FI_LOG_AV, "av_insert addr", addr);
	if (fi_addr)
		FI_DBG(av->prov, FI_LOG_AV, "av_insert fi_addr: %" PRIu64 "\n",
		       *fi_addr);

	return rc;
}
616
/*
 * Insert count addresses, each addrlen bytes long and stored back to back
 * starting at addr.  fi_addr, when non-NULL, receives one result per
 * address.
 *
 * Without a bound EQ the number of successful insertions is returned.
 * With a bound EQ each failure is reported as an error event (data is the
 * index of the address), a final completion event carries the success
 * count, and 0 is returned.
 */
int ofi_ip_av_insertv(struct util_av *av, const void *addr, size_t addrlen,
		      size_t count, fi_addr_t *fi_addr, void *context)
{
	const char *cursor = addr;
	int inserted = 0;
	int rc;
	size_t idx;

	FI_DBG(av->prov, FI_LOG_AV, "inserting %zu addresses\n", count);
	for (idx = 0; idx < count; idx++, cursor += addrlen) {
		rc = ip_av_insert_addr(av, cursor,
				       fi_addr ? &fi_addr[idx] : NULL, context);
		if (!rc) {
			inserted++;
			continue;
		}

		if (av->eq)
			ofi_av_write_event(av, idx, -rc, context);
	}

	FI_DBG(av->prov, FI_LOG_AV, "%d addresses successful\n", inserted);
	if (!av->eq)
		return inserted;

	ofi_av_write_event(av, inserted, 0, context);
	return 0;
}
642
/*
 * fi_av_insert implementation for IP AVs.
 *
 * After the insert preconditions are verified, the element size is
 * derived from the first address with ofi_sizeofaddr() and the array is
 * handed to ofi_ip_av_insertv().
 */
int ofi_ip_av_insert(struct fid_av *av_fid, const void *addr, size_t count,
		     fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct util_av *util_av = container_of(av_fid, struct util_av, av_fid);
	size_t addrlen;
	int rc;

	rc = ofi_verify_av_insert(util_av, flags);
	if (rc)
		return rc;

	addrlen = ofi_sizeofaddr(addr);
	return ofi_ip_av_insertv(util_av, addr, addrlen,
				 count, fi_addr, context);
}
657
/*
 * fi_av_insertsvc implementation: a single node/service pair is inserted
 * as a symmetric insert with one node and one service.
 */
static int ip_av_insertsvc(struct fid_av *av, const char *node,
			   const char *service, fi_addr_t *fi_addr,
			   uint64_t flags, void *context)
{
	const size_t single = 1;

	return fi_av_insertsym(av, node, single, service, single,
			       fi_addr, flags, context);
}
664
665 /* Caller should free *addr */
666 static int
ip_av_ip4sym_getaddr(struct util_av * av,struct in_addr ip,size_t ipcnt,uint16_t port,size_t portcnt,void ** addr,size_t * addrlen)667 ip_av_ip4sym_getaddr(struct util_av *av, struct in_addr ip, size_t ipcnt,
668 uint16_t port, size_t portcnt, void **addr, size_t *addrlen)
669 {
670 struct sockaddr_in *sin;
671 int count = ipcnt * portcnt;
672 size_t i, p, k;
673
674 *addrlen = sizeof(*sin);
675 sin = calloc(count, *addrlen);
676 if (!sin)
677 return -FI_ENOMEM;
678
679 for (i = 0, k = 0; i < ipcnt; i++) {
680 for (p = 0; p < portcnt; p++, k++) {
681 sin[k].sin_family = AF_INET;
682 /* TODO: should we skip addresses x.x.x.0 and x.x.x.255? */
683 sin[k].sin_addr.s_addr = htonl(ntohl(ip.s_addr) + i);
684 sin[k].sin_port = htons(port + p);
685 }
686 }
687 *addr = sin;
688 return count;
689 }
690
691 /* Caller should free *addr */
692 static int
ip_av_ip6sym_getaddr(struct util_av * av,struct in6_addr ip,size_t ipcnt,uint16_t port,size_t portcnt,void ** addr,size_t * addrlen)693 ip_av_ip6sym_getaddr(struct util_av *av, struct in6_addr ip, size_t ipcnt,
694 uint16_t port, size_t portcnt, void **addr, size_t *addrlen)
695 {
696 struct sockaddr_in6 *sin6, sin6_temp;
697 int j, count = ipcnt * portcnt;
698 size_t i, p, k;
699
700 *addrlen = sizeof(*sin6);
701 sin6 = calloc(count, *addrlen);
702 if (!sin6)
703 return -FI_ENOMEM;
704
705 sin6_temp.sin6_addr = ip;
706
707 for (i = 0, k = 0; i < ipcnt; i++) {
708 for (p = 0; p < portcnt; p++, k++) {
709 sin6[k].sin6_family = AF_INET6;
710 sin6[k].sin6_addr = sin6_temp.sin6_addr;
711 sin6[k].sin6_port = htons(port + p);
712 }
713 /* TODO: should we skip addresses x::0 and x::255? */
714 for (j = 15; j >= 0; j--) {
715 if (++sin6_temp.sin6_addr.s6_addr[j] < 255)
716 break;
717 }
718 }
719 *addr = sin6;
720 return count;
721 }
722
/*
 * Resolve a symmetric range of host names and services into an array of
 * nodecnt x svccnt socket addresses.
 *
 * Host names are generated by stripping the trailing decimal digits from
 * node and appending (index + n) for n in [0, nodecnt); services are
 * generated as (atoi(service) + s) for s in [0, svccnt).  When a count is
 * 1, node / service is used verbatim.  Each generated name/service pair
 * is resolved with getaddrinfo() for the domain's address family.
 *
 * Returns the number of addresses on success.  On failure returns a
 * negative error code (-FI_EINVAL, -FI_ENOMEM, or -FI_ENODATA when a name
 * cannot be resolved), releases the array and sets *addr to NULL.
 *
 * Caller should free *addr
 */
static int ip_av_nodesym_getaddr(struct util_av *av, const char *node,
				 size_t nodecnt, const char *service,
				 size_t svccnt, void **addr, size_t *addrlen)
{
	struct addrinfo hints, *ai;
	void *addr_temp;
	char name[FI_NAME_MAX];
	char svc[FI_NAME_MAX];
	size_t name_len, n, s;
	int ret, name_index, svc_index, count = nodecnt * svccnt;

	/* name[] and svc[] must be able to hold the inputs */
	if (strlen(node) >= sizeof(name) || strlen(service) >= sizeof(svc))
		return -FI_EINVAL;

	memset(&hints, 0, sizeof hints);

	hints.ai_socktype = SOCK_DGRAM;
	switch (av->domain->addr_format) {
	case FI_SOCKADDR_IN:
		hints.ai_family = AF_INET;
		*addrlen = sizeof(struct sockaddr_in);
		break;
	case FI_SOCKADDR_IN6:
		hints.ai_family = AF_INET6;
		*addrlen = sizeof(struct sockaddr_in6);
		break;
	default:
		FI_INFO(av->prov, FI_LOG_AV, "Unknown address format!\n");
		return -FI_EINVAL;
	}

	*addr = calloc(nodecnt * svccnt, *addrlen);
	if (!*addr)
		return -FI_ENOMEM;

	addr_temp = *addr;

	/* Split node into a prefix and a trailing numeric index; stop at
	 * the start of the string for empty or all-digit names. */
	for (name_len = strlen(node);
	     name_len > 0 && isdigit((unsigned char) node[name_len - 1]); )
		name_len--;

	memcpy(name, node, name_len);
	name[name_len] = '\0';
	name_index = atoi(node + name_len);
	svc_index = atoi(service);

	for (n = 0; n < nodecnt; n++) {
		if (nodecnt == 1) {
			strncpy(name, node, sizeof(name) - 1);
			name[FI_NAME_MAX - 1] = '\0';
		} else {
			snprintf(name + name_len, sizeof(name) - name_len - 1,
				 "%zu", name_index + n);
		}

		for (s = 0; s < svccnt; s++) {
			if (svccnt == 1) {
				strncpy(svc, service, sizeof(svc) - 1);
				svc[FI_NAME_MAX - 1] = '\0';
			} else {
				snprintf(svc, sizeof(svc) - 1,
					 "%zu", svc_index + s);
			}
			FI_INFO(av->prov, FI_LOG_AV, "resolving %s:%s for AV "
				"insert\n", name, svc);

			/* resolve the generated pair, not the range base */
			ret = getaddrinfo(name, svc, &hints, &ai);
			if (ret) {
				/* EAI_* codes may be positive; never let
				 * one be mistaken for an address count */
				ret = -FI_ENODATA;
				goto err;
			}

			memcpy(addr_temp, ai->ai_addr, *addrlen);
			addr_temp = (char *)addr_temp + *addrlen;
			freeaddrinfo(ai);
		}
	}
	return count;
err:
	free(*addr);
	*addr = NULL;
	return ret;
}
799
800 /* Caller should free *addr */
ofi_ip_av_sym_getaddr(struct util_av * av,const char * node,size_t nodecnt,const char * service,size_t svccnt,void ** addr,size_t * addrlen)801 int ofi_ip_av_sym_getaddr(struct util_av *av, const char *node,
802 size_t nodecnt, const char *service,
803 size_t svccnt, void **addr, size_t *addrlen)
804 {
805 struct in6_addr ip6;
806 struct in_addr ip4;
807 int ret;
808
809 if (strlen(node) >= FI_NAME_MAX || strlen(service) >= FI_NAME_MAX) {
810 FI_WARN(av->prov, FI_LOG_AV,
811 "node or service name is too long\n");
812 return -FI_ENOSYS;
813 }
814
815 ret = inet_pton(AF_INET, node, &ip4);
816 if (ret == 1) {
817 FI_INFO(av->prov, FI_LOG_AV, "insert symmetric IPv4\n");
818 return ip_av_ip4sym_getaddr(av, ip4, nodecnt,
819 (uint16_t) strtol(service, NULL, 0),
820 svccnt, addr, addrlen);
821 }
822
823 ret = inet_pton(AF_INET6, node, &ip6);
824 if (ret == 1) {
825 FI_INFO(av->prov, FI_LOG_AV, "insert symmetric IPv6\n");
826 return ip_av_ip6sym_getaddr(av, ip6, nodecnt,
827 (uint16_t) strtol(service, NULL, 0),
828 svccnt, addr, addrlen);
829 }
830
831 FI_INFO(av->prov, FI_LOG_AV, "insert symmetric host names\n");
832 return ip_av_nodesym_getaddr(av, node, nodecnt, service,
833 svccnt, addr, addrlen);
834 }
835
/*
 * fi_av_insertsym implementation for IP AVs: the symmetric description is
 * expanded into an address array, the array is inserted, and the array is
 * freed again.
 *
 * Returns the result of the insertion, or the zero/negative result of the
 * expansion when no address was produced.
 */
static int ip_av_insertsym(struct fid_av *av_fid, const char *node,
			   size_t nodecnt, const char *service, size_t svccnt,
			   fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	struct util_av *util_av = container_of(av_fid, struct util_av, av_fid);
	void *addrs;
	size_t len;
	int rc, total;

	rc = ofi_verify_av_insert(util_av, flags);
	if (rc)
		return rc;

	total = ofi_ip_av_sym_getaddr(util_av, node, nodecnt, service,
				      svccnt, &addrs, &len);
	if (total > 0) {
		rc = ofi_ip_av_insertv(util_av, addrs, len, total,
				       fi_addr, context);
		free(addrs);
		return rc;
	}

	return total;
}
860
/*
 * fi_av_remove implementation for IP AVs.
 *
 * Removes the count entries listed in fi_addr, taking av->lock around
 * each removal.  Individual failures are logged and otherwise ignored.
 *
 * Returns -FI_EINVAL when any flag is passed, 0 otherwise.
 */
int ofi_ip_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr,
		     size_t count, uint64_t flags)
{
	struct util_av *av;
	size_t i;
	int ret;

	av = container_of(av_fid, struct util_av, av_fid);
	if (flags) {
		FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n");
		return -FI_EINVAL;
	}

	/*
	 * It's more efficient to remove addresses from high to low index.
	 * We assume that addresses are removed in the same order that they were
	 * added -- i.e. fi_addr passed in here was also passed into insert.
	 * Thus, we walk through the array backwards.
	 *
	 * The index stays a size_t so that neither count == 0 nor counts
	 * above INT_MAX depend on a narrowing conversion.
	 */
	for (i = count; i-- > 0; ) {
		fastlock_acquire(&av->lock);
		ret = ofi_av_remove_addr(av, fi_addr[i]);
		fastlock_release(&av->lock);
		if (ret) {
			FI_WARN(av->prov, FI_LOG_AV,
				"removal of fi_addr %"PRIu64" failed\n",
				fi_addr[i]);
		}
	}
	return 0;
}
891
/*
 * fi_av_lookup implementation for IP AVs.
 *
 * Copies the stored address for fi_addr into addr, limited to the smaller
 * of the caller's buffer size (*addrlen on input) and the AV's address
 * length, then sets *addrlen to the AV's address length.  Always returns
 * 0.
 */
int ofi_ip_av_lookup(struct fid_av *av_fid, fi_addr_t fi_addr,
		     void *addr, size_t *addrlen)
{
	struct util_av *util_av = container_of(av_fid, struct util_av, av_fid);
	size_t stored_len;
	size_t copy_len;
	void *stored;

	stored = ofi_av_lookup_addr(util_av, fi_addr, &stored_len);
	copy_len = MIN(*addrlen, stored_len);
	memcpy(addr, stored, copy_len);

	*addrlen = util_av->addrlen;
	return 0;
}
905
906 const char *
ofi_ip_av_straddr(struct fid_av * av,const void * addr,char * buf,size_t * len)907 ofi_ip_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len)
908 {
909 return ofi_straddr(buf, len, FI_SOCKADDR, addr);
910 }
911
912 static struct fi_ops_av ip_av_ops = {
913 .size = sizeof(struct fi_ops_av),
914 .insert = ofi_ip_av_insert,
915 .insertsvc = ip_av_insertsvc,
916 .insertsym = ip_av_insertsym,
917 .remove = ofi_ip_av_remove,
918 .lookup = ofi_ip_av_lookup,
919 .straddr = ofi_ip_av_straddr,
920 };
921
ip_av_close(struct fid * av_fid)922 static int ip_av_close(struct fid *av_fid)
923 {
924 struct util_av *av;
925 int ret;
926
927 av = container_of(av_fid, struct util_av, av_fid.fid);
928 ret = ofi_av_close(av);
929 if (ret)
930 return ret;
931 free(av);
932 return 0;
933 }
934
935 static struct fi_ops ip_av_fi_ops = {
936 .size = sizeof(struct fi_ops),
937 .close = ip_av_close,
938 .bind = ofi_av_bind,
939 .control = fi_no_control,
940 .ops_open = fi_no_ops_open,
941 };
942
ofi_ip_av_create_flags(struct fid_domain * domain_fid,struct fi_av_attr * attr,struct fid_av ** av,void * context,int flags)943 int ofi_ip_av_create_flags(struct fid_domain *domain_fid, struct fi_av_attr *attr,
944 struct fid_av **av, void *context, int flags)
945 {
946 struct util_domain *domain;
947 struct util_av_attr util_attr;
948 struct util_av *util_av;
949 int ret;
950
951 domain = container_of(domain_fid, struct util_domain, domain_fid);
952 if (domain->addr_format == FI_SOCKADDR_IN)
953 util_attr.addrlen = sizeof(struct sockaddr_in);
954 else
955 util_attr.addrlen = sizeof(struct sockaddr_in6);
956
957 util_attr.flags = flags;
958
959 if (attr->type == FI_AV_UNSPEC)
960 attr->type = FI_AV_MAP;
961
962 util_av = calloc(1, sizeof(*util_av));
963 if (!util_av)
964 return -FI_ENOMEM;
965
966 ret = ofi_av_init(domain, attr, &util_attr, util_av, context);
967 if (ret) {
968 free(util_av);
969 return ret;
970 }
971
972 *av = &util_av->av_fid;
973 (*av)->fid.ops = &ip_av_fi_ops;
974 (*av)->ops = &ip_av_ops;
975 return 0;
976 }
977
/*
 * Create an IP address vector with no internal flags; see
 * ofi_ip_av_create_flags().
 */
int ofi_ip_av_create(struct fid_domain *domain_fid, struct fi_av_attr *attr,
		     struct fid_av **av, void *context)
{
	const int no_flags = 0;

	return ofi_ip_av_create_flags(domain_fid, attr, av, context, no_flags);
}
983