1 /*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved.
4 * Copyright (c) 2013-2018 Intel Corp., Inc. All rights reserved.
5 * Copyright (c) 2015 Los Alamos Nat. Security, LLC. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36 #include "config.h"
37
38 #include <complex.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include <fcntl.h>
44 #include <unistd.h>
45 #include <poll.h>
46 #include <pthread.h>
47 #include <sys/time.h>
48
49 #include <inttypes.h>
50 #include <netinet/in.h>
51 #include <arpa/inet.h>
52 #include <sys/types.h>
53 #include <sys/socket.h>
54 #include <netdb.h>
55
56 #if HAVE_GETIFADDRS
57 #include <net/if.h>
58 #include <ifaddrs.h>
59 #endif
60
61 #include <ofi_signal.h>
62 #include <rdma/providers/fi_prov.h>
63 #include <rdma/fi_errno.h>
64 #include <ofi.h>
65 #include <ofi_util.h>
66 #include <ofi_epoll.h>
67 #include <ofi_list.h>
68 #include <ofi_osd.h>
69 #include <shared/ofi_str.h>
70
71 struct fi_provider core_prov = {
72 .name = "core",
73 .version = OFI_VERSION_DEF_PROV,
74 .fi_version = OFI_VERSION_LATEST
75 };
76
77 struct ofi_common_locks common_locks = {
78 .ini_lock = PTHREAD_MUTEX_INITIALIZER,
79 .util_fabric_lock = PTHREAD_MUTEX_INITIALIZER,
80 };
81
fi_poll_fd(int fd,int timeout)82 int fi_poll_fd(int fd, int timeout)
83 {
84 struct pollfd fds;
85 int ret;
86
87 fds.fd = fd;
88 fds.events = POLLIN;
89 ret = poll(&fds, 1, timeout);
90 return ret == SOCKET_ERROR ? -ofi_sockerr() : ret;
91 }
92
/*
 * Return a mask with every bit set from bit 0 up to and including the most
 * significant set bit of mem_tag_format, or 0 when mem_tag_format is 0.
 */
uint64_t ofi_max_tag(uint64_t mem_tag_format)
{
	if (!mem_tag_format)
		return 0;

	return UINT64_MAX >> (64 - ofi_msb(mem_tag_format));
}
97
ofi_tag_format(uint64_t max_tag)98 uint64_t ofi_tag_format(uint64_t max_tag)
99 {
100 return max_tag ? FI_TAG_GENERIC >> (64 - ofi_msb(max_tag)) : 0;
101 }
102
/*
 * Return the 1-based position of the most significant set bit of num,
 * i.e. the number of bits needed to represent it.  Returns 0 for num == 0.
 */
uint8_t ofi_msb(uint64_t num)
{
	uint8_t nbits;

	for (nbits = 0; num != 0; num >>= 1)
		nbits++;

	return nbits;
}
113
/*
 * Return the 1-based position of the least significant set bit of num,
 * or 0 when num is 0.
 */
uint8_t ofi_lsb(uint64_t num)
{
	/* Isolate the lowest set bit, then let ofi_msb() locate it. */
	uint64_t lowest = num & ~(num - 1);

	return ofi_msb(lowest);
}
118
ofi_send_allowed(uint64_t caps)119 int ofi_send_allowed(uint64_t caps)
120 {
121 if (caps & FI_MSG ||
122 caps & FI_TAGGED) {
123 if (caps & FI_SEND)
124 return 1;
125 if (caps & FI_RECV)
126 return 0;
127 return 1;
128 }
129
130 return 0;
131 }
132
ofi_recv_allowed(uint64_t caps)133 int ofi_recv_allowed(uint64_t caps)
134 {
135 if (caps & FI_MSG ||
136 caps & FI_TAGGED) {
137 if (caps & FI_RECV)
138 return 1;
139 if (caps & FI_SEND)
140 return 0;
141 return 1;
142 }
143
144 return 0;
145 }
146
ofi_rma_initiate_allowed(uint64_t caps)147 int ofi_rma_initiate_allowed(uint64_t caps)
148 {
149 if (caps & FI_RMA ||
150 caps & FI_ATOMICS) {
151 if (caps & FI_WRITE ||
152 caps & FI_READ)
153 return 1;
154 if (caps & FI_REMOTE_WRITE ||
155 caps & FI_REMOTE_READ)
156 return 0;
157 return 1;
158 }
159
160 return 0;
161 }
162
ofi_rma_target_allowed(uint64_t caps)163 int ofi_rma_target_allowed(uint64_t caps)
164 {
165 if (caps & FI_RMA ||
166 caps & FI_ATOMICS) {
167 if (caps & FI_REMOTE_WRITE ||
168 caps & FI_REMOTE_READ)
169 return 1;
170 if (caps & FI_WRITE ||
171 caps & FI_READ)
172 return 0;
173 return 1;
174 }
175
176 return 0;
177 }
178
ofi_ep_bind_valid(const struct fi_provider * prov,struct fid * bfid,uint64_t flags)179 int ofi_ep_bind_valid(const struct fi_provider *prov, struct fid *bfid, uint64_t flags)
180 {
181 if (!bfid) {
182 FI_WARN(prov, FI_LOG_EP_CTRL, "NULL bind fid\n");
183 return -FI_EINVAL;
184 }
185
186 switch (bfid->fclass) {
187 case FI_CLASS_CQ:
188 if (flags & ~(FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION)) {
189 FI_WARN(prov, FI_LOG_EP_CTRL, "invalid CQ flags\n");
190 return -FI_EBADFLAGS;
191 }
192 break;
193 case FI_CLASS_CNTR:
194 if (flags & ~(FI_SEND | FI_RECV | FI_READ | FI_WRITE |
195 FI_REMOTE_READ | FI_REMOTE_WRITE)) {
196 FI_WARN(prov, FI_LOG_EP_CTRL, "invalid cntr flags\n");
197 return -FI_EBADFLAGS;
198 }
199 break;
200 default:
201 if (flags) {
202 FI_WARN(prov, FI_LOG_EP_CTRL, "invalid bind flags\n");
203 return -FI_EBADFLAGS;
204 }
205 break;
206 }
207 return FI_SUCCESS;
208 }
209
ofi_check_rx_mode(const struct fi_info * info,uint64_t flags)210 int ofi_check_rx_mode(const struct fi_info *info, uint64_t flags)
211 {
212 if (!info)
213 return 0;
214
215 if (info->rx_attr && (info->rx_attr->mode & flags))
216 return 1;
217
218 return (info->mode & flags) ? 1 : 0;
219 }
220
/*
 * Return the current CLOCK_MONOTONIC reading in nanoseconds.
 *
 * The conversion is carried out in 64-bit unsigned arithmetic so that the
 * seconds-to-nanoseconds multiplication cannot overflow on platforms where
 * time_t is only 32 bits wide.
 */
uint64_t ofi_gettime_ns(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (uint64_t) now.tv_sec * 1000000000ULL + (uint64_t) now.tv_nsec;
}
228
/* Return ofi_gettime_ns() converted to whole microseconds. */
uint64_t ofi_gettime_us(void)
{
	uint64_t ns = ofi_gettime_ns();

	return ns / 1000;
}
233
/* Return ofi_gettime_ns() converted to whole milliseconds. */
uint64_t ofi_gettime_ms(void)
{
	uint64_t ns = ofi_gettime_ns();

	return ns / 1000000;
}
238
ofi_get_sa_family(const struct fi_info * info)239 uint16_t ofi_get_sa_family(const struct fi_info *info)
240 {
241 if (!info)
242 return 0;
243
244 switch (info->addr_format) {
245 case FI_SOCKADDR_IN:
246 return AF_INET;
247 case FI_SOCKADDR_IN6:
248 return AF_INET6;
249 case FI_SOCKADDR_IB:
250 return AF_IB;
251 case FI_SOCKADDR:
252 case FI_FORMAT_UNSPEC:
253 if (info->src_addr)
254 return ((struct sockaddr *) info->src_addr)->sa_family;
255
256 if (info->dest_addr)
257 return ((struct sockaddr *) info->dest_addr)->sa_family;
258 /* fall through */
259 default:
260 return 0;
261 }
262 }
263
ofi_straddr(char * buf,size_t * len,uint32_t addr_format,const void * addr)264 const char *ofi_straddr(char *buf, size_t *len,
265 uint32_t addr_format, const void *addr)
266 {
267 const struct sockaddr *sock_addr;
268 const struct sockaddr_in6 *sin6;
269 const struct sockaddr_in *sin;
270 char str[INET6_ADDRSTRLEN + 8];
271 size_t size;
272
273 if (!addr || !len)
274 return NULL;
275
276 switch (addr_format) {
277 case FI_SOCKADDR:
278 sock_addr = addr;
279 switch (sock_addr->sa_family) {
280 case AF_INET:
281 goto sa_sin;
282 case AF_INET6:
283 goto sa_sin6;
284 default:
285 return NULL;
286 }
287 break;
288 case FI_SOCKADDR_IN:
289 sa_sin:
290 sin = addr;
291 if (!inet_ntop(sin->sin_family, &sin->sin_addr, str,
292 sizeof(str)))
293 return NULL;
294
295 size = snprintf(buf, MIN(*len, sizeof(str)),
296 "fi_sockaddr_in://%s:%" PRIu16, str,
297 ntohs(sin->sin_port));
298 break;
299 case FI_SOCKADDR_IN6:
300 sa_sin6:
301 sin6 = addr;
302 if (!inet_ntop(sin6->sin6_family, &sin6->sin6_addr, str,
303 sizeof(str)))
304 return NULL;
305
306 size = snprintf(buf, MIN(*len, sizeof(str)),
307 "fi_sockaddr_in6://[%s]:%" PRIu16, str,
308 ntohs(sin6->sin6_port));
309 break;
310 case FI_ADDR_EFA:
311 memset(str, 0, sizeof(str));
312 if (!inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN))
313 return NULL;
314 size = snprintf(buf, *len, "fi_addr_efa://[%s]:%" PRIu16 ":%" PRIu32,
315 str, *((uint16_t *)addr + 8), *((uint32_t *)addr + 5));
316 break;
317 case FI_SOCKADDR_IB:
318 size = snprintf(buf, *len, "fi_sockaddr_ib://%p", addr);
319 break;
320 case FI_ADDR_PSMX:
321 size = snprintf(buf, *len, "fi_addr_psmx://%" PRIx64,
322 *(uint64_t *)addr);
323 break;
324 case FI_ADDR_PSMX2:
325 size =
326 snprintf(buf, *len, "fi_addr_psmx2://%" PRIx64 ":%" PRIx64,
327 *(uint64_t *)addr, *((uint64_t *)addr + 1));
328 break;
329 case FI_ADDR_GNI:
330 size = snprintf(buf, *len, "fi_addr_gni://%" PRIx64,
331 *(uint64_t *)addr);
332 break;
333 case FI_ADDR_BGQ:
334 size = snprintf(buf, *len, "fi_addr_bgq://%p", addr);
335 break;
336 case FI_ADDR_MLX:
337 size = snprintf(buf, *len, "fi_addr_mlx://%p", addr);
338 break;
339 case FI_ADDR_IB_UD:
340 memset(str, 0, sizeof(str));
341 if (!inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN))
342 return NULL;
343 size = snprintf(buf, *len, "fi_addr_ib_ud://"
344 "%s" /* GID */ ":%" PRIx32 /* QPN */
345 "/%" PRIx16 /* LID */ "/%" PRIx16 /* P_Key */
346 "/%" PRIx8 /* SL */,
347 str, *((uint32_t *)addr + 4),
348 *((uint16_t *)addr + 10),
349 *((uint16_t *)addr + 11),
350 *((uint8_t *)addr + 26));
351 break;
352 case FI_ADDR_STR:
353 size = snprintf(buf, *len, "%s", (const char *) addr);
354 break;
355 default:
356 return NULL;
357 }
358
359 /* Make sure that possibly truncated messages have a null terminator. */
360 if (buf && *len)
361 buf[*len - 1] = '\0';
362 *len = size + 1;
363 return buf;
364 }
365
ofi_addr_format(const char * str)366 static uint32_t ofi_addr_format(const char *str)
367 {
368 char fmt[16];
369 int ret;
370
371 ret = sscanf(str, "%16[^:]://", fmt);
372 if (ret != 1)
373 return FI_FORMAT_UNSPEC;
374
375 fmt[sizeof(fmt) - 1] = '\0';
376 if (!strcasecmp(fmt, "fi_sockaddr_in"))
377 return FI_SOCKADDR_IN;
378 else if (!strcasecmp(fmt, "fi_sockaddr_in6"))
379 return FI_SOCKADDR_IN6;
380 else if (!strcasecmp(fmt, "fi_sockaddr_ib"))
381 return FI_SOCKADDR_IB;
382 else if (!strcasecmp(fmt, "fi_addr_psmx"))
383 return FI_ADDR_PSMX;
384 else if (!strcasecmp(fmt, "fi_addr_psmx2"))
385 return FI_ADDR_PSMX2;
386 else if (!strcasecmp(fmt, "fi_addr_gni"))
387 return FI_ADDR_GNI;
388 else if (!strcasecmp(fmt, "fi_addr_bgq"))
389 return FI_ADDR_BGQ;
390 else if (!strcasecmp(fmt, "fi_addr_efa"))
391 return FI_ADDR_EFA;
392 else if (!strcasecmp(fmt, "fi_addr_mlx"))
393 return FI_ADDR_MLX;
394 else if (!strcasecmp(fmt, "fi_addr_ib_ud"))
395 return FI_ADDR_IB_UD;
396
397 return FI_FORMAT_UNSPEC;
398 }
399
ofi_str_to_psmx(const char * str,void ** addr,size_t * len)400 static int ofi_str_to_psmx(const char *str, void **addr, size_t *len)
401 {
402 int ret;
403
404 *len = sizeof(uint64_t);
405 *addr = calloc(1, *len);
406 if (!(*addr))
407 return -FI_ENOMEM;
408
409 ret = sscanf(str, "%*[^:]://%" SCNx64, (uint64_t *) *addr);
410 if (ret == 1)
411 return 0;
412
413 free(*addr);
414 return -FI_EINVAL;
415 }
416
ofi_str_to_psmx2(const char * str,void ** addr,size_t * len)417 static int ofi_str_to_psmx2(const char *str, void **addr, size_t *len)
418 {
419 int ret;
420
421 *len = 2 * sizeof(uint64_t);
422 *addr = calloc(1, *len);
423 if (!(*addr))
424 return -FI_ENOMEM;
425
426 ret = sscanf(str, "%*[^:]://%" SCNx64 ":%" SCNx64,
427 (uint64_t *) *addr, (uint64_t *) *addr + 1);
428 if (ret == 2)
429 return 0;
430
431 free(*addr);
432 return -FI_EINVAL;
433 }
434
ofi_str_to_ib_ud(const char * str,void ** addr,size_t * len)435 static int ofi_str_to_ib_ud(const char *str, void **addr, size_t *len)
436 {
437 int ret;
438 char gid[INET6_ADDRSTRLEN];
439
440 memset(gid, 0, sizeof(gid));
441
442 *len = 32;
443 *addr = calloc(1, *len);
444 if(!(*addr))
445 return -FI_ENOMEM;
446
447 ret = sscanf(str, "%*[^:]://"
448 "%s" /* GID */ ":%" SCNx32 /* QPN */
449 ":%" SCNx16 /* LID */ ":%" SCNx16 /* P_Key */
450 ":%" SCNx8 /* SL */,
451 gid, (uint32_t *)*addr + 4,
452 (uint16_t *)*addr + 10,
453 (uint16_t *)*addr + 11,
454 (uint8_t *)*addr + 26);
455 if ((ret == 5) && (inet_pton(AF_INET6, gid, *addr) > 0))
456 return FI_SUCCESS;
457
458 free(*addr);
459 return -FI_EINVAL;
460 }
461
ofi_str_to_efa(const char * str,void ** addr,size_t * len)462 static int ofi_str_to_efa(const char *str, void **addr, size_t *len)
463 {
464 char gid[INET6_ADDRSTRLEN];
465 uint16_t *qpn;
466 uint32_t *qkey;
467 int ret;
468
469 memset(gid, 0, sizeof(gid));
470
471 *len = 24;
472 *addr = calloc(1, *len);
473 if (!*addr)
474 return -FI_ENOMEM;
475 qpn = (uint16_t *)*addr + 8;
476 qkey = (uint32_t *)*addr + 5;
477 ret = sscanf(str, "%*[^:]://[%64[^]]]:%" SCNu16 ":%" SCNu32, gid, qpn, qkey);
478 if (ret < 1)
479 goto err;
480
481 if (inet_pton(AF_INET6, gid, *addr) > 0)
482 return FI_SUCCESS;
483
484 err:
485 free(*addr);
486 return -FI_EINVAL;
487 }
488
ofi_str_to_sin(const char * str,void ** addr,size_t * len)489 static int ofi_str_to_sin(const char *str, void **addr, size_t *len)
490 {
491 struct sockaddr_in *sin;
492 char ip[64];
493 int ret;
494
495 *len = sizeof(*sin);
496 sin = calloc(1, *len);
497 if (!sin)
498 return -FI_ENOMEM;
499
500 sin->sin_family = AF_INET;
501 ret = sscanf(str, "%*[^:]://:%" SCNu16, &sin->sin_port);
502 if (ret == 1)
503 goto match_port;
504
505 ret = sscanf(str, "%*[^:]://%64[^:]:%" SCNu16, ip, &sin->sin_port);
506 if (ret == 2)
507 goto match_ip;
508
509 ret = sscanf(str, "%*[^:]://%64[^:/]", ip);
510 if (ret == 1)
511 goto match_ip;
512
513 FI_WARN(&core_prov, FI_LOG_CORE,
514 "Malformed FI_ADDR_STR: %s\n", str);
515 err:
516 free(sin);
517 return -FI_EINVAL;
518
519 match_ip:
520 ip[sizeof(ip) - 1] = '\0';
521 ret = inet_pton(AF_INET, ip, &sin->sin_addr);
522 if (ret != 1) {
523 FI_WARN(&core_prov, FI_LOG_CORE,
524 "Unable to convert IPv4 address: %s\n", ip);
525 goto err;
526 }
527
528 match_port:
529 sin->sin_port = htons(sin->sin_port);
530 *addr = sin;
531 return 0;
532 }
533
ofi_str_to_sin6(const char * str,void ** addr,size_t * len)534 static int ofi_str_to_sin6(const char *str, void **addr, size_t *len)
535 {
536 struct sockaddr_in6 *sin6;
537 char ip[64];
538 int ret;
539
540 *len = sizeof(*sin6);
541 sin6 = calloc(1, *len);
542 if (!sin6)
543 return -FI_ENOMEM;
544
545 sin6->sin6_family = AF_INET6;
546 ret = sscanf(str, "%*[^:]://:%" SCNu16, &sin6->sin6_port);
547 if (ret == 1)
548 goto match_port;
549
550 ret = sscanf(str, "%*[^:]://[%64[^]]]:%" SCNu16, ip, &sin6->sin6_port);
551 if (ret == 2)
552 goto match_ip;
553
554 ret = sscanf(str, "%*[^:]://[%64[^]]", ip);
555 if (ret == 1)
556 goto match_ip;
557
558 FI_WARN(&core_prov, FI_LOG_CORE,
559 "Malformed FI_ADDR_STR: %s\n", str);
560 err:
561 free(sin6);
562 return -FI_EINVAL;
563
564 match_ip:
565 ip[sizeof(ip) - 1] = '\0';
566 ret = inet_pton(AF_INET6, ip, &sin6->sin6_addr);
567 if (ret != 1) {
568 FI_WARN(&core_prov, FI_LOG_CORE,
569 "Unable to convert IPv6 address: %s\n", ip);
570 goto err;
571 }
572
573 match_port:
574 sin6->sin6_port = htons(sin6->sin6_port);
575 *addr = sin6;
576 return 0;
577 }
578
/*
 * Resolve a host name with getaddrinfo() and copy the first result into a
 * newly allocated buffer.
 *
 * On success *addr_format is FI_SOCKADDR_IN6 for an AF_INET6 result and
 * FI_SOCKADDR_IN otherwise, *addr points to the copy (released by the
 * caller with free()), *len is its size, and 0 is returned.  On failure
 * returns getaddrinfo()'s error code or -FI_ENOMEM.
 */
static int ofi_hostname_toaddr(const char *name, uint32_t *addr_format,
			       void **addr, size_t *len)
{
	struct addrinfo *res;
	int ret;

	ret = getaddrinfo(name, NULL, NULL, &res);
	if (ret)
		return ret;

	if (res->ai_family == AF_INET6)
		*addr_format = FI_SOCKADDR_IN6;
	else
		*addr_format = FI_SOCKADDR_IN;

	*len = res->ai_addrlen;
	*addr = calloc(1, *len);
	if (*addr)
		memcpy(*addr, res->ai_addr, *len);
	else
		ret = -FI_ENOMEM;

	freeaddrinfo(res);
	return ret;
}
603
ofi_ifname_toaddr(const char * name,uint32_t * addr_format,void ** addr,size_t * len)604 static int ofi_ifname_toaddr(const char *name, uint32_t *addr_format,
605 void **addr, size_t *len)
606 {
607 #if HAVE_GETIFADDRS
608 struct ifaddrs *ifaddrs, *ifa;
609 int ret;
610
611 ret = ofi_getifaddrs(&ifaddrs);
612 if (ret)
613 return ret;
614
615 for (ifa = ifaddrs; ifa; ifa = ifa->ifa_next) {
616 if (ifa->ifa_addr->sa_family != AF_INET &&
617 ifa->ifa_addr->sa_family != AF_INET6)
618 continue;
619 if (!strcmp(name, ifa->ifa_name))
620 break;
621 }
622
623 if (!ifa) {
624 ret = -FI_EINVAL;
625 goto out;
626 }
627
628 if (ifa->ifa_addr->sa_family == AF_INET6) {
629 *addr_format = FI_SOCKADDR_IN6;
630 *len = sizeof(struct sockaddr_in6);
631 } else {
632 *addr_format = FI_SOCKADDR_IN;
633 *len = sizeof(struct sockaddr_in);
634 }
635
636 *addr = calloc(1, *len);
637 if (!*addr) {
638 ret = -FI_ENOMEM;
639 goto out;
640 }
641
642 memcpy(*addr, ifa->ifa_addr, *len);
643
644 out:
645 freeifaddrs(ifaddrs);
646 return ret;
647 #else
648 return -FI_ENOSYS;
649 #endif
650 }
651
ofi_str_toaddr(const char * str,uint32_t * addr_format,void ** addr,size_t * len)652 int ofi_str_toaddr(const char *str, uint32_t *addr_format,
653 void **addr, size_t *len)
654 {
655 *addr_format = ofi_addr_format(str);
656
657 switch (*addr_format) {
658 case FI_FORMAT_UNSPEC:
659 if (!ofi_ifname_toaddr(str, addr_format, addr, len))
660 return 0;
661 if (!ofi_hostname_toaddr(str, addr_format, addr, len))
662 return 0;
663 return -FI_EINVAL;
664 case FI_SOCKADDR_IN:
665 return ofi_str_to_sin(str, addr, len);
666 case FI_SOCKADDR_IN6:
667 return ofi_str_to_sin6(str, addr, len);
668 case FI_ADDR_PSMX:
669 return ofi_str_to_psmx(str, addr, len);
670 case FI_ADDR_PSMX2:
671 return ofi_str_to_psmx2(str, addr, len);
672 case FI_ADDR_IB_UD:
673 return ofi_str_to_ib_ud(str, addr, len);
674 case FI_ADDR_EFA:
675 return ofi_str_to_efa(str, addr, len);
676 case FI_SOCKADDR_IB:
677 case FI_ADDR_GNI:
678 case FI_ADDR_BGQ:
679 case FI_ADDR_MLX:
680 default:
681 return -FI_ENOSYS;
682 }
683 }
684
/*
 * Render up to 31 bytes of data as lowercase hexadecimal.
 *
 * Input longer than 31 bytes is cut to the first 31 and a trailing '~'
 * marks the truncation.  The marker is added only when bytes were really
 * dropped; an input of exactly 31 bytes is printed in full without it.
 *
 * Returns a pointer to a static buffer that the next call overwrites, so
 * the result must be consumed before calling again.
 */
const char *ofi_hex_str(const uint8_t *data, size_t len)
{
	static char str[64];
	static const char hex[] = "0123456789abcdef";
	/* Two characters per byte, plus room for '~' and the NUL. */
	const size_t max_bytes = (sizeof(str) >> 1) - 1;
	int truncated = 0;
	size_t i, p = 0;

	if (len > max_bytes) {
		len = max_bytes;
		truncated = 1;
	}

	for (i = 0; i < len; i++) {
		str[p++] = hex[data[i] >> 4];
		str[p++] = hex[data[i] & 0xF];
	}

	if (truncated)
		str[p++] = '~';

	str[p] = '\0';
	return str;
}
705
ofi_addr_cmp(const struct fi_provider * prov,const struct sockaddr * sa1,const struct sockaddr * sa2)706 int ofi_addr_cmp(const struct fi_provider *prov, const struct sockaddr *sa1,
707 const struct sockaddr *sa2)
708 {
709 int cmp;
710
711 switch (sa1->sa_family) {
712 case AF_INET:
713 cmp = memcmp(&ofi_sin_addr(sa1), &ofi_sin_addr(sa2),
714 sizeof(ofi_sin_addr(sa1)));
715 return cmp ? cmp : memcmp(&ofi_sin_port(sa1),
716 &ofi_sin_port(sa2),
717 sizeof(ofi_sin_port(sa1)));
718 case AF_INET6:
719 cmp = memcmp(&ofi_sin6_addr(sa1), &ofi_sin6_addr(sa2),
720 sizeof(ofi_sin6_addr(sa1)));
721 return cmp ? cmp : memcmp(&ofi_sin6_port(sa1),
722 &ofi_sin_port(sa2),
723 sizeof(ofi_sin6_port(sa1)));
724 default:
725 FI_WARN(prov, FI_LOG_FABRIC, "Invalid address format!\n");
726 assert(0);
727 return 0;
728 }
729 }
730
ofi_is_any_addr_port(struct sockaddr * addr)731 static int ofi_is_any_addr_port(struct sockaddr *addr)
732 {
733 switch (ofi_sa_family(addr)) {
734 case AF_INET:
735 return (ofi_ipv4_is_any_addr(addr) &&
736 ofi_sin_port(addr));
737 case AF_INET6:
738 return (ofi_ipv6_is_any_addr(addr) &&
739 ofi_sin6_port(addr));
740 default:
741 FI_WARN(&core_prov, FI_LOG_CORE,
742 "Unknown address format\n");
743 return 0;
744 }
745 }
746
/*
 * Decide whether the node/service/flags/hints combination describes a
 * wildcard listening address.
 *
 * Returns 0 immediately when hints names an address format other than
 * FI_FORMAT_UNSPEC, FI_SOCKADDR, FI_SOCKADDR_IN or FI_SOCKADDR_IN6.
 *
 * With a node: FI_SOURCE must be set and the node, resolved through
 * getaddrinfo(), must pass ofi_is_any_addr_port(); the result is then 1 if
 * a service was given.  Without a node: a hints->dest_addr yields 0, a
 * hints->src_addr is judged by ofi_is_any_addr_port(), and otherwise the
 * result is 1 only if FI_SOURCE is set and a service was given.
 */
int ofi_is_wildcard_listen_addr(const char *node, const char *service,
				uint64_t flags, const struct fi_info *hints)
{
	struct addrinfo *res = NULL;
	int is_any;

	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_SOCKADDR:
		case FI_SOCKADDR_IN:
		case FI_SOCKADDR_IN6:
			/* okay to call getaddrinfo, proceed with processing */
			break;
		default:
			return 0;
		}
	}

	if (node) {
		if (!(flags & FI_SOURCE))
			return 0;

		if (getaddrinfo(node, service, NULL, &res)) {
			FI_WARN(&core_prov, FI_LOG_CORE,
				"getaddrinfo failed!\n");
			return 0;
		}

		is_any = ofi_is_any_addr_port(res->ai_addr);
		freeaddrinfo(res);
		if (!is_any)
			return 0;

		/* FI_SOURCE is known to be set on this path. */
		return service ? 1 : 0;
	}

	if (hints) {
		if (hints->dest_addr)
			return 0;

		if (hints->src_addr)
			return ofi_is_any_addr_port(hints->src_addr);
	}

	return ((flags & FI_SOURCE) && service) ? 1 : 0;
}
790
/*
 * Copy srcaddr into maskaddr and AND its IP address bytes with netmask.
 *
 * Returns the number of bits set in the netmask bytes that were applied.
 * Returns 0 when the IP size or either IP pointer cannot be determined;
 * maskaddr has already received the unmasked copy of srcaddr by then.
 */
size_t ofi_mask_addr(struct sockaddr *maskaddr, const struct sockaddr *srcaddr,
		     const struct sockaddr *netmask)
{
	size_t idx, nbytes, prefix_len = 0;
	uint8_t *ip, *mask, bits;

	memcpy(maskaddr, srcaddr, ofi_sizeofaddr(srcaddr));
	nbytes = ofi_sizeofip(srcaddr);
	ip = ofi_get_ipaddr(maskaddr);
	mask = ofi_get_ipaddr(netmask);

	if (!nbytes || !ip || !mask)
		return 0;

	for (idx = 0; idx < nbytes; idx++) {
		ip[idx] &= mask[idx];

		/* Count the bits set in this mask byte. */
		for (bits = mask[idx]; bits; bits >>= 1)
			prefix_len += bits & 0x1;
	}

	return prefix_len;
}
819
ofi_straddr_log_internal(const char * func,int line,const struct fi_provider * prov,enum fi_log_level level,enum fi_log_subsys subsys,char * log_str,const void * addr)820 void ofi_straddr_log_internal(const char *func, int line,
821 const struct fi_provider *prov,
822 enum fi_log_level level,
823 enum fi_log_subsys subsys, char *log_str,
824 const void *addr)
825 {
826 char buf[OFI_ADDRSTRLEN];
827 uint32_t addr_format;
828 size_t len = sizeof(buf);
829
830 if (fi_log_enabled(prov, level, subsys)) {
831 addr_format = ofi_translate_addr_format(ofi_sa_family(addr));
832 fi_log(prov, level, subsys, func, line, "%s: %s\n", log_str,
833 ofi_straddr(buf, &len, addr_format, addr));
834 }
835 }
836
/*
 * Read and throw away len bytes from sock, one byte at a time.
 *
 * Stops at the first non-zero result from ofi_recvall_socket() and returns
 * it; returns 0 when all len bytes were consumed (or len was 0).
 */
int ofi_discard_socket(SOCKET sock, size_t len)
{
	ssize_t ret = 0;
	char scratch;

	while (len--) {
		ret = ofi_recvall_socket(sock, &scratch, 1);
		if (ret)
			break;
	}

	return ret;
}
846
847
ofi_pollfds_create(struct ofi_pollfds ** pfds)848 int ofi_pollfds_create(struct ofi_pollfds **pfds)
849 {
850 int ret;
851
852 *pfds = calloc(1, sizeof(struct ofi_pollfds));
853 if (!*pfds)
854 return -FI_ENOMEM;
855
856 (*pfds)->size = 64;
857 (*pfds)->fds = calloc((*pfds)->size, sizeof(*(*pfds)->fds) +
858 sizeof(*(*pfds)->context));
859 if (!(*pfds)->fds) {
860 ret = -FI_ENOMEM;
861 goto err1;
862 }
863 (*pfds)->context = (void *)((*pfds)->fds + (*pfds)->size);
864
865 ret = fd_signal_init(&(*pfds)->signal);
866 if (ret)
867 goto err2;
868
869 (*pfds)->fds[(*pfds)->nfds].fd = (*pfds)->signal.fd[FI_READ_FD];
870 (*pfds)->fds[(*pfds)->nfds].events = POLLIN;
871 (*pfds)->context[(*pfds)->nfds++] = NULL;
872 slist_init(&(*pfds)->work_item_list);
873 fastlock_init(&(*pfds)->lock);
874 return FI_SUCCESS;
875 err2:
876 free((*pfds)->fds);
877 err1:
878 free(*pfds);
879 return ret;
880 }
881
ofi_pollfds_ctl(struct ofi_pollfds * pfds,enum ofi_pollfds_ctl op,int fd,uint32_t events,void * context)882 static int ofi_pollfds_ctl(struct ofi_pollfds *pfds, enum ofi_pollfds_ctl op,
883 int fd, uint32_t events, void *context)
884 {
885 struct ofi_pollfds_work_item *item;
886
887 item = calloc(1,sizeof(*item));
888 if (!item)
889 return -FI_ENOMEM;
890
891 item->fd = fd;
892 item->events = events;
893 item->context = context;
894 item->type = op;
895 fastlock_acquire(&pfds->lock);
896 slist_insert_tail(&item->entry, &pfds->work_item_list);
897 fd_signal_set(&pfds->signal);
898 fastlock_release(&pfds->lock);
899 return 0;
900 }
901
ofi_pollfds_add(struct ofi_pollfds * pfds,int fd,uint32_t events,void * context)902 int ofi_pollfds_add(struct ofi_pollfds *pfds, int fd, uint32_t events,
903 void *context)
904 {
905 return ofi_pollfds_ctl(pfds, POLLFDS_CTL_ADD, fd, events, context);
906 }
907
ofi_pollfds_mod(struct ofi_pollfds * pfds,int fd,uint32_t events,void * context)908 int ofi_pollfds_mod(struct ofi_pollfds *pfds, int fd, uint32_t events,
909 void *context)
910 {
911 return ofi_pollfds_ctl(pfds, POLLFDS_CTL_MOD, fd, events, context);
912 }
913
ofi_pollfds_del(struct ofi_pollfds * pfds,int fd)914 int ofi_pollfds_del(struct ofi_pollfds *pfds, int fd)
915 {
916 return ofi_pollfds_ctl(pfds, POLLFDS_CTL_DEL, fd, 0, NULL);
917 }
918
ofi_pollfds_array(struct ofi_pollfds * pfds)919 static int ofi_pollfds_array(struct ofi_pollfds *pfds)
920 {
921 struct pollfd *fds;
922 void *contexts;
923
924 fds = calloc(pfds->size + 64,
925 sizeof(*pfds->fds) + sizeof(*pfds->context));
926 if (!fds)
927 return -FI_ENOMEM;
928
929 pfds->size += 64;
930 contexts = fds + pfds->size;
931
932 memcpy(fds, pfds->fds, pfds->nfds * sizeof(*pfds->fds));
933 memcpy(contexts, pfds->context, pfds->nfds * sizeof(*pfds->context));
934 free(pfds->fds);
935 pfds->fds = fds;
936 pfds->context = contexts;
937 return FI_SUCCESS;
938 }
939
ofi_pollfds_cleanup(struct ofi_pollfds * pfds)940 static void ofi_pollfds_cleanup(struct ofi_pollfds *pfds)
941 {
942 int i;
943
944 for (i = 0; i < pfds->nfds; i++) {
945 while (pfds->fds[i].fd == INVALID_SOCKET) {
946 pfds->fds[i].fd = pfds->fds[pfds->nfds-1].fd;
947 pfds->fds[i].events = pfds->fds[pfds->nfds-1].events;
948 pfds->fds[i].revents = pfds->fds[pfds->nfds-1].revents;
949 pfds->context[i] = pfds->context[pfds->nfds-1];
950 pfds->nfds--;
951 if (i == pfds->nfds)
952 break;
953 }
954 }
955 }
956
ofi_pollfds_process_work(struct ofi_pollfds * pfds)957 static void ofi_pollfds_process_work(struct ofi_pollfds *pfds)
958 {
959 struct slist_entry *entry;
960 struct ofi_pollfds_work_item *item;
961 int i;
962
963 while (!slist_empty(&pfds->work_item_list)) {
964 if ((pfds->nfds == pfds->size) &&
965 ofi_pollfds_array(pfds))
966 continue;
967
968 entry = slist_remove_head(&pfds->work_item_list);
969 item = container_of(entry, struct ofi_pollfds_work_item, entry);
970
971 switch (item->type) {
972 case POLLFDS_CTL_ADD:
973 pfds->fds[pfds->nfds].fd = item->fd;
974 pfds->fds[pfds->nfds].events = item->events;
975 pfds->fds[pfds->nfds].revents = 0;
976 pfds->context[pfds->nfds] = item->context;
977 pfds->nfds++;
978 break;
979 case POLLFDS_CTL_DEL:
980 for (i = 0; i < pfds->nfds; i++) {
981 if (pfds->fds[i].fd == item->fd) {
982 pfds->fds[i].fd = INVALID_SOCKET;
983 break;
984 }
985 }
986 break;
987 case POLLFDS_CTL_MOD:
988 for (i = 0; i < pfds->nfds; i++) {
989 if (pfds->fds[i].fd == item->fd) {
990 pfds->fds[i].events = item->events;
991 pfds->fds[i].revents &= item->events;
992 pfds->context[i] = item->context;
993 break;
994 }
995 }
996 break;
997 default:
998 assert(0);
999 goto out;
1000 }
1001 free(item);
1002 }
1003 out:
1004 ofi_pollfds_cleanup(pfds);
1005 }
1006
ofi_pollfds_wait(struct ofi_pollfds * pfds,void ** contexts,int max_contexts,int timeout)1007 int ofi_pollfds_wait(struct ofi_pollfds *pfds, void **contexts,
1008 int max_contexts, int timeout)
1009 {
1010 int i, ret;
1011 int found = 0;
1012 uint64_t start = (timeout >= 0) ? ofi_gettime_ms() : 0;
1013
1014 do {
1015 ret = poll(pfds->fds, pfds->nfds, timeout);
1016 if (ret == SOCKET_ERROR)
1017 return -ofi_sockerr();
1018 else if (ret == 0)
1019 return 0;
1020
1021 if (pfds->fds[0].revents)
1022 fd_signal_reset(&pfds->signal);
1023
1024 fastlock_acquire(&pfds->lock);
1025 if (!slist_empty(&pfds->work_item_list))
1026 ofi_pollfds_process_work(pfds);
1027
1028 fastlock_release(&pfds->lock);
1029
1030 /* Index 0 is the internal signaling fd, skip it */
1031 for (i = pfds->index; i < pfds->nfds && found < max_contexts; i++) {
1032 if (pfds->fds[i].revents && i) {
1033 contexts[found++] = pfds->context[i];
1034 pfds->index = i;
1035 }
1036 }
1037 for (i = 0; i < pfds->index && found < max_contexts; i++) {
1038 if (pfds->fds[i].revents && i) {
1039 contexts[found++] = pfds->context[i];
1040 pfds->index = i;
1041 }
1042 }
1043
1044 if (timeout > 0)
1045 timeout -= (int) (ofi_gettime_ms() - start);
1046
1047 } while (timeout > 0 && !found);
1048
1049 return found;
1050 }
1051
ofi_pollfds_close(struct ofi_pollfds * pfds)1052 void ofi_pollfds_close(struct ofi_pollfds *pfds)
1053 {
1054 struct ofi_pollfds_work_item *item;
1055 struct slist_entry *entry;
1056
1057 if (pfds) {
1058 while (!slist_empty(&pfds->work_item_list)) {
1059 entry = slist_remove_head(&pfds->work_item_list);
1060 item = container_of(entry,
1061 struct ofi_pollfds_work_item,
1062 entry);
1063 free(item);
1064 }
1065 fastlock_destroy(&pfds->lock);
1066 fd_signal_free(&pfds->signal);
1067 free(pfds->fds);
1068 free(pfds);
1069 }
1070 }
1071
1072
ofi_free_list_of_addr(struct slist * addr_list)1073 void ofi_free_list_of_addr(struct slist *addr_list)
1074 {
1075 struct ofi_addr_list_entry *addr_entry;
1076
1077 while (!slist_empty(addr_list)) {
1078 slist_remove_head_container(addr_list, struct ofi_addr_list_entry,
1079 addr_entry, entry);
1080 free(addr_entry);
1081 }
1082 }
1083
1084 static inline
ofi_insert_loopback_addr(const struct fi_provider * prov,struct slist * addr_list)1085 void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr_list)
1086 {
1087 struct ofi_addr_list_entry *addr_entry;
1088
1089 addr_entry = calloc(1, sizeof(struct ofi_addr_list_entry));
1090 if (!addr_entry)
1091 return;
1092
1093 addr_entry->ipaddr.sin.sin_family = AF_INET;
1094 addr_entry->ipaddr.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1095 ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
1096 "available addr: ", &addr_entry->ipaddr);
1097
1098 strncpy(addr_entry->ipstr, "127.0.0.1", sizeof(addr_entry->ipstr));
1099 strncpy(addr_entry->net_name, "127.0.0.1/32", sizeof(addr_entry->net_name));
1100 strncpy(addr_entry->ifa_name, "lo", sizeof(addr_entry->ifa_name));
1101 slist_insert_tail(&addr_entry->entry, addr_list);
1102
1103 addr_entry = calloc(1, sizeof(struct ofi_addr_list_entry));
1104 if (!addr_entry)
1105 return;
1106
1107 addr_entry->ipaddr.sin6.sin6_family = AF_INET6;
1108 addr_entry->ipaddr.sin6.sin6_addr = in6addr_loopback;
1109 ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
1110 "available addr: ", &addr_entry->ipaddr);
1111
1112 strncpy(addr_entry->ipstr, "::1", sizeof(addr_entry->ipstr));
1113 strncpy(addr_entry->net_name, "::1/128", sizeof(addr_entry->net_name));
1114 strncpy(addr_entry->ifa_name, "lo", sizeof(addr_entry->ifa_name));
1115 slist_insert_tail(&addr_entry->entry, addr_list);
1116 }
1117
1118 #if HAVE_GETIFADDRS
1119
1120 /* getifaddrs can fail when connecting the netlink socket. Try again
1121 * as this is a temporary error. After the 2nd retry, sleep a bit as
1122 * well in case the host is really busy. */
1123 #define MAX_GIA_RETRIES 10
ofi_getifaddrs(struct ifaddrs ** ifaddr)1124 int ofi_getifaddrs(struct ifaddrs **ifaddr)
1125 {
1126 unsigned int retries;
1127 int ret;
1128
1129 for (retries = 0; retries < MAX_GIA_RETRIES; retries++) {
1130 if (retries > 1) {
1131 /* Exponentiation sleep after the 2nd try.
1132 * 1000 << 9 is 512000, which respects the 1s
1133 * constraint for usleep. */
1134 usleep(1000 << retries);
1135 }
1136
1137 ret = getifaddrs(ifaddr);
1138 if (ret == 0 || errno != ECONNREFUSED)
1139 break;
1140 }
1141
1142 if (ret != 0)
1143 return -errno;
1144
1145 return FI_SUCCESS;
1146 }
1147
1148 static int
ofi_addr_list_entry_comp_speed(struct slist_entry * cur,const void * insert)1149 ofi_addr_list_entry_comp_speed(struct slist_entry *cur, const void *insert)
1150 {
1151 const struct ofi_addr_list_entry *cur_addr =
1152 container_of(cur, struct ofi_addr_list_entry, entry);
1153 const struct ofi_addr_list_entry *insert_addr =
1154 container_of((const struct slist_entry *) insert,
1155 struct ofi_addr_list_entry, entry);
1156
1157 return (cur_addr->speed < insert_addr->speed);
1158 }
1159
ofi_set_netmask_str(char * netstr,size_t len,struct ifaddrs * ifa)1160 void ofi_set_netmask_str(char *netstr, size_t len, struct ifaddrs *ifa)
1161 {
1162 union ofi_sock_ip addr;
1163 size_t prefix_len;
1164
1165 netstr[0] = '\0';
1166 prefix_len = ofi_mask_addr(&addr.sa, ifa->ifa_addr, ifa->ifa_netmask);
1167
1168 switch (addr.sa.sa_family) {
1169 case AF_INET:
1170 inet_ntop(AF_INET, &addr.sin.sin_addr, netstr, len);
1171 break;
1172 case AF_INET6:
1173 inet_ntop(AF_INET6, &addr.sin6.sin6_addr, netstr, len);
1174 break;
1175 default:
1176 snprintf(netstr, len, "%s", "<unknown>");
1177 netstr[len - 1] = '\0';
1178 break;
1179 }
1180
1181 snprintf(netstr + strlen(netstr), len - strlen(netstr),
1182 "%s%d", "/", (int) prefix_len);
1183 netstr[len - 1] = '\0';
1184 }
1185
ofi_get_list_of_addr(const struct fi_provider * prov,const char * env_name,struct slist * addr_list)1186 void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
1187 struct slist *addr_list)
1188 {
1189 int ret;
1190 char *iface = NULL;
1191 struct ofi_addr_list_entry *addr_entry;
1192 struct ifaddrs *ifaddrs, *ifa;
1193
1194 fi_param_get_str((struct fi_provider *) prov, env_name, &iface);
1195
1196 ret = ofi_getifaddrs(&ifaddrs);
1197 if (ret)
1198 goto insert_lo;
1199
1200 if (iface) {
1201 for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
1202 if (strncmp(iface, ifa->ifa_name,
1203 strlen(iface)) == 0) {
1204 break;
1205 }
1206 }
1207 if (ifa == NULL) {
1208 FI_INFO(prov, FI_LOG_CORE,
1209 "Can't set filter to unknown interface: (%s)\n",
1210 iface);
1211 iface = NULL;
1212 }
1213 }
1214 for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
1215 if (ifa->ifa_addr == NULL ||
1216 !(ifa->ifa_flags & IFF_UP) ||
1217 (ifa->ifa_flags & IFF_LOOPBACK) ||
1218 ((ifa->ifa_addr->sa_family != AF_INET) &&
1219 (ifa->ifa_addr->sa_family != AF_INET6)))
1220 continue;
1221 if (iface && strncmp(iface, ifa->ifa_name, strlen(iface)) != 0) {
1222 FI_DBG(prov, FI_LOG_CORE,
1223 "Skip (%s) interface\n", ifa->ifa_name);
1224 continue;
1225 }
1226
1227 addr_entry = calloc(1, sizeof(*addr_entry));
1228 if (!addr_entry)
1229 continue;
1230
1231 memcpy(&addr_entry->ipaddr, ifa->ifa_addr,
1232 ofi_sizeofaddr(ifa->ifa_addr));
1233 strncpy(addr_entry->ifa_name, ifa->ifa_name,
1234 sizeof(addr_entry->ifa_name));
1235 ofi_set_netmask_str(addr_entry->net_name,
1236 sizeof(addr_entry->net_name), ifa);
1237
1238 if (!inet_ntop(ifa->ifa_addr->sa_family,
1239 ofi_get_ipaddr(ifa->ifa_addr),
1240 addr_entry->ipstr,
1241 sizeof(addr_entry->ipstr))) {
1242 FI_DBG(prov, FI_LOG_CORE,
1243 "inet_ntop failed: %d\n", errno);
1244 free(addr_entry);
1245 continue;
1246 }
1247
1248 addr_entry->speed = ofi_ifaddr_get_speed(ifa);
1249 FI_INFO(prov, FI_LOG_CORE, "Available addr: %s, "
1250 "iface name: %s, speed: %zu\n",
1251 addr_entry->ipstr, ifa->ifa_name, addr_entry->speed);
1252
1253 slist_insert_before_first_match(addr_list, ofi_addr_list_entry_comp_speed,
1254 &addr_entry->entry);
1255 }
1256
1257 freeifaddrs(ifaddrs);
1258
1259 insert_lo:
1260 /* Always add loopback address at the end */
1261 ofi_insert_loopback_addr(prov, addr_list);
1262 }
1263
1264 #elif defined HAVE_MIB_IPADDRTABLE
1265
ofi_get_list_of_addr(const struct fi_provider * prov,const char * env_name,struct slist * addr_list)1266 void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
1267 struct slist *addr_list)
1268 {
1269 struct ofi_addr_list_entry *addr_entry;
1270 DWORD i;
1271 MIB_IPADDRTABLE _iptbl;
1272 MIB_IPADDRTABLE *iptbl = &_iptbl;
1273 ULONG ips = 1;
1274 ULONG res;
1275
1276 res = GetIpAddrTable(iptbl, &ips, 0);
1277 if (res == ERROR_INSUFFICIENT_BUFFER) {
1278 iptbl = malloc(ips);
1279 if (!iptbl)
1280 return;
1281
1282 res = GetIpAddrTable(iptbl, &ips, 0);
1283 }
1284
1285 if (res != NO_ERROR)
1286 goto out;
1287
1288 for (i = 0; i < iptbl->dwNumEntries; i++) {
1289 if (iptbl->table[i].dwAddr &&
1290 (iptbl->table[i].dwAddr != htonl(INADDR_LOOPBACK))) {
1291 addr_entry = calloc(1, sizeof(*addr_entry));
1292 if (!addr_entry)
1293 break;
1294
1295 addr_entry->ipaddr.sin.sin_family = AF_INET;
1296 addr_entry->ipaddr.sin.sin_addr.s_addr =
1297 iptbl->table[i].dwAddr;
1298 inet_ntop(AF_INET, &iptbl->table[i].dwAddr,
1299 addr_entry->ipstr,
1300 sizeof(addr_entry->ipstr));
1301 slist_insert_tail(&addr_entry->entry, addr_list);
1302 }
1303 }
1304
1305 /* Always add loopback address at the end */
1306 ofi_insert_loopback_addr(prov, addr_list);
1307
1308 out:
1309 if (iptbl != &_iptbl)
1310 free(iptbl);
1311 }
1312
#else /* !HAVE_GETIFADDRS && !HAVE_MIB_IPADDRTABLE */
1314
/*
 * Fallback used when neither getifaddrs nor the IP address table API is
 * available: the list only receives the loopback addresses.  env_name is
 * not used.
 */
void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
			  struct slist *addr_list)
{
	ofi_insert_loopback_addr(prov, addr_list);
}
1320 #endif
1321
/*
 * Query a CPUID feature bit through ofi_cpuid().
 *
 * func - CPUID function (leaf) to query
 * reg  - index into the four values filled in by ofi_cpuid()
 * bit  - mask applied to the selected value
 *
 * Returns 0 when the first value reported for function 0 is lower than
 * func (presumably the highest supported function); otherwise returns
 * the selected value masked with bit.
 */
int ofi_cpu_supports(unsigned func, unsigned reg, unsigned bit)
{
	unsigned regs[4] = { 0 };

	ofi_cpuid(0, 0, regs);
	if (func > regs[0])
		return 0;

	ofi_cpuid(func, 0, regs);
	return regs[reg] & bit;
}
1333
/* Strip a trailing ", " (comma followed by a space) from buffer, if the
 * string ends with one.  Shorter strings are left untouched. */
void ofi_remove_comma(char *buffer)
{
	size_t len = strlen(buffer);
	char *tail;

	if (len < 2)
		return;

	tail = buffer + len - 2;
	if (tail[0] == ',' && tail[1] == ' ')
		*tail = '\0';
}
1342
/*
 * Append printf-style formatted text to the string held in dest.
 *
 * dest - buffer containing a NUL-terminated string to extend
 * n    - total size of dest in bytes
 * fmt  - printf-style format string, followed by its arguments
 *
 * Output that does not fit is truncated and dest stays NUL-terminated.
 * The formatted text is limited to n - 1 - strlen(dest) bytes including
 * its terminator, which is the capacity this helper has always used.
 *
 * Nothing is written when n is 0 or when dest holds no terminator within
 * its first n bytes; in both cases the remaining size would otherwise
 * wrap around to a huge value and allow a write past the buffer.
 */
void ofi_strncatf(char *dest, size_t n, const char *fmt, ...)
{
	const char *end = memchr(dest, '\0', n);
	va_list arglist;
	size_t len;

	if (!end)
		return;

	/* end lies within dest[0 .. n - 1], hence len <= n - 1. */
	len = (size_t) (end - dest);

	va_start(arglist, fmt);
	vsnprintf(&dest[len], n - 1 - len, fmt, arglist);
	va_end(arglist);
}
1352
1353 /* The provider must free any prov_attr data prior to calling this
1354 * routine.
1355 */
ofi_nic_close(struct fid * fid)1356 int ofi_nic_close(struct fid *fid)
1357 {
1358 struct fid_nic *nic = (struct fid_nic *) fid;
1359
1360 assert(fid && fid->fclass == FI_CLASS_NIC);
1361
1362 if (nic->device_attr) {
1363 free(nic->device_attr->name);
1364 free(nic->device_attr->device_id);
1365 free(nic->device_attr->device_version);
1366 free(nic->device_attr->vendor_id);
1367 free(nic->device_attr->driver);
1368 free(nic->device_attr->firmware);
1369 free(nic->device_attr);
1370 }
1371
1372 free(nic->bus_attr);
1373
1374 if (nic->link_attr) {
1375 free(nic->link_attr->address);
1376 free(nic->link_attr->network_type);
1377 free(nic->link_attr);
1378 }
1379
1380 free(nic);
1381 return 0;
1382 }
1383
ofi_nic_control(struct fid * fid,int command,void * arg)1384 int ofi_nic_control(struct fid *fid, int command, void *arg)
1385 {
1386 struct fid_nic *nic = container_of(fid, struct fid_nic, fid);
1387 struct fid_nic **dup = (struct fid_nic **) arg;
1388
1389 switch(command) {
1390 case FI_DUP:
1391 *dup = ofi_nic_dup(nic);
1392 return *dup ? FI_SUCCESS : -FI_ENOMEM;
1393 default:
1394 return -FI_ENOSYS;
1395 }
1396 }
1397
ofi_tostr_device_attr(char * buf,size_t len,const struct fi_device_attr * attr)1398 static void ofi_tostr_device_attr(char *buf, size_t len,
1399 const struct fi_device_attr *attr)
1400 {
1401 const char *prefix = TAB TAB;
1402
1403 ofi_strncatf(buf, len, "%sfi_device_attr:\n", prefix);
1404
1405 prefix = TAB TAB TAB;
1406 ofi_strncatf(buf, len, "%sname: %s\n", prefix, attr->name);
1407 ofi_strncatf(buf, len, "%sdevice_id: %s\n", prefix, attr->device_id);
1408 ofi_strncatf(buf, len, "%sdevice_version: %s\n", prefix,
1409 attr->device_version);
1410 ofi_strncatf(buf, len, "%svendor_id: %s\n", prefix, attr->vendor_id);
1411 ofi_strncatf(buf, len, "%sdriver: %s\n", prefix, attr->driver);
1412 ofi_strncatf(buf, len, "%sfirmware: %s\n", prefix, attr->firmware);
1413 }
1414
ofi_tostr_pci_attr(char * buf,size_t len,const struct fi_pci_attr * attr)1415 static void ofi_tostr_pci_attr(char *buf, size_t len,
1416 const struct fi_pci_attr *attr)
1417 {
1418 const char *prefix = TAB TAB TAB;
1419
1420 ofi_strncatf(buf, len, "%sfi_pci_attr:\n", prefix);
1421
1422 prefix = TAB TAB TAB TAB;
1423 ofi_strncatf(buf, len, "%sdomain_id: %u\n", prefix, attr->domain_id);
1424 ofi_strncatf(buf, len, "%sbus_id: %u\n", prefix, attr->bus_id);
1425 ofi_strncatf(buf, len, "%sdevice_id: %u\n", prefix, attr->device_id);
1426 ofi_strncatf(buf, len, "%sfunction_id: %u\n", prefix, attr->function_id);
1427 }
1428
/*
 * Append a textual form of the bus type `type` to buf (len bytes in
 * total).  Values other than the listed enumerators produce "Unknown".
 *
 * NOTE(review): CASEENUMSTRN is defined outside this chunk; it presumably
 * expands to a case label that appends the enumerator's name to the local
 * `buf` -- confirm against the macro definition.
 */
static void ofi_tostr_bus_type(char *buf, size_t len, int type)
{
	switch (type) {
	CASEENUMSTRN(FI_BUS_UNKNOWN, len);
	CASEENUMSTRN(FI_BUS_PCI, len);
	default:
		ofi_strncatf(buf, len, "Unknown");
		break;
	}
}
1439
ofi_tostr_bus_attr(char * buf,size_t len,const struct fi_bus_attr * attr)1440 static void ofi_tostr_bus_attr(char *buf, size_t len,
1441 const struct fi_bus_attr *attr)
1442 {
1443 const char *prefix = TAB TAB;
1444
1445 ofi_strncatf(buf, len, "%sfi_bus_attr:\n", prefix);
1446
1447 prefix = TAB TAB TAB;
1448 ofi_strncatf(buf, len, "%sfi_bus_type: ", prefix);
1449 ofi_tostr_bus_type(buf, len, attr->bus_type);
1450 ofi_strncatf(buf, len, "\n");
1451
1452 switch (attr->bus_type) {
1453 case FI_BUS_PCI:
1454 ofi_tostr_pci_attr(buf, len, &attr->attr.pci);
1455 break;
1456 default:
1457 break;
1458 }
1459 }
1460
/*
 * Append a textual form of the link state `state` to buf (len bytes in
 * total).  Values other than the listed enumerators produce "Unknown".
 *
 * NOTE(review): CASEENUMSTRN is defined outside this chunk; it presumably
 * expands to a case label that appends the enumerator's name to the local
 * `buf` -- confirm against the macro definition.
 */
static void ofi_tostr_link_state(char *buf, size_t len, int state)
{
	switch (state) {
	CASEENUMSTRN(FI_LINK_UNKNOWN, len);
	CASEENUMSTRN(FI_LINK_DOWN, len);
	CASEENUMSTRN(FI_LINK_UP, len);
	default:
		ofi_strncatf(buf, len, "Unknown");
		break;
	}
}
1472
ofi_tostr_link_attr(char * buf,size_t len,const struct fi_link_attr * attr)1473 static void ofi_tostr_link_attr(char *buf, size_t len,
1474 const struct fi_link_attr *attr)
1475 {
1476 const char *prefix = TAB TAB;
1477 ofi_strncatf(buf, len, "%sfi_link_attr:\n", prefix);
1478
1479 prefix = TAB TAB TAB;
1480 ofi_strncatf(buf, len, "%saddress: %s\n", prefix, attr->address);
1481 ofi_strncatf(buf, len, "%smtu: %zu\n", prefix, attr->mtu);
1482 ofi_strncatf(buf, len, "%sspeed: %zu\n", prefix, attr->speed);
1483 ofi_strncatf(buf, len, "%sstate: ", prefix);
1484 ofi_tostr_link_state(buf, len, attr->state);
1485 ofi_strncatf(buf, len, "\n%snetwork_type: %s\n", prefix,
1486 attr->network_type);
1487 }
1488
ofi_nic_tostr(const struct fid * fid_nic,char * buf,size_t len)1489 int ofi_nic_tostr(const struct fid *fid_nic, char *buf, size_t len)
1490 {
1491 const struct fid_nic *nic = (const struct fid_nic*) fid_nic;
1492
1493 assert(fid_nic->fclass == FI_CLASS_NIC);
1494 ofi_strncatf(buf, len, "%sfid_nic:\n", TAB);
1495
1496 ofi_tostr_device_attr(buf, len, nic->device_attr);
1497 ofi_tostr_bus_attr(buf, len, nic->bus_attr);
1498 ofi_tostr_link_attr(buf, len, nic->link_attr);
1499 return 0;
1500 }
1501
/* Operations table for NIC fids.  ofi_nic_dup() installs it on the NICs
 * it creates from scratch (when called without a source NIC). */
struct fi_ops default_nic_ops = {
	.size = sizeof(struct fi_ops),
	.close = ofi_nic_close,
	.control = ofi_nic_control,
	.tostr = ofi_nic_tostr,
};
1508
ofi_dup_dev_attr(const struct fi_device_attr * attr,struct fi_device_attr ** dup_attr)1509 static int ofi_dup_dev_attr(const struct fi_device_attr *attr,
1510 struct fi_device_attr **dup_attr)
1511 {
1512 *dup_attr = calloc(1, sizeof(**dup_attr));
1513 if (!*dup_attr)
1514 return -FI_ENOMEM;
1515
1516 if (ofi_str_dup(attr->name, &(*dup_attr)->name) ||
1517 ofi_str_dup(attr->device_id, &(*dup_attr)->device_id) ||
1518 ofi_str_dup(attr->device_version, &(*dup_attr)->device_version) ||
1519 ofi_str_dup(attr->vendor_id, &(*dup_attr)->vendor_id) ||
1520 ofi_str_dup(attr->driver, &(*dup_attr)->driver) ||
1521 ofi_str_dup(attr->firmware, &(*dup_attr)->firmware))
1522 return -FI_ENOMEM;
1523
1524 return 0;
1525 }
1526
ofi_dup_bus_attr(const struct fi_bus_attr * attr,struct fi_bus_attr ** dup_attr)1527 static int ofi_dup_bus_attr(const struct fi_bus_attr *attr,
1528 struct fi_bus_attr **dup_attr)
1529 {
1530 *dup_attr = calloc(1, sizeof(**dup_attr));
1531 if (!*dup_attr)
1532 return -FI_ENOMEM;
1533
1534 **dup_attr = *attr;
1535 return 0;
1536 }
1537
ofi_dup_link_attr(const struct fi_link_attr * attr,struct fi_link_attr ** dup_attr)1538 static int ofi_dup_link_attr(const struct fi_link_attr *attr,
1539 struct fi_link_attr **dup_attr)
1540 {
1541 *dup_attr = calloc(1, sizeof(**dup_attr));
1542 if (!*dup_attr)
1543 return -FI_ENOMEM;
1544
1545 if (ofi_str_dup(attr->address, &(*dup_attr)->address) ||
1546 ofi_str_dup(attr->network_type, &(*dup_attr)->network_type))
1547 return -FI_ENOMEM;
1548
1549 (*dup_attr)->mtu = attr->mtu;
1550 (*dup_attr)->speed = attr->speed;
1551 (*dup_attr)->state = attr->state;
1552 return 0;
1553 }
1554
ofi_nic_dup(const struct fid_nic * nic)1555 struct fid_nic *ofi_nic_dup(const struct fid_nic *nic)
1556 {
1557 struct fid_nic *dup_nic;
1558 int ret;
1559
1560 dup_nic = calloc(1, sizeof(*dup_nic));
1561 if (!dup_nic)
1562 return NULL;
1563
1564 if (!nic) {
1565 dup_nic->fid.fclass = FI_CLASS_NIC;
1566 dup_nic->device_attr = calloc(1, sizeof(*dup_nic->device_attr));
1567 dup_nic->bus_attr = calloc(1, sizeof(*dup_nic->bus_attr));
1568 dup_nic->link_attr = calloc(1, sizeof(*dup_nic->link_attr));
1569
1570 if (!dup_nic->device_attr || !dup_nic->bus_attr ||
1571 !dup_nic->link_attr)
1572 goto fail;
1573
1574 dup_nic->fid.ops = &default_nic_ops;
1575 return dup_nic;
1576 }
1577
1578 assert(nic->fid.fclass == FI_CLASS_NIC);
1579 dup_nic->fid = nic->fid;
1580
1581 if (nic->device_attr) {
1582 ret = ofi_dup_dev_attr(nic->device_attr, &dup_nic->device_attr);
1583 if (ret)
1584 goto fail;
1585 }
1586
1587 if (nic->bus_attr) {
1588 ret = ofi_dup_bus_attr(nic->bus_attr, &dup_nic->bus_attr);
1589 if (ret)
1590 goto fail;
1591 }
1592
1593 if (nic->link_attr) {
1594 ret = ofi_dup_link_attr(nic->link_attr, &dup_nic->link_attr);
1595 if (ret)
1596 goto fail;
1597 }
1598
1599 return dup_nic;
1600
1601 fail:
1602 ofi_nic_close(&dup_nic->fid);
1603 return NULL;
1604 }
1605