1 /*
2 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2011 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
13 * All rights reserved.
14 * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
15 * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
16 * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
17 * Copyright (c) 2014 Research Organization for Information Science
18 * and Technology (RIST). All rights reserved.
19 * $COPYRIGHT$
20 *
21 * Additional copyrights may follow
22 *
23 * $HEADER$
24 *
25 * In windows, many of the socket functions return an EWOULDBLOCK
26 * instead of things like EAGAIN, EINPROGRESS, etc. It has been
27 * verified that this will not conflict with other error codes that
28 * are returned by these functions under UNIX/Linux environments
29 */
30
31 #include "orte_config.h"
32 #include "orte/types.h"
33 #include "opal/types.h"
34
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38 #ifdef HAVE_SYS_TYPES_H
39 #include <sys/types.h>
40 #endif
41 #include <fcntl.h>
42 #ifdef HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 #ifdef HAVE_NETINET_TCP_H
46 #include <netinet/tcp.h>
47 #endif
48 #ifdef HAVE_ARPA_INET_H
49 #include <arpa/inet.h>
50 #endif
51 #ifdef HAVE_NETDB_H
52 #include <netdb.h>
53 #endif
54 #ifdef HAVE_SYS_SOCKET_H
55 #include <sys/socket.h>
56 #endif
57 #include <ctype.h>
58
59 #include "opal/util/error.h"
60 #include "opal/util/output.h"
61 #include "opal/opal_socket_errno.h"
62 #include "opal/util/if.h"
63 #include "opal/util/net.h"
64 #include "opal/class/opal_hash_table.h"
65 #include "opal/mca/backtrace/backtrace.h"
66
67 #include "orte/mca/oob/tcp/oob_tcp.h"
68 #include "orte/mca/oob/tcp/oob_tcp_component.h"
69 #include "oob_tcp_peer.h"
70 #include "oob_tcp_common.h"
71
72 /**
73 * Set socket buffering
74 */
set_keepalive(int sd)75 static void set_keepalive(int sd)
76 {
77 #if defined(SO_KEEPALIVE)
78 int option;
79 socklen_t optlen;
80
81 /* see if the keepalive option is available */
82 optlen = sizeof(option);
83 if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) {
84 /* not available, so just return */
85 return;
86 }
87
88 /* Set the option active */
89 option = 1;
90 if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) {
91 opal_output_verbose(5, orte_oob_base_framework.framework_output,
92 "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)",
93 __FILE__, __LINE__,
94 strerror(opal_socket_errno),
95 opal_socket_errno);
96 return;
97 }
98 #if defined(TCP_KEEPALIVE)
99 /* set the idle time */
100 if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE,
101 &mca_oob_tcp_component.keepalive_time,
102 sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
103 opal_output_verbose(5, orte_oob_base_framework.framework_output,
104 "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)",
105 __FILE__, __LINE__,
106 strerror(opal_socket_errno),
107 opal_socket_errno);
108 return;
109 }
110 #elif defined(TCP_KEEPIDLE)
111 /* set the idle time */
112 if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE,
113 &mca_oob_tcp_component.keepalive_time,
114 sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
115 opal_output_verbose(5, orte_oob_base_framework.framework_output,
116 "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)",
117 __FILE__, __LINE__,
118 strerror(opal_socket_errno),
119 opal_socket_errno);
120 return;
121 }
122 #endif // TCP_KEEPIDLE
123 #if defined(TCP_KEEPINTVL)
124 /* set the keepalive interval */
125 if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL,
126 &mca_oob_tcp_component.keepalive_intvl,
127 sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) {
128 opal_output_verbose(5, orte_oob_base_framework.framework_output,
129 "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)",
130 __FILE__, __LINE__,
131 strerror(opal_socket_errno),
132 opal_socket_errno);
133 return;
134 }
135 #endif // TCP_KEEPINTVL
136 #if defined(TCP_KEEPCNT)
137 /* set the miss rate */
138 if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT,
139 &mca_oob_tcp_component.keepalive_probes,
140 sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) {
141 opal_output_verbose(5, orte_oob_base_framework.framework_output,
142 "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)",
143 __FILE__, __LINE__,
144 strerror(opal_socket_errno),
145 opal_socket_errno);
146 }
147 #endif // TCP_KEEPCNT
148 #endif //SO_KEEPALIVE
149 }
150
orte_oob_tcp_set_socket_options(int sd)151 void orte_oob_tcp_set_socket_options(int sd)
152 {
153 #if defined(TCP_NODELAY)
154 int optval;
155 optval = 1;
156 if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
157 opal_backtrace_print(stderr, NULL, 1);
158 opal_output_verbose(5, orte_oob_base_framework.framework_output,
159 "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
160 __FILE__, __LINE__,
161 strerror(opal_socket_errno),
162 opal_socket_errno);
163 }
164 #endif
165 #if defined(SO_SNDBUF)
166 if (mca_oob_tcp_component.tcp_sndbuf > 0 &&
167 setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
168 opal_output_verbose(5, orte_oob_base_framework.framework_output,
169 "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)",
170 __FILE__, __LINE__,
171 strerror(opal_socket_errno),
172 opal_socket_errno);
173 }
174 #endif
175 #if defined(SO_RCVBUF)
176 if (mca_oob_tcp_component.tcp_rcvbuf > 0 &&
177 setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
178 opal_output_verbose(5, orte_oob_base_framework.framework_output,
179 "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)",
180 __FILE__, __LINE__,
181 strerror(opal_socket_errno),
182 opal_socket_errno);
183 }
184 #endif
185
186 if (0 < mca_oob_tcp_component.keepalive_time) {
187 set_keepalive(sd);
188 }
189 }
190
mca_oob_tcp_peer_lookup(const orte_process_name_t * name)191 mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)
192 {
193 mca_oob_tcp_peer_t *peer;
194 uint64_t ui64;
195
196 memcpy(&ui64, (char*)name, sizeof(uint64_t));
197 if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_component.peers, ui64, (void**)&peer)) {
198 return NULL;
199 }
200 return peer;
201 }
202
/**
 * Map a connection state to a short, human-readable label.
 *
 * @param state  state value to describe
 *
 * @return a pointer to a string literal naming the state, or "UNKNOWN"
 *         for any value not listed below.  The result is a literal, so
 *         the caller must neither modify nor free it.
 */
char* mca_oob_tcp_state_print(mca_oob_tcp_state_t state)
{
    /* fallback for values that match none of the cases */
    char *label = "UNKNOWN";

    switch (state) {
    case MCA_OOB_TCP_UNCONNECTED:
        label = "UNCONNECTED";
        break;
    case MCA_OOB_TCP_CLOSED:
        label = "CLOSED";
        break;
    case MCA_OOB_TCP_RESOLVE:
        label = "RESOLVE";
        break;
    case MCA_OOB_TCP_CONNECTING:
        label = "CONNECTING";
        break;
    case MCA_OOB_TCP_CONNECT_ACK:
        /* note: reported as "ACK", not "CONNECT_ACK" */
        label = "ACK";
        break;
    case MCA_OOB_TCP_CONNECTED:
        label = "CONNECTED";
        break;
    case MCA_OOB_TCP_FAILED:
        label = "FAILED";
        break;
    default:
        break;
    }

    return label;
}
224