1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
13  *                         All rights reserved.
14  * Copyright (c) 2009-2015 Cisco Systems, Inc.  All rights reserved.
15  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
16  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
17  * Copyright (c) 2014      Research Organization for Information Science
18  *                         and Technology (RIST). All rights reserved.
19  * $COPYRIGHT$
20  *
21  * Additional copyrights may follow
22  *
23  * $HEADER$
24  *
 * On Windows, many of the socket functions return EWOULDBLOCK
 * instead of codes such as EAGAIN, EINPROGRESS, etc. It has been
 * verified that this will not conflict with other error codes that
 * are returned by these functions under UNIX/Linux environments.
29  */
30 
31 #include "orte_config.h"
32 #include "orte/types.h"
33 #include "opal/types.h"
34 
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38 #ifdef HAVE_SYS_TYPES_H
39 #include <sys/types.h>
40 #endif
41 #include <fcntl.h>
42 #ifdef HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 #ifdef HAVE_NETINET_TCP_H
46 #include <netinet/tcp.h>
47 #endif
48 #ifdef HAVE_ARPA_INET_H
49 #include <arpa/inet.h>
50 #endif
51 #ifdef HAVE_NETDB_H
52 #include <netdb.h>
53 #endif
54 #ifdef HAVE_SYS_SOCKET_H
55 #include <sys/socket.h>
56 #endif
57 #include <ctype.h>
58 
59 #include "opal/util/error.h"
60 #include "opal/util/output.h"
61 #include "opal/opal_socket_errno.h"
62 #include "opal/util/if.h"
63 #include "opal/util/net.h"
64 #include "opal/class/opal_hash_table.h"
65 #include "opal/mca/backtrace/backtrace.h"
66 
67 #include "orte/mca/oob/tcp/oob_tcp.h"
68 #include "orte/mca/oob/tcp/oob_tcp_component.h"
69 #include "oob_tcp_peer.h"
70 #include "oob_tcp_common.h"
71 
72 /**
73  * Set socket buffering
74  */
set_keepalive(int sd)75 static void set_keepalive(int sd)
76 {
77 #if defined(SO_KEEPALIVE)
78     int option;
79     socklen_t optlen;
80 
81     /* see if the keepalive option is available */
82     optlen = sizeof(option);
83     if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) {
84         /* not available, so just return */
85         return;
86     }
87 
88     /* Set the option active */
89     option = 1;
90     if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) {
91         opal_output_verbose(5, orte_oob_base_framework.framework_output,
92                             "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)",
93                             __FILE__, __LINE__,
94                             strerror(opal_socket_errno),
95                             opal_socket_errno);
96         return;
97     }
98 #if defined(TCP_KEEPALIVE)
99     /* set the idle time */
100     if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE,
101                    &mca_oob_tcp_component.keepalive_time,
102                    sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
103         opal_output_verbose(5, orte_oob_base_framework.framework_output,
104                             "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)",
105                             __FILE__, __LINE__,
106                             strerror(opal_socket_errno),
107                             opal_socket_errno);
108         return;
109     }
110 #elif defined(TCP_KEEPIDLE)
111     /* set the idle time */
112     if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE,
113                    &mca_oob_tcp_component.keepalive_time,
114                    sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
115         opal_output_verbose(5, orte_oob_base_framework.framework_output,
116                             "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)",
117                             __FILE__, __LINE__,
118                             strerror(opal_socket_errno),
119                             opal_socket_errno);
120         return;
121     }
122 #endif  // TCP_KEEPIDLE
123 #if defined(TCP_KEEPINTVL)
124     /* set the keepalive interval */
125     if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL,
126                    &mca_oob_tcp_component.keepalive_intvl,
127                    sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) {
128         opal_output_verbose(5, orte_oob_base_framework.framework_output,
129                             "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)",
130                             __FILE__, __LINE__,
131                             strerror(opal_socket_errno),
132                             opal_socket_errno);
133         return;
134     }
135 #endif  // TCP_KEEPINTVL
136 #if defined(TCP_KEEPCNT)
137     /* set the miss rate */
138     if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT,
139                    &mca_oob_tcp_component.keepalive_probes,
140                    sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) {
141         opal_output_verbose(5, orte_oob_base_framework.framework_output,
142                             "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)",
143                             __FILE__, __LINE__,
144                             strerror(opal_socket_errno),
145                             opal_socket_errno);
146     }
147 #endif  // TCP_KEEPCNT
148 #endif //SO_KEEPALIVE
149 }
150 
orte_oob_tcp_set_socket_options(int sd)151 void orte_oob_tcp_set_socket_options(int sd)
152 {
153 #if defined(TCP_NODELAY)
154     int optval;
155     optval = 1;
156     if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
157         opal_backtrace_print(stderr, NULL, 1);
158         opal_output_verbose(5, orte_oob_base_framework.framework_output,
159                             "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
160                             __FILE__, __LINE__,
161                             strerror(opal_socket_errno),
162                             opal_socket_errno);
163     }
164 #endif
165 #if defined(SO_SNDBUF)
166     if (mca_oob_tcp_component.tcp_sndbuf > 0 &&
167         setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
168         opal_output_verbose(5, orte_oob_base_framework.framework_output,
169                             "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)",
170                             __FILE__, __LINE__,
171                             strerror(opal_socket_errno),
172                             opal_socket_errno);
173     }
174 #endif
175 #if defined(SO_RCVBUF)
176     if (mca_oob_tcp_component.tcp_rcvbuf > 0 &&
177         setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
178         opal_output_verbose(5, orte_oob_base_framework.framework_output,
179                             "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)",
180                             __FILE__, __LINE__,
181                             strerror(opal_socket_errno),
182                             opal_socket_errno);
183     }
184 #endif
185 
186     if (0 < mca_oob_tcp_component.keepalive_time) {
187         set_keepalive(sd);
188     }
189 }
190 
mca_oob_tcp_peer_lookup(const orte_process_name_t * name)191 mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)
192 {
193     mca_oob_tcp_peer_t *peer;
194     uint64_t ui64;
195 
196     memcpy(&ui64, (char*)name, sizeof(uint64_t));
197     if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_component.peers, ui64, (void**)&peer)) {
198         return NULL;
199     }
200     return peer;
201 }
202 
/**
 * Translate a connection state into a printable label.
 *
 * @param state  state value to describe
 * @return pointer to a string literal naming the state, or "UNKNOWN"
 *         for any value not handled below; the caller must not modify
 *         or free it
 */
char* mca_oob_tcp_state_print(mca_oob_tcp_state_t state)
{
    /* fallback for values that match none of the cases */
    char *label = "UNKNOWN";

    switch (state) {
    case MCA_OOB_TCP_UNCONNECTED:
        label = "UNCONNECTED";
        break;
    case MCA_OOB_TCP_CLOSED:
        label = "CLOSED";
        break;
    case MCA_OOB_TCP_RESOLVE:
        label = "RESOLVE";
        break;
    case MCA_OOB_TCP_CONNECTING:
        label = "CONNECTING";
        break;
    case MCA_OOB_TCP_CONNECT_ACK:
        /* reported under the short name "ACK" */
        label = "ACK";
        break;
    case MCA_OOB_TCP_CONNECTED:
        label = "CONNECTED";
        break;
    case MCA_OOB_TCP_FAILED:
        label = "FAILED";
        break;
    default:
        break;
    }

    return label;
}
224