1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpidi_ch3_impl.h"
7 #ifdef USE_PMI2_API
8 #include "pmi2.h"
9 #else
10 #include "pmi.h"
11 #endif
12
13 #include "mpidu_sock.h"
14
15 #include "ch3usock.h"
16
17 /* Private packet types used only within this file */
18 /* Note that these must be smaller than the PktGeneric type and
19 their MPIDI_CH3_Pkt_type_t values are arbitrary (but must be
20 consistent) */
21 /* FIXME - We need a little security here to avoid having a random port scan
22 crash the process. Perhaps a "secret" value for each process could be
23 published in the key-val space and subsequently sent in the open pkt. */
24 typedef struct
25 {
26 MPIDI_CH3_Pkt_type_t type;
27 int pg_id_len;
28 int pg_rank;
29 }
30 MPIDI_CH3I_Pkt_sc_open_req_t;
31
32 typedef struct
33 {
34 MPIDI_CH3_Pkt_type_t type;
35 int ack;
36 }
37 MPIDI_CH3I_Pkt_sc_open_resp_t;
38
39 typedef struct
40 {
41 MPIDI_CH3_Pkt_type_t type;
42 int port_name_tag;
43 }
44 MPIDI_CH3I_Pkt_sc_conn_accept_t;
45
46 #ifdef HAVE_NETDB_H
47 #include <netdb.h>
48 #endif
49 #ifdef HAVE_SYS_SOCKET_H
50 /* Include this for AF_INET */
51 #include <sys/socket.h>
52 #endif
53 #ifdef HAVE_ARPA_INET_H
54 /* Include this for inet_pton prototype */
55 #include <arpa/inet.h>
56 #endif
57
58 /* FIXME: Describe what these routines do */
59
60 /* FIXME: Clean up use of private packets (open/accept) */
61
/* Partial description:
   This file contains the routines that are used to create socket connections,
   including the routines used to encode/decode the description of a connection
   into/out of the "business card".

   ToDo: change the "host description" to an "interface address" so that
   socket connections are targeted at particular interfaces, not
   compute nodes, and so that the address is in ready-to-use IP address
   format and does not require a gethostbyname lookup. - Partially done
 */
72
73 /*
74 * Manage the connection information that is exported to other processes
75 *
76 */
/* Keys of the socket-related entries stored in a business card string */
#define MPIDI_CH3I_HOST_DESCRIPTION_KEY "description"   /* host description (string value) */
#define MPIDI_CH3I_PORT_KEY             "port"          /* listener port (integer value) */
#define MPIDI_CH3I_IFNAME_KEY           "ifname"        /* optional interface address (string value) */
80
81 /*
82 * Routines for establishing a listener socket on the socket set that
83 * is used for all communication. These should be called from the
84 * channel init and finalize routines.
85 */
86 static int MPIDI_CH3I_listener_port = 0;
87 static MPIDI_CH3I_Connection_t * MPIDI_CH3I_listener_conn = NULL;
88
89 /* Required for (socket version) upcall to Connect_to_root (see FIXME) */
90 extern MPIDI_CH3I_Sock_set_t MPIDI_CH3I_sock_set;
91
/* MPIDU_CH3I_SetupListener - create the listener socket on sock_set.
 *
 * Allocates the listener connection, marks it CONN_STATE_LISTENING and asks
 * the sock layer to listen; the chosen port is stored in
 * MPIDI_CH3I_listener_port and the socket in the listener connection.
 *
 * Returns MPI_SUCCESS or the error code of the failing call.  On failure no
 * listener state is left behind: the connection is released and the
 * file-scope listener variables are reset.
 */
int MPIDU_CH3I_SetupListener(MPIDI_CH3I_Sock_set_t sock_set)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_Sock_t sock;

    mpi_errno = MPIDI_CH3I_Connection_alloc(&MPIDI_CH3I_listener_conn);
    if (mpi_errno != MPI_SUCCESS) {
        return mpi_errno;
    }

    MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, TYPICAL,
                "Setting listener connect state to CONN_STATE_LISTENING");
    MPIDI_CH3I_listener_conn->sock = NULL;
    MPIDI_CH3I_listener_conn->vc = NULL;
    MPIDI_CH3I_listener_conn->state = CONN_STATE_LISTENING;
    MPIDI_CH3I_listener_conn->send_active = NULL;
    MPIDI_CH3I_listener_conn->recv_active = NULL;

    mpi_errno = MPIDI_CH3I_Sock_listen(sock_set, MPIDI_CH3I_listener_conn,
                                       &MPIDI_CH3I_listener_port, &sock);
    if (mpi_errno != MPI_SUCCESS) {
        /* No socket refers to the connection, so release it (and its pg_id
         * buffer) instead of leaking it and leaving the global pointing at a
         * listener that never came up. */
        MPL_free(MPIDI_CH3I_listener_conn->pg_id);
        MPL_free(MPIDI_CH3I_listener_conn);
        MPIDI_CH3I_listener_conn = NULL;
        MPIDI_CH3I_listener_port = 0;
        return mpi_errno;
    }

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CONNECT, VERBOSE, "Listener port %d", MPIDI_CH3I_listener_port);

    MPIDI_CH3I_listener_conn->sock = sock;

    return mpi_errno;
}
121
MPIDU_CH3I_ShutdownListener(void)122 int MPIDU_CH3I_ShutdownListener( void )
123 {
124 int mpi_errno;
125 MPID_Progress_state progress_state;
126
127 MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,"Closing listener sock (Post_close)");
128 mpi_errno = MPIDI_CH3I_Sock_post_close(MPIDI_CH3I_listener_conn->sock);
129 if (mpi_errno != MPI_SUCCESS) {
130 return mpi_errno;
131 }
132
133 MPID_Progress_start(&progress_state);
134 while(MPIDI_CH3I_listener_conn != NULL)
135 {
136 mpi_errno = MPID_Progress_wait(&progress_state);
137
138 }
139 MPID_Progress_end(&progress_state);
140
141 return mpi_errno;
142 }
143
144 /* Allocates a connection and the pg_id field for a connection only.
145 Does not initialize any connection fields other than pg_id.
146 Called by routines that create connections, used in this
147 file and in ch3_progress*.c in various channels.
148 */
/* MPIDI_CH3I_Connection_alloc - allocate a connection object together with
 * its pg_id buffer.
 *
 * connp - on success receives the new connection; left untouched on failure.
 *
 * Only pg_id is initialized (to the empty string); every other field of the
 * connection is left for the caller to set.  Returns MPI_SUCCESS or an MPI
 * error code; on failure anything allocated here is released again.
 */
int MPIDI_CH3I_Connection_alloc(MPIDI_CH3I_Connection_t **connp)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int id_sz;
    MPIDI_CH3I_Connection_t *conn = NULL;
    MPIR_CHKPMEM_DECL(2);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_ALLOC);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_ALLOC);

    MPIR_CHKPMEM_MALLOC(conn, MPIDI_CH3I_Connection_t *, sizeof(MPIDI_CH3I_Connection_t),
                        mpi_errno, "conn", MPL_MEM_DYNAMIC);

    /* Determine how large a process-group id can be. */
    /* FIXME: This size is unchanging, so get it only once (at most);
     * we might prefer for connections to simply point at the single process
     * group to which the remote process belong */
#ifdef USE_PMI2_API
    id_sz = MPID_MAX_JOBID_LEN;
#else
    pmi_errno = PMI_KVS_Get_name_length_max(&id_sz);
    MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER,
                         "**pmi_get_id_length_max", "**pmi_get_id_length_max %d", pmi_errno);
#endif

    /* One extra byte beyond the maximum id length */
    MPIR_CHKPMEM_MALLOC(conn->pg_id, char *, id_sz + 1, mpi_errno, "conn->pg_id",
                        MPL_MEM_DYNAMIC);
    /* Start with an empty id so that pg_id is a valid string even if a
     * later step fails before it is filled in */
    conn->pg_id[0] = 0;

    *connp = conn;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_ALLOC);
    return mpi_errno;
  fn_fail:
    MPIR_CHKPMEM_REAP();
    goto fn_exit;
}
186
187
188 /* FIXME: Why does the name include "to_root"? */
189
190 /* FIXME: Describe the algorithm for the connection logic */
/* MPIDI_CH3I_Connect_to_root_sock - start a socket connection to the process
 * that owns the port described by port_name.
 *
 * port_name - port string; must contain the host description and port keys
 *             of a business card plus the port-name tag.
 * new_vc    - on success receives a freshly allocated VC in state
 *             MPIDI_CH3I_VC_STATE_CONNECTING that is tied to the new
 *             connection; set to NULL on failure.
 *
 * The connect is only posted here.  The port-name tag is stashed in the
 * connection's packet buffer so that the connect-event handler can send it
 * once the socket is connected.
 *
 * Returns MPI_SUCCESS or an MPI error code.  On failure the connection (if
 * one was allocated) and the VC are released.
 */
int MPIDI_CH3I_Connect_to_root_sock(const char *port_name, MPIDI_VC_t **new_vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_VC_t *vc;
    MPIDI_CH3I_VC *vcch;
    MPIR_CHKPMEM_DECL(1);
    char host_description[MAX_HOST_DESCRIPTION_LEN];
    int port, port_name_tag;
    MPL_sockaddr_t ifaddr;
    int hasIfaddr = 0;
    /* NULL until allocated, so that fn_fail knows whether to release it */
    MPIDI_CH3I_Connection_t *conn = NULL;
    MPIDI_CH3I_Pkt_sc_conn_accept_t *acceptpkt;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);

    /* First, create a new vc (we may use this to pass to a generic
     * connection routine) */
    MPIR_CHKPMEM_MALLOC(vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "vc", MPL_MEM_DYNAMIC);
    /* FIXME - where does this vc get freed? */

    *new_vc = vc;

    /* FIXME: There may need to be an additional routine here, to ensure that the
     * channel is initialized for this pair of process groups (this process
     * and the remote process to which the vc will connect). */
    MPIDI_VC_Init(vc, NULL, 0);

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_CONNECT, VERBOSE, "Connect to root with portstring %s",
                  port_name);

    mpi_errno = MPIDI_CH3I_Sock_get_conninfo_from_bc(port_name, host_description,
                                                     sizeof(host_description),
                                                     &port, &ifaddr, &hasIfaddr);
    MPIR_ERR_CHECK(mpi_errno);
    mpi_errno = MPIDI_GetTagFromPort(port_name, &port_name_tag);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**argstr_port_name_tag");
    }

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CONNECT, VERBOSE, "port tag %d", port_name_tag);

    mpi_errno = MPIDI_CH3I_Connection_alloc(&conn);
    MPIR_ERR_CHECK(mpi_errno);

    /* conn->pg_id is not used for this connection */

    /* FIXME: To avoid this global (MPIDI_CH3I_sock_set) which is
     * used only in ch3_progress.c and ch3_progress_connect.c in the channels,
     * this should be a call into the channel, asking it to setup the
     * socket for a connection and return the connection.  That will
     * keep the socket set out of the general ch3 code, even if this
     * is the socket utility functions. */
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
                    (MPL_DBG_FDEST, "posting connect to host %s, port %d",
                     host_description, port));
    mpi_errno = MPIDI_CH3I_Sock_post_connect(MPIDI_CH3I_sock_set, conn,
                                             host_description, port, &conn->sock);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        int err_class = MPIR_ERR_GET_CLASS(mpi_errno);

        /* conn->vc has not been set at this point, so the rank reported in
         * the messages is taken from vc itself.  The messages must be built
         * before conn is released in fn_fail because they read conn->pg_id. */
        if (err_class == MPIDI_CH3I_SOCK_ERR_BAD_HOST) {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __func__,
                                             __LINE__, MPI_ERR_OTHER, "**ch3|sock|badhost",
                                             "**ch3|sock|badhost %s %d %s",
                                             conn->pg_id, vc->pg_rank, port_name);
        }
        else if (err_class == MPIDI_CH3I_SOCK_ERR_CONN_FAILED) {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __func__,
                                             __LINE__, MPI_ERR_OTHER, "**ch3|sock|connrefused",
                                             "**ch3|sock|connrefused %s %d %s",
                                             conn->pg_id, vc->pg_rank, port_name);
        }
        else {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* Tie the VC and the connection together */
    vcch = &vc->ch;
    vcch->sock = conn->sock;
    vcch->conn = conn;
    vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
    conn->vc = vc;
    MPL_DBG_CONNSTATECHANGE(vc, conn, CONN_STATE_CONNECT_ACCEPT);
    conn->state = CONN_STATE_CONNECT_ACCEPT;
    conn->send_active = NULL;
    conn->recv_active = NULL;

    /* place the port name tag in the pkt that will eventually be sent to
     * the other side */
    acceptpkt = (MPIDI_CH3I_Pkt_sc_conn_accept_t *) &conn->pkt.type;
    acceptpkt->port_name_tag = port_name_tag;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
    return mpi_errno;
  fn_fail:
    /* Every failure after the connection was allocated happens before a
     * socket refers to it, so it can be released here, pg_id included. */
    if (conn != NULL) {
        MPL_free(conn->pg_id);
        MPL_free(conn);
    }
    /* Releases the VC allocated above ... */
    MPIR_CHKPMEM_REAP();
    /* ... so do not hand a dangling pointer back to the caller */
    *new_vc = NULL;
    goto fn_exit;
}
299
300 /* ------------------------------------------------------------------------- */
301 /* Business card management. These routines insert or extract connection
302 information when using sockets from the business card */
303 /* ------------------------------------------------------------------------- */
304
305 /* FIXME: These are small routines; we may want to bring them together
306 into a more specific post-connection-for-sock */
307
308 /* The host_description should be of length MAX_HOST_DESCRIPTION_LEN */
309
/* MPIDI_CH3I_Sock_get_conninfo_from_bc - extract the socket connection
 * information from a business card string.
 *
 * bc               - business card (or port) string to parse
 * host_description - receives the value of the "description" key
 * maxlen           - size of host_description in bytes
 * port             - receives the value of the "port" key
 * ifaddr           - receives the address parsed from the optional "ifname"
 *                    key; only valid when *hasIfaddr is set to 1
 * hasIfaddr        - set to 1 if ifaddr was filled in, 0 otherwise
 *
 * Returns MPI_SUCCESS or an MPI error code if a required key is missing or
 * malformed, or if an ifname entry is present but cannot be converted.
 */
int MPIDI_CH3I_Sock_get_conninfo_from_bc(const char *bc,
                                         char *host_description, int maxlen,
                                         int *port, MPL_sockaddr_t *ifaddr,
                                         int *hasIfaddr)
{
    int mpi_errno = MPI_SUCCESS;
    int str_errno;
#if !defined(HAVE_WINDOWS_H) && defined(HAVE_INET_PTON)
    char ifname[256];
#endif
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SOCK_GET_CONNINFO_FROM_BC);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SOCK_GET_CONNINFO_FROM_BC);

    /* Host description (required) */
    str_errno = MPL_str_get_string_arg(bc, MPIDI_CH3I_HOST_DESCRIPTION_KEY,
                                       host_description, maxlen);
    if (str_errno != MPL_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        if (str_errno == MPL_ERR_STR_FAIL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost");
        }
        else {
            /* MPL_ERR_STR_TRUNCATED or MPL_ERR_STR_NOMEM */
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**argstr_hostd");
        }
        /* --END ERROR HANDLING-- */
    }

    /* Port (required) */
    str_errno = MPL_str_get_int_arg(bc, MPIDI_CH3I_PORT_KEY, port);
    if (str_errno != MPL_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        if (str_errno == MPL_ERR_STR_FAIL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**argstr_missingport");
        }
        else {
            /* MPL_ERR_STR_TRUNCATED or MPL_ERR_STR_NOMEM */
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**argstr_port");
        }
        /* --END ERROR HANDLING-- */
    }

    /* ifname is optional */
    /* FIXME: This is a hack to allow Windows to continue to use
     * the host description string instead of the interface address
     * bytes when posting a socket connection.  This should be fixed
     * by changing the Sock_post_connect to only accept interface
     * address.  Note also that Windows does not have the inet_pton
     * routine; the Windows version of this routine will need to
     * be identified or written.  See also channels/sock/ch3_progress.c */
    *hasIfaddr = 0;
#if !defined(HAVE_WINDOWS_H) && defined(HAVE_INET_PTON)
    str_errno = MPL_str_get_string_arg(bc, MPIDI_CH3I_IFNAME_KEY, ifname, sizeof(ifname));
    if (str_errno == MPL_SUCCESS) {
        int ret = MPL_get_sockaddr((const char *) ifname, ifaddr);
        if (ret) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ifnameinvalid");
        }
        /* Tell the caller that ifaddr now holds a usable address; without
         * this the parsed address could never be used. */
        *hasIfaddr = 1;
    }
#endif

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SOCK_GET_CONNINFO_FROM_BC);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
375
376
377 /* MPIDI_CH3U_Get_business_card_sock - does socket specific portion of
378 * setting up a business card
379 *
380 * Parameters:
381 * bc_val_p - business card value buffer pointer, updated to the next
382 * available location or freed if published.
383 * val_max_sz_p - ptr to maximum value buffer size reduced by the number
384 * of characters written
385 *
386 */
387
MPIDI_CH3U_Get_business_card_sock(int myRank,char ** bc_val_p,int * val_max_sz_p)388 int MPIDI_CH3U_Get_business_card_sock(int myRank,
389 char **bc_val_p, int *val_max_sz_p)
390 {
391 int mpi_errno = MPI_SUCCESS;
392 int str_errno = MPL_SUCCESS;
393 MPL_sockaddr_t ifaddr;
394 char ifnamestr[MAX_HOST_DESCRIPTION_LEN];
395 #ifdef MPL_USE_DBG_LOGGING
396 char *bc_orig = *bc_val_p;
397 #endif
398 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
399
400 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
401
402 MPIDU_CH3U_GetSockInterfaceAddr( myRank, ifnamestr, sizeof(ifnamestr), &ifaddr );
403
404 str_errno = MPL_str_add_int_arg(bc_val_p, val_max_sz_p,
405 MPIDI_CH3I_PORT_KEY, MPIDI_CH3I_listener_port);
406 if (str_errno) {
407 MPIR_ERR_CHKANDJUMP(str_errno == MPL_ERR_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
408 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
409 }
410
411 str_errno = MPL_str_add_string_arg(bc_val_p, val_max_sz_p,
412 MPIDI_CH3I_HOST_DESCRIPTION_KEY, ifnamestr );
413 if (str_errno) {
414 MPIR_ERR_CHKANDJUMP(str_errno == MPL_ERR_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
415 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
416 }
417
418 /* Look up the interface address cooresponding to this host description */
419 /* FIXME: We should start switching to getaddrinfo instead of
420 gethostbyname */
421 /* FIXME: We don't make use of the ifname in Windows in order to
422 provide backward compatibility with the (undocumented) host
423 description string used by the socket connection routine
424 MPIDI_CH3I_Sock_post_connect. We need to change to an interface-address
425 (already resolved) based description for better scalability and
426 to eliminate reliance on fragile DNS services. Note that this is
427 also more scalable, since the DNS server may serialize address
428 requests. On most systems, asking for the host info of yourself
429 is resolved locally (i.e., perfectly parallel). Regrettably, not
430 all systems do this (e.g., some versions of FreeBSD).
431 */
432 #if 0
433 #ifndef HAVE_WINDOWS_H
434 {
435 struct hostent *info;
436 char ifname[256];
437 unsigned char *p;
438 info = gethostbyname( ifname );
439 if (info && info->h_addr_list) {
440 p = (unsigned char *)(info->h_addr_list[0]);
441 MPL_snprintf( ifname, sizeof(ifname), "%u.%u.%u.%u",
442 p[0], p[1], p[2], p[3] );
443 MPL_DBG_MSG_S(MPIDI_CH3_DBG_CONNECT,VERBOSE,"ifname = %s",ifname );
444 str_errno = MPL_str_add_string_arg( bc_val_p,
445 val_max_sz_p,
446 MPIDI_CH3I_IFNAME_KEY,
447 ifname );
448 if (str_errno) {
449 MPIR_ERR_CHKANDJUMP(str_errno == MPL_ERR_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
450 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
451 }
452 }
453 }
454 #endif
455 #endif
456
457 {
458 char ifname[256]="";
459 MPL_sockaddr_to_str(&ifaddr, ifname, 256);
460 if (ifname[0]) {
461 MPL_DBG_MSG_S(MPIDI_CH3_DBG_CONNECT,VERBOSE,"ifname = %s",ifname );
462 str_errno = MPL_str_add_string_arg( bc_val_p,
463 val_max_sz_p,
464 MPIDI_CH3I_IFNAME_KEY,
465 ifname );
466 if (str_errno) {
467 MPIR_ERR_CHKANDJUMP(str_errno == MPL_ERR_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
468 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
469 }
470 }
471 }
472
473 MPL_DBG_MSG_S(MPIDI_CH3_DBG_CONNECT,TYPICAL,"business card is %s", bc_orig );
474
475 fn_exit:
476 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
477 return mpi_errno;
478 fn_fail:
479 goto fn_exit;
480 }
481
482 /* ------------------------------------------------------------------------- */
483 /* Below will be/is the code that is used to create a connection and
484 * to handle changes to the state of a connection.
485 */
486 /* ------------------------------------------------------------------------- */
487 static int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn);
488 static int connection_post_send_pkt(MPIDI_CH3I_Connection_t * conn);
489 static int connection_post_send_pkt_and_pgid(MPIDI_CH3I_Connection_t * conn);
490 static int connection_post_sendq_req(MPIDI_CH3I_Connection_t * conn);
491 static void connection_destroy(MPIDI_CH3I_Connection_t * conn);
492
493 /* This routine is called in response to an MPIDI_CH3I_SOCK_OP_ACCEPT event
494 in ch3_progress */
MPIDI_CH3_Sockconn_handle_accept_event(void)495 int MPIDI_CH3_Sockconn_handle_accept_event( void )
496 {
497 int mpi_errno = MPI_SUCCESS;
498 MPIDI_CH3I_Connection_t * conn;
499 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
500
501 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
502
503 mpi_errno = MPIDI_CH3I_Connection_alloc(&conn);
504 MPIR_ERR_CHECK(mpi_errno);
505 mpi_errno = MPIDI_CH3I_Sock_accept(MPIDI_CH3I_listener_conn->sock,
506 MPIDI_CH3I_sock_set, conn, &conn->sock);
507 if (mpi_errno != MPI_SUCCESS) {
508 MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|sock|accept");
509 }
510
511 conn->vc = NULL;
512 MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_LRECV_PKT);
513 conn->state = CONN_STATE_OPEN_LRECV_PKT;
514 conn->send_active = NULL;
515 conn->recv_active = NULL;
516
517 mpi_errno = connection_post_recv_pkt(conn);
518 MPIR_ERR_CHECK(mpi_errno);
519
520 fn_exit:
521 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
522
523 return mpi_errno;
524 fn_fail:
525 goto fn_exit;
526 }
527
/* Handles completion of a posted socket connect on conn.
 *
 * event_error - result of the connect; anything but MPI_SUCCESS is reported
 *               as "**ch3|sock|connfailed".
 *
 * A connection in state CONNECTING or DISCARD sends an open request followed
 * by this process's pg id; any other connection must be in state
 * CONNECT_ACCEPT and sends the connect/accept packet carrying the port name
 * tag.  Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_Sockconn_handle_connect_event(MPIDI_CH3I_Connection_t *conn, int event_error)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);

    /* --BEGIN ERROR HANDLING-- */
    if (event_error != MPI_SUCCESS) {
        /* If the connection fails, conn->vc etc is probably invalid,
         * so we can only report that the connection failed */
        mpi_errno = event_error;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|sock|connfailed");
    }
    /* --END ERROR HANDLING-- */

    if (conn->state != CONN_STATE_CONNECTING && conn->state != CONN_STATE_DISCARD) {
        /* CONN_STATE_CONNECT_ACCEPT */
        MPIDI_CH3I_Pkt_sc_conn_accept_t *accept_hdr =
            (MPIDI_CH3I_Pkt_sc_conn_accept_t *) &conn->pkt.type;
        int saved_tag;

        MPIR_Assert(conn->state == CONN_STATE_CONNECT_ACCEPT);
        MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_OPEN_CSEND);
        conn->state = CONN_STATE_OPEN_CSEND;

        /* pkt contains port name tag.  In memory debugging mode,
         * MPIDI_Pkt_init resets the packet contents.  Therefore,
         * save the port name tag and then add it back. */
        saved_tag = accept_hdr->port_name_tag;
        MPIDI_Pkt_init(accept_hdr, MPIDI_CH3I_PKT_SC_CONN_ACCEPT);
        accept_hdr->port_name_tag = saved_tag;

        mpi_errno = connection_post_send_pkt(conn);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**ch3|sock|scconnaccept");
        }
    }
    else {
        MPIDI_CH3I_Pkt_sc_open_req_t *open_hdr =
            (MPIDI_CH3I_Pkt_sc_open_req_t *) &conn->pkt.type;

        /* A connection marked DISCARD keeps that state; only a CONNECTING
         * one advances to OPEN_CSEND */
        if (conn->state == CONN_STATE_CONNECTING) {
            MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_OPEN_CSEND);
            conn->state = CONN_STATE_OPEN_CSEND;
        }

        MPIDI_Pkt_init(open_hdr, MPIDI_CH3I_PKT_SC_OPEN_REQ);
        open_hdr->pg_id_len = (int) strlen(MPIDI_Process.my_pg->id) + 1;
        open_hdr->pg_rank = MPIR_Process.comm_world->rank;

        mpi_errno = connection_post_send_pkt_and_pgid(conn);
        MPIR_ERR_CHECK(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
589
/* Handles the close of the socket attached to conn.
 *
 *  - conn == NULL: nothing to do.
 *  - DISCARD:  a close is posted and the connection moves to CLOSING; it is
 *              destroyed when that close completes.
 *  - CLOSING:  the connection is detached from its VC (if it is still the
 *              VC's connection, the VC is told that it terminated) and then
 *              destroyed.
 *  - otherwise the connection must be the listener: the listener globals are
 *    reset, completion is signalled, and the connection is destroyed.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_Sockconn_handle_close_event(MPIDI_CH3I_Connection_t *conn)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);

    /* If the conn pointer is NULL then the close was intentional */
    /* FIXME: What does the above comment mean? */
    if (conn == NULL) {
        goto fn_exit;
    }

    if (conn->state == CONN_STATE_DISCARD) {
        /* post close, so the socket is closed and memory leaks are avoided */
        MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, "CLosing sock (Post_close)");
        conn->state = CONN_STATE_CLOSING;
        mpi_errno = MPIDI_CH3I_Sock_post_close(conn->sock);
        MPIR_ERR_CHECK(mpi_errno);
        /* The connection stays alive until the posted close completes */
        goto fn_exit;
    }

    if (conn->state == CONN_STATE_CLOSING) {
        MPIR_Assert(conn->send_active == NULL);
        MPIR_Assert(conn->recv_active == NULL);

        if (conn->vc != NULL) {
            MPIDI_CH3I_VC *vcch = &conn->vc->ch;

            conn->sock = MPIDI_CH3I_SOCK_INVALID_SOCK;
            MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_CLOSED);
            conn->state = CONN_STATE_CLOSED;

            /* Only manipulate vcch if conn was not the loser in a
             * head-to-head resolution. */
            if (vcch->conn == conn) {
                MPL_DBG_VCCHSTATECHANGE(conn->vc, VC_STATE_UNCONNECTED);
                vcch->state = MPIDI_CH3I_VC_STATE_UNCONNECTED;
                vcch->sock = MPIDI_CH3I_SOCK_INVALID_SOCK;

                /* This step is important; without this, test
                 * disconnect_reconnect fails because the vc->ch.conn
                 * connection will continue to be used, even though
                 * the memory has been freed */
                vcch->conn = NULL;

                /* Handle_connection takes care of updating the state on the VC */
                mpi_errno = MPIDI_CH3U_Handle_connection(conn->vc, MPIDI_VC_EVENT_TERMINATED);
                MPIR_ERR_CHECK(mpi_errno);
            }
        }

        /* The VC was likely freed in the _Handle_connection call and should
         * not be referenced anymore in any case. */
        conn->vc = NULL;
    }
    else {
        /* The only remaining possibility is the listener connection */
        MPIR_Assert(conn->state == CONN_STATE_LISTENING);
        MPIDI_CH3I_listener_conn = NULL;
        MPIDI_CH3I_listener_port = 0;

        MPIDI_CH3_Progress_signal_completion();
    }

    connection_destroy(conn);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
657
658 /* Cycle through the connection setup states */
659 /* FIXME: separate out the accept and connect sides to make it easier
660 to follow the logic */
/* Advances the connection set-up according to the type of the packet that
 * was just received into conn->pkt:
 *
 *  - OPEN_REQ:    post the read of the sender's pg id into conn->pg_id.
 *  - CONN_ACCEPT: create a VC for the connection, answer with a positive
 *                 open response and enqueue the VC on the accept queue under
 *                 the received port name tag.
 *  - OPEN_RESP:   on a positive ack (and unless the connection was marked
 *                 DISCARD) the connection becomes CONNECTED; otherwise it is
 *                 detached from its VC and closed.
 *
 * Any other packet type is an error.  Returns MPI_SUCCESS or an MPI error
 * code.
 */
int MPIDI_CH3_Sockconn_handle_conn_event(MPIDI_CH3I_Connection_t *conn)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);

    /* FIXME: Is there an assumption about conn->state? */

    if (conn->pkt.type == MPIDI_CH3I_PKT_SC_OPEN_REQ) {
        MPIDI_CH3I_Pkt_sc_open_req_t *open_req =
            (MPIDI_CH3I_Pkt_sc_open_req_t *) &conn->pkt.type;

        /* Answer to fixme: it appears from the control flow that this is
         * the required state) */
        MPIR_Assert(conn->state == CONN_STATE_OPEN_LRECV_PKT);
        MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_OPEN_LRECV_DATA);
        conn->state = CONN_STATE_OPEN_LRECV_DATA;

        /* NOTE(review): pg_id_len is taken from the received packet and is
         * not checked against the size of conn->pg_id here - confirm that it
         * is validated elsewhere. */
        mpi_errno = MPIDI_CH3I_Sock_post_read(conn->sock, conn->pg_id,
                                              open_req->pg_id_len, open_req->pg_id_len, NULL);
        MPIR_ERR_CHECK(mpi_errno);
    }
    else if (conn->pkt.type == MPIDI_CH3I_PKT_SC_CONN_ACCEPT) {
        /* Both views alias the same packet buffer: the request is read
         * first, then the buffer is reused for the response */
        MPIDI_CH3I_Pkt_sc_conn_accept_t *accept_hdr =
            (MPIDI_CH3I_Pkt_sc_conn_accept_t *) &conn->pkt.type;
        MPIDI_CH3I_Pkt_sc_open_resp_t *resp_hdr =
            (MPIDI_CH3I_Pkt_sc_open_resp_t *) &conn->pkt.type;
        MPIDI_VC_t *vc;
        MPIDI_CH3I_VC *vcch;
        int port_name_tag;

        vc = (MPIDI_VC_t *) MPL_malloc(sizeof(MPIDI_VC_t), MPL_MEM_ADDRESS);
        /* --BEGIN ERROR HANDLING-- */
        if (vc == NULL) {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, __func__, __LINE__,
                                             MPI_ERR_OTHER, "**nomem", NULL);
            goto fn_fail;
        }
        /* --END ERROR HANDLING-- */
        /* FIXME - where does this vc get freed? */

        MPIDI_VC_Init(vc, NULL, 0);

        /* Tie the new VC and the connection together */
        vcch = &vc->ch;
        MPL_DBG_VCCHSTATECHANGE(vc, VC_STATE_CONNECTING);
        vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
        vcch->sock = conn->sock;
        vcch->conn = conn;
        conn->vc = vc;

        /* Must be read before the packet buffer is re-initialized below */
        port_name_tag = accept_hdr->port_name_tag;

        MPIDI_Pkt_init(resp_hdr, MPIDI_CH3I_PKT_SC_OPEN_RESP);
        resp_hdr->ack = TRUE;

        /* FIXME: Possible ambiguous state (two ways to get to OPEN_LSEND) */
        MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_OPEN_LSEND);
        conn->state = CONN_STATE_OPEN_LSEND;
        mpi_errno = connection_post_send_pkt(conn);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**ch3|sock|scconnaccept");
        }

        /* ENQUEUE vc */
        MPIDI_CH3I_Acceptq_enqueue(vc, port_name_tag);
    }
    else if (conn->pkt.type == MPIDI_CH3I_PKT_SC_OPEN_RESP) {
        MPIDI_CH3I_Pkt_sc_open_resp_t *open_resp =
            (MPIDI_CH3I_Pkt_sc_open_resp_t *) &conn->pkt.type;
        /* FIXME: is this the correct assert? */

        if (!open_resp->ack || conn->state == CONN_STATE_DISCARD) {
            MPIDI_CH3I_VC *vcch = &conn->vc->ch;
            /* FIXME: Should conn->vc be freed?  Who allocated?  Why not? */
            /* FIXME: Should probably reduce ref count on conn->vc */
            /* FIXME: What happens to the state of the associated VC?
             * Why isn't it changed?  Is there an assert here,
             * such as conn->vc->conn != conn (there is another connection
             * chosen for the vc)? */
            /* Answer to FIXME */
            /* Neither freed nor updated.  This connection is the loser of
             * a head-to-head connection.  The VC is still in use, but by
             * another socket connection.  The refcount is not incremented
             * by changing the associated connection. */
            /* MPIR_Assert( conn->vc->ch.conn != conn ); */
            /* Set the candidate vc for this connection to NULL (we
             * are discarding this connection because (I think) we
             * are performing a head-to-head connection, and this
             * connection is being rejected in favor of the connection
             * from the other side. */
            if (vcch->conn == conn) {
                vcch->conn = NULL;
            }
            MPL_DBG_CONNSTATECHANGE_MSG(conn->vc, conn, CONN_STATE_CLOSING,
                                        "because ack on OPEN_CRECV was false");
            conn->vc = NULL;
            conn->state = CONN_STATE_CLOSING;
            /* FIXME: What does post close do here? */
            /* Answer to FIXME: */
            /* Since the connection is discarded, the socket is
             * no longer needed and should be closed.  This is initiated with
             * the post close command.  This also causes the socket to be
             * removed from the socket set, so there is no more polling on
             * this socket. */
            MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, "CLosing sock (Post_close)");
            mpi_errno = MPIDI_CH3I_Sock_post_close(conn->sock);
            MPIR_ERR_CHECK(mpi_errno);
        }
        else {
            /* Positive ack on a live connection: it is now established */
            MPIR_Assert(conn->state == CONN_STATE_OPEN_CRECV);
            MPIDI_CH3I_VC *vcch = &conn->vc->ch;
            MPL_DBG_CONNSTATECHANGE(conn->vc, conn, CONN_STATE_CONNECTED);
            conn->state = CONN_STATE_CONNECTED;
            vcch->state = MPIDI_CH3I_VC_STATE_CONNECTED;
            MPIR_Assert(vcch->conn == conn);
            MPIR_Assert(vcch->sock == conn->sock);

            mpi_errno = connection_post_recv_pkt(conn);
            MPIR_ERR_CHECK(mpi_errno);
            mpi_errno = connection_post_sendq_req(conn);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**ch3|sock|scopenresp");
            }
        }
    }
    /* --BEGIN ERROR HANDLING-- */
    else {
        MPL_DBG_STMT(MPIDI_CH3_DBG_CONNECT, VERBOSE, MPIDI_DBG_Print_packet(&conn->pkt));
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, __func__, __LINE__,
                                         MPI_ERR_INTERN, "**ch3|sock|badpacket",
                                         "**ch3|sock|badpacket %d", conn->pkt.type);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
801
802 /* FIXME: This should really be combined with handle_conn_event */
MPIDI_CH3_Sockconn_handle_connopen_event(MPIDI_CH3I_Connection_t * conn)803 int MPIDI_CH3_Sockconn_handle_connopen_event( MPIDI_CH3I_Connection_t * conn )
804 {
805 int mpi_errno = MPI_SUCCESS;
806 MPIDI_PG_t * pg;
807 int pg_rank;
808 MPIDI_VC_t * vc;
809 MPIDI_CH3I_VC *vcch;
810 MPIDI_CH3I_Pkt_sc_open_req_t *openpkt =
811 (MPIDI_CH3I_Pkt_sc_open_req_t *)&conn->pkt.type;
812 MPIDI_CH3I_Pkt_sc_open_resp_t *openresp =
813 (MPIDI_CH3I_Pkt_sc_open_resp_t *)&conn->pkt.type;
814 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
815
816 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
817
818 /* Look up pg based on conn->pg_id */
819 mpi_errno = MPIDI_PG_Find(conn->pg_id, &pg);
820 if (pg == NULL) {
821 MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,
822 "**pglookup",
823 "**pglookup %s", conn->pg_id);
824 }
825
826 /* We require that the packet be the open_req type */
827 pg_rank = openpkt->pg_rank;
828 MPIDI_PG_Get_vc_set_active(pg, pg_rank, &vc);
829 MPIR_Assert(vc->pg_rank == pg_rank);
830
831 if(pg->finalize == 1) {
832 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
833 openresp->ack = FALSE;
834 MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_LSEND);
835 conn->state = CONN_STATE_OPEN_LSEND;
836 mpi_errno = connection_post_send_pkt(conn);
837 if (mpi_errno != MPI_SUCCESS) {
838 MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_INTERN,
839 "**ch3|sock|open_lrecv_data");
840 }
841 goto fn_exit;
842 }
843 vcch = &vc->ch;
844 if (vcch->conn == NULL) {
845 /* no head-to-head connects, accept the connection */
846 MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_CONNECTING);
847 vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
848 vcch->sock = conn->sock;
849 vcch->conn = conn;
850 conn->vc = vc;
851
852 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
853 openresp->ack = TRUE;
854 }
855 else {
856 /* head to head situation */
857 if (pg == MPIDI_Process.my_pg) {
858 /* the other process is in the same comm_world; just compare the
859 ranks */
860 if (MPIR_Process.comm_world->rank < pg_rank) {
861 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST,
862 "vc=%p,conn=%p:Accept head-to-head connection (my process group), discarding vcch->conn=%p",vc,conn, vcch->conn));
863
864 /* mark old connection */
865 MPIDI_CH3I_Connection_t *old_conn = vcch->conn;
866 MPL_DBG_CONNSTATECHANGE(old_conn,old_conn,CONN_STATE_DISCARD);
867 old_conn->state = CONN_STATE_DISCARD;
868
869 /* accept connection */
870 MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_CONNECTING);
871 vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
872 vcch->sock = conn->sock;
873 vcch->conn = conn;
874 conn->vc = vc;
875
876 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
877 openresp->ack = TRUE;
878 }
879 else {
880 /* refuse connection */
881 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST,
882 "vc=%p,conn=%p:Refuse head-to-head connection (my process group)",vc,conn));
883 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
884 openresp->ack = FALSE;
885 }
886 }
887 else {
888 /* the two processes are in different comm_worlds; compare their
889 unique pg_ids. */
890 if (strcmp(MPIDI_Process.my_pg->id, pg->id) < 0) {
891 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST,
892 "vc=%p,conn=%p:Accept head-to-head connection (two process groups), discarding vcch->conn=%p",vc,conn, vcch->conn));
893 /* mark old connection */
894 MPIDI_CH3I_Connection_t *old_conn = vcch->conn;
895 MPL_DBG_CONNSTATECHANGE(old_conn,old_conn,CONN_STATE_DISCARD);
896 old_conn->state = CONN_STATE_DISCARD;
897 /* accept connection */
898 MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_CONNECTING);
899 vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
900 vcch->sock = conn->sock;
901 vcch->conn = conn;
902 conn->vc = vc;
903
904 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
905 openresp->ack = TRUE;
906 }
907 else {
908 /* refuse connection */
909 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST,
910 "vc=%p,conn=%p:Refuse head-to-head connection (two process groups)",vc,conn));
911 MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
912 openresp->ack = FALSE;
913 }
914 }
915 }
916
917 MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_LSEND);
918 conn->state = CONN_STATE_OPEN_LSEND;
919 mpi_errno = connection_post_send_pkt(conn);
920 if (mpi_errno != MPI_SUCCESS) {
921 MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_INTERN,
922 "**ch3|sock|open_lrecv_data");
923 }
924
925 fn_exit:
926 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
927 return mpi_errno;
928 fn_fail:
929 goto fn_exit;
930 }
931
932 /* FIXME: This routine is called when? What is valid in conn? */
/*
 * Advance the connection handshake after a write on conn has completed.
 *
 * The action depends on conn->state:
 *   - CONN_STATE_OPEN_CSEND: the open request packet has been sent; the
 *     connection moves to CONN_STATE_OPEN_CRECV and a receive is posted for
 *     the open response packet.
 *   - CONN_STATE_DISCARD: the state is left untouched, but a packet receive
 *     is still posted.
 *   - CONN_STATE_OPEN_LSEND: the open response packet has been sent.  If it
 *     carried ack == TRUE, the connection and its VC channel both become
 *     CONNECTED, a receive for the next packet is posted and the head of the
 *     VC send queue (if any) is started.  Otherwise the connection is
 *     detached from its VC, marked CONN_STATE_CLOSING and a close is posted
 *     on its socket.
 *   - any other state: nothing is done.
 *
 * Returns MPI_SUCCESS, or an MPI error code if posting the receive, the
 * queued send or the close fails.
 */
int MPIDI_CH3_Sockconn_handle_connwrite(MPIDI_CH3I_Connection_t * conn)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);

    switch (conn->state) {
        case CONN_STATE_OPEN_CSEND:
            /* finished sending open request packet; only this state (not
             * DISCARD) advances to waiting for the response */
            MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_CRECV);
            conn->state = CONN_STATE_OPEN_CRECV;
            /* fall through */
        case CONN_STATE_DISCARD:
            /* post receive for open response packet */
            mpi_errno = connection_post_recv_pkt(conn);
            MPIR_ERR_CHECK(mpi_errno);
            break;

        case CONN_STATE_OPEN_LSEND:
        {
            /* finished sending open response packet; the packet buffer
             * still holds the response that was just written */
            MPIDI_CH3I_Pkt_sc_open_resp_t *resp =
                (MPIDI_CH3I_Pkt_sc_open_resp_t *) &conn->pkt.type;

            if (resp->ack != TRUE) {
                /* head-to-head connections - close this connection */
                MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_CLOSING);
                /* FIXME: the connect side of this sets conn->vc to NULL.
                 * Why is this different?  The code that checks
                 * CONN_STATE_CLOSING uses conn == NULL to identify
                 * intentional close, which this appears to be. */
                conn->state = CONN_STATE_CLOSING;

                /* zero out the vc to prevent trouble in _handle_close_event */
                conn->vc = NULL;

                MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,"Closing sock2 (Post_close)");
                mpi_errno = MPIDI_CH3I_Sock_post_close(conn->sock);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                        "**sock_post_close");
                }
            }
            else {
                MPIDI_CH3I_VC *vcch = &conn->vc->ch;

                /* the connection is now usable: mark both sides of the
                 * bookkeeping (connection and VC channel) as connected */
                MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_CONNECTED);
                conn->state = CONN_STATE_CONNECTED;
                MPL_DBG_VCCHSTATECHANGE(conn->vc,VC_STATE_CONNECTED);
                vcch->state = MPIDI_CH3I_VC_STATE_CONNECTED;

                /* post receive for packet header */
                mpi_errno = connection_post_recv_pkt(conn);
                MPIR_ERR_CHECK(mpi_errno);

                /* start whatever is waiting on the VC send queue */
                mpi_errno = connection_post_sendq_req(conn);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_INTERN,
                                        "**ch3|sock|openlsend");
                }
            }
            break;
        }

        default:
            /* no handshake work is attached to any other state */
            break;
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
997
998 /* ----------------------------------------------------------------------- */
999 /* FIXME: What does this do? */
/*
 * Start a socket connection for vc.
 *
 * If the VC channel state is UNCONNECTED, it is switched to CONNECTING, the
 * connection string for (vc->pg, vc->pg_rank) is fetched with
 * MPIDI_PG_GetConnString() and handed to MPIDI_CH3I_Sock_connect().
 * In any other state nothing is posted and MPI_SUCCESS is returned.
 *
 * Returns MPI_SUCCESS or an MPI error code from the connection-string
 * lookup or from MPIDI_CH3I_Sock_connect().
 */
int MPIDI_CH3I_VC_post_sockconnect(MPIDI_VC_t * vc)
{
    int mpi_errno = MPI_SUCCESS;
    char connstr[MPIDI_MAX_KVS_VALUE_LEN];
    MPIDI_CH3I_VC *vcch = &vc->ch;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);

    if (vcch->state != MPIDI_CH3I_VC_STATE_UNCONNECTED) {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"MT: vc=(%p) is already connecting/ed", vc);
        MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,TYPICAL,"Aborting posting a connect");
        /*************** MT *****************/
        /* There are 3 cases here,
         * 1) Another thread posted a connect while the current thread
         *    was blocked in MPIDI_PG_GetConnString()
         *    VC state = MPIDI_CH3I_VC_STATE_CONNECTING
         * 2) Another thread posted a connect and completed the
         *    connection while the current thread was blocked in
         *    MPIDI_PG_GetConnString()
         *    VC state = MPIDI_CH3I_VC_STATE_CONNECTED
         * 3) Another thread received a connect from the same proc we
         *    are connecting to and opened a connection while the
         *    current thread was blocked in MPIDI_PG_GetConnString()
         *    VC state = MPIDI_CH3I_VC_STATE_CONNECTING or
         *    VC state = MPIDI_CH3I_VC_STATE_CONNECTED
         * If we bail out here, in all the cases above the other thread
         * will handle the connection. In particular in the 3rd case
         * if we proceed to post a connect before the VC state is set
         * by the thread processing the remote connect,
         * the code for head-to-head conn resolution will take care of
         * discarding one of the connections
         */
        /* mpi_errno is still MPI_SUCCESS here: bailing out is not an error */
        goto fn_exit;
    }

    /* MPIDI_PG_GetConnString() can block & release the lock for
     * the current thread. Prevent other threads from trying to
     * obtain the ConnString by setting the VC to *CONNECTING.
     */
    MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_CONNECTING);
    vcch->state = MPIDI_CH3I_VC_STATE_CONNECTING;
    MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"vc=(%p) Going ahead to obtain connstring", vc);

    /* NOTE(review): if this lookup fails, the VC is left in CONNECTING by
     * this function; confirm that a caller resets or fails the VC. */
    mpi_errno = MPIDI_PG_GetConnString(vc->pg, vc->pg_rank, connstr, sizeof(connstr));
    MPIR_ERR_CHECK(mpi_errno);

    /* the result of the connect is returned to the caller as-is */
    mpi_errno = MPIDI_CH3I_Sock_connect(vc, connstr, sizeof(connstr));

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
1056 /* end MPIDI_CH3I_VC_post_sockconnect() */
1057
1058 /* Given a connection string, start the process of creating a socket
1059 connection to that designated interface (on a node). This routine
1060 is used in MPIDI_CH3I_VC_post_sockconnect.
1061
1062 vallen = sizeof(val)
1063 */
/*
 * Post a socket connect for vc using the connection description in val.
 *
 * vc     - virtual connection to connect; its channel state must already be
 *          MPIDI_CH3I_VC_STATE_CONNECTING, otherwise nothing is posted and
 *          MPI_SUCCESS is returned.
 * val    - connection string; decoded with
 *          MPIDI_CH3I_Sock_get_conninfo_from_bc() into a host description,
 *          a port and (optionally) an interface address.
 * vallen - not referenced by this routine.
 *
 * On success a new connection object is attached to the VC (vcch->conn,
 * vcch->sock, conn->vc) in state CONN_STATE_CONNECTING.  If posting the
 * connect fails, the VC channel state is set to FAILED.  On every failure
 * path a connection object that was already allocated is destroyed.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3I_Sock_connect(MPIDI_VC_t * vc, const char val[], int vallen)
{
    char host_description[MAX_HOST_DESCRIPTION_LEN];
    MPL_sockaddr_t ifaddr;
    int hasIfaddr = 0, port;
    MPIDI_CH3I_Connection_t *conn = 0;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);

    if (vcch->state != MPIDI_CH3I_VC_STATE_CONNECTING) {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"MT: vc=(%p) is already connected", vc);
        MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,TYPICAL,"Aborting posting a connect");
        /*************** MT *****************/
        /* 1) Another thread received a connect from the same proc
         *    the current thread is connecting to and opened a
         *    connection while the current thread was blocked in
         *    MPIDI_PG_GetConnString()
         *    VC state = MPIDI_CH3I_VC_STATE_CONNECTED
         * If we bail out here, the other thread will handle the connection.
         * if we proceed to post a connect before the VC state is set
         * by the thread processing the remote connect,
         * the code for head-to-head conn resolution will take care of
         * discarding one of the connections
         */
        /* mpi_errno is still MPI_SUCCESS here: bailing out is not an error */
        goto fn_exit;
    }
    MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"Posting a connect for vc=(%p)", vc);

    /* decode the connection string into host / port / interface address */
    mpi_errno = MPIDI_CH3I_Sock_get_conninfo_from_bc(val, host_description,
                                                     sizeof(host_description),
                                                     &port, &ifaddr, &hasIfaddr);
    MPIR_ERR_CHECK(mpi_errno);

    mpi_errno = MPIDI_CH3I_Connection_alloc(&conn);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|sock|connalloc");
    }

    /* FIXME: This is a hack to allow Windows to continue to use
     * the host description string instead of the interface address
     * bytes when posting a socket connection.  This should be fixed
     * by changing the Sock_post_connect to only accept interface
     * address. */
#ifndef HAVE_WINDOWS_H
    if (hasIfaddr) {
        mpi_errno = MPIDI_CH3I_Sock_post_connect_ifaddr(MPIDI_CH3I_sock_set,
                                                        conn, &ifaddr, port,
                                                        &conn->sock);
    }
    else
#endif
    {
        mpi_errno = MPIDI_CH3I_Sock_post_connect(MPIDI_CH3I_sock_set, conn,
                                                 host_description, port,
                                                 &conn->sock);
    }

    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        /* the connect could not even be posted: fail the VC and report */
        MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_FAILED);
        vcch->state = MPIDI_CH3I_VC_STATE_FAILED;
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, __func__, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postconnect",
                                         "**ch3|sock|postconnect %d %d %s", MPIR_Process.comm_world->rank, vc->pg_rank, val);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

    /* connect posted: tie the new connection and the VC together */
    MPL_DBG_CONNSTATECHANGE(vc,conn,CONN_STATE_CONNECTING);
    vcch->sock = conn->sock;
    vcch->conn = conn;
    conn->vc = vc;
    conn->state = CONN_STATE_CONNECTING;
    conn->send_active = NULL;
    conn->recv_active = NULL;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
    return mpi_errno;
  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    /* conn is only non-null once the allocation above has run */
    if (conn) {
        connection_destroy(conn);
    }
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
1159
1160
1161 /* FIXME: What does this do? */
1162 /* Guess: Setup a wait-to-read on the socket that was set after the accept
1163 was handled */
1164 /* Wrong guess. */
/*
 * Post a read on conn->sock that fills conn->pkt.
 *
 * Both size arguments passed to MPIDI_CH3I_Sock_post_read() are
 * sizeof(conn->pkt), i.e. the read is for exactly one packet-sized buffer.
 *
 * Returns MPI_SUCCESS or the MPI error code produced by the post.
 */
static int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
{
    int mpi_errno = MPI_SUCCESS;
    void *pkt_buf;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_RECV_PKT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_RECV_PKT);

    pkt_buf = &conn->pkt;
    mpi_errno = MPIDI_CH3I_Sock_post_read(conn->sock, pkt_buf,
                                          sizeof(conn->pkt), sizeof(conn->pkt),
                                          NULL);
    MPIR_ERR_CHECK(mpi_errno);

    /* success and failure share the same exit path */
  fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_RECV_PKT);
    return mpi_errno;
}
1180
1181
/*
 * Post a write on conn->sock that sends the contents of conn->pkt.
 *
 * Both size arguments passed to MPIDI_CH3I_Sock_post_write() are
 * sizeof(conn->pkt), i.e. the whole packet buffer is written.
 *
 * Returns MPI_SUCCESS or the MPI error code produced by the post.
 */
static int connection_post_send_pkt(MPIDI_CH3I_Connection_t * conn)
{
    int mpi_errno = MPI_SUCCESS;
    void *pkt_buf;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT);

    MPL_DBG_PKT(conn,&conn->pkt,"connect");

    pkt_buf = &conn->pkt;
    mpi_errno = MPIDI_CH3I_Sock_post_write(conn->sock, pkt_buf,
                                           sizeof(conn->pkt), sizeof(conn->pkt),
                                           NULL);
    MPIR_ERR_CHECK(mpi_errno);

    /* success and failure share the same exit path */
  fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT);
    return mpi_errno;
}
1198
/*
 * Post a vectored write on conn->sock that sends conn->pkt followed by this
 * process's process-group id string.
 *
 * The two-element conn->iov is filled as:
 *   iov[0] - the packet buffer conn->pkt (sizeof(conn->pkt) bytes)
 *   iov[1] - MPIDI_Process.my_pg->id, including its terminating NUL
 *            (strlen + 1 bytes)
 *
 * Returns MPI_SUCCESS or the MPI error code produced by the post.
 */
static int connection_post_send_pkt_and_pgid(MPIDI_CH3I_Connection_t * conn)
{
    /* initialized like in every sibling helper so the value is always
     * defined, whatever path reaches the return */
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);

    /* first element: the packet header itself */
    conn->iov[0].iov_base = (void *) &conn->pkt;
    conn->iov[0].iov_len = (int) sizeof(conn->pkt);

    /* second element: our pg id, sent with its NUL terminator */
    conn->iov[1].iov_base = (void *) MPIDI_Process.my_pg->id;
    conn->iov[1].iov_len = (int) strlen(MPIDI_Process.my_pg->id) + 1;

    MPL_DBG_PKT(conn,&conn->pkt,"connect-pgid");
    mpi_errno = MPIDI_CH3I_Sock_post_writev(conn->sock, conn->iov, 2, NULL);
    MPIR_ERR_CHECK(mpi_errno);

    /* success and failure share the same exit path */
  fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
    return mpi_errno;
}
1220
1221 /* FIXME: This function also used in channels/sock/src/ch3_progress.c */
/*
 * Start sending the request at the head of the send queue of conn's VC.
 *
 * conn->send_active is set to the queue head returned by
 * MPIDI_CH3I_SendQ_head(); when it is non-NULL, a vectored write of that
 * request's iov (dev.iov / dev.iov_count) is posted on conn->sock.  With an
 * empty queue, conn->send_active ends up NULL and nothing is posted.
 *
 * conn->vc is dereferenced unconditionally, so it must be set on entry.
 *
 * Returns MPI_SUCCESS or the MPI error code produced by the post.
 */
static int connection_post_sendq_req(MPIDI_CH3I_Connection_t * conn)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &conn->vc->ch;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SENDQ_REQ);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SENDQ_REQ);

    /* pick up the next request waiting on the send queue */
    conn->send_active = MPIDI_CH3I_SendQ_head(vcch);    /* MT */

    if (NULL != conn->send_active) {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"conn=%p: Posting message from connection send queue", conn );
        mpi_errno = MPIDI_CH3I_Sock_post_writev(conn->sock,
                                                conn->send_active->dev.iov,
                                                conn->send_active->dev.iov_count,
                                                NULL);
        MPIR_ERR_CHECK(mpi_errno);
    }

    /* success and failure share the same exit path */
  fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
    return mpi_errno;
}
1246
1247
1248 /* This routine frees all of the memory associated with a connection.
1249 It is named destroy instead of free because routines with name "free"
1250 should have MPI semantics - free means to
1251 decrement reference count and free if reference count is zero */
/*
 * Release a connection object and the pg_id string it holds.
 *
 * conn->pg_id is freed first, then conn itself; conn must not be used after
 * this call.  conn is dereferenced unconditionally, so it must not be NULL.
 */
static void connection_destroy(MPIDI_CH3I_Connection_t * conn)
{
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_DESTROY);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_DESTROY);

    /* the member has to go before the structure that holds the pointer */
    MPL_free(conn->pg_id);
    MPL_free(conn);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_DESTROY);
}
1263
1264
1265 #ifdef MPL_USE_DBG_LOGGING
MPIDI_CH3_VC_SockGetStateString(struct MPIDI_VC * vc)1266 const char * MPIDI_CH3_VC_SockGetStateString( struct MPIDI_VC *vc )
1267 {
1268 const char *name = "unknown";
1269 static char asdigits[20];
1270 MPIDI_CH3I_VC *vcch = &vc->ch;
1271 int state = vcch->state;
1272
1273 switch (state) {
1274 case MPIDI_CH3I_VC_STATE_UNCONNECTED: name = "CH3I_VC_STATE_UNCONNECTED"; break;
1275 case MPIDI_CH3I_VC_STATE_CONNECTING: name = "CH3I_VC_STATE_CONNECTING"; break;
1276 case MPIDI_CH3I_VC_STATE_CONNECTED: name = "CH3I_VC_STATE_CONNECTED"; break;
1277 case MPIDI_CH3I_VC_STATE_FAILED: name = "CH3I_VC_STATE_FAILED"; break;
1278 default:
1279 MPL_snprintf( asdigits, sizeof(asdigits), "%d", state );
1280 asdigits[20-1] = 0;
1281 name = (const char *)asdigits;
1282 }
1283 return name;
1284 }
1285 #endif
1286