1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 
3 /*
4  *  (C) 2001 by Argonne National Laboratory.
5  *      See COPYRIGHT in top-level directory.
6  */
7 
8 
9 #ifdef MPICH_IS_THREADED
10 static int MPIDU_Socki_wakeup(struct MPIDU_Sock_set * sock_set);
11 int MPIDI_Sock_update_sock_set( struct MPIDU_Sock_set *, int );
12 #endif
13 
14 static int MPIDU_Socki_os_to_mpi_errno(struct pollinfo * pollinfo,
15 		     int os_errno, const char * fcname, int line, int * conn_failed);
16 
17 static int MPIDU_Socki_adjust_iov(ssize_t nb, MPID_IOV * const iov,
18 				  const int count, int * const offsetp);
19 
20 static int MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set,
21 				  struct MPIDU_Sock ** sockp);
22 static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock);
23 
24 static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo,
25 				     enum MPIDU_Sock_op op,
26 				     MPIU_Size_t num_bytes,
27 				     void * user_ptr, int error);
28 static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set,
29 					    int * set_elem,
30 					    struct MPIDU_Sock_event * eventp);
31 
32 static void MPIDU_Socki_free_eventq_mem(void);
33 
34 struct MPIDU_Socki_eventq_table
35 {
36     struct MPIDU_Socki_eventq_elem elems[MPIDU_SOCK_EVENTQ_POOL_SIZE];
37     struct MPIDU_Socki_eventq_table * next;
38 };
39 
40 static struct MPIDU_Socki_eventq_table *MPIDU_Socki_eventq_table_head=NULL;
41 
42 
43 
/*
 * Accessors for the parallel pollfds[] / pollinfos[] arrays of a sock set.
 * A sock and a pollinfo both record their owning sock set and their index
 * ('elem') in those arrays; each macro yields a pointer to the matching entry.
 */
#define MPIDU_Socki_sock_get_pollfd(sock_) \
    (&(sock_)->sock_set->pollfds[(sock_)->elem])
#define MPIDU_Socki_sock_get_pollinfo(sock_) \
    (&(sock_)->sock_set->pollinfos[(sock_)->elem])
#define MPIDU_Socki_pollinfo_get_pollfd(pollinfo_) \
    (&(pollinfo_)->sock_set->pollfds[(pollinfo_)->elem])
47 
48 
/*
 * MPIDU_SOCKI_EVENT_ENQUEUE()
 *
 * Enqueue a new event through MPIDU_Socki_event_enqueue().  If the enqueue
 * fails, wrap the failure in a fatal MPIDU_SOCK_ERR_FAIL error code, store it
 * in mpi_errno_, and jump to fail_label_.
 *
 *   pollinfo_        - pollinfo of the sock the event belongs to
 *   op_              - operation type recorded in the event
 *   nb_              - byte count recorded in the event
 *   user_ptr_        - user pointer recorded in the event
 *   event_mpi_errno_ - error code recorded in the event
 *   mpi_errno_       - lvalue receiving the result of the enqueue
 *   fail_label_      - label to jump to when the enqueue fails
 *
 * Only the macro parameters are referenced, so the macro does not depend on
 * the caller having variables that happen to be named 'mpi_errno' or
 * 'pollinfo'.  FCNAME must be defined at the point of use.
 */
#define MPIDU_SOCKI_EVENT_ENQUEUE(pollinfo_, op_, nb_, user_ptr_, event_mpi_errno_, mpi_errno_, fail_label_)        \
{                                                                                                                   \
    (mpi_errno_) = MPIDU_Socki_event_enqueue((pollinfo_), (op_), (nb_), (user_ptr_), (event_mpi_errno_));           \
    if ((mpi_errno_) != MPI_SUCCESS)                                                                                \
    {                                                                                                               \
        (mpi_errno_) = MPIR_Err_create_code((mpi_errno_), MPIR_ERR_FATAL, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,    \
                                            "**sock|poll|eqfail", "**sock|poll|eqfail %d %d %d",                    \
                                            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id, (op_));                \
        goto fail_label_;                                                                                           \
    }                                                                                                               \
}
62 
/*
 * MPIDU_SOCKI_POLLFD_OP_SET / _CLEAR / _ISSET
 *
 * Post, retract, and query interest in poll events (op_) for one
 * pollfd/pollinfo pair.
 *
 * FIXME: These need to separate the operations from the thread-related
 * synchronization to ensure that the code that is independent of threads is
 * always the same.  Also, the thread-level check needs to be identical to all
 * others, and there should be an option, possibly embedded within special
 * thread macros, to allow runtime control of the thread level.
 */

#ifdef MPICH_IS_THREADED

/*
 * Threaded build.  The requested events are always recorded in
 * pollinfo_->pollfd_events.  The pollfd entry itself is edited only while
 * sock_set->pollfds_active is NULL; otherwise the set is flagged as updated
 * and MPIDU_Socki_wakeup() is called.
 *
 * FIXME: Does this need a runtime check on whether threads are in use?
 */
#   define MPIDU_SOCKI_POLLFD_OP_SET(pollfd_, pollinfo_, op_)           \
    {                                                                   \
        (pollinfo_)->pollfd_events |= (op_);                            \
        if ((pollinfo_)->sock_set->pollfds_active != NULL)              \
        {                                                               \
            (pollinfo_)->sock_set->pollfds_updated = TRUE;              \
            MPIDU_Socki_wakeup((pollinfo_)->sock_set);                  \
        }                                                               \
        else                                                            \
        {                                                               \
            (pollfd_)->events |= (op_);                                 \
            (pollfd_)->fd = (pollinfo_)->fd;                            \
        }                                                               \
    }
#   define MPIDU_SOCKI_POLLFD_OP_CLEAR(pollfd_, pollinfo_, op_)         \
    {                                                                   \
        (pollinfo_)->pollfd_events &= ~(op_);                           \
        if ((pollinfo_)->sock_set->pollfds_active != NULL)              \
        {                                                               \
            (pollinfo_)->sock_set->pollfds_updated = TRUE;              \
            MPIDU_Socki_wakeup((pollinfo_)->sock_set);                  \
        }                                                               \
        else                                                            \
        {                                                               \
            (pollfd_)->events &= ~(op_);                                \
            (pollfd_)->revents &= ~(op_);                               \
            if (!((pollfd_)->events & (POLLIN | POLLOUT)))              \
            {                                                           \
                (pollfd_)->fd = -1;                                     \
            }                                                           \
        }                                                               \
    }

#else /* !MPICH_IS_THREADED */

/*
 * Non-threaded build: the pollfd entry is edited directly.  Once neither
 * POLLIN nor POLLOUT remains requested, the entry's fd is set to -1.
 */
#   define MPIDU_SOCKI_POLLFD_OP_SET(pollfd_, pollinfo_, op_)           \
    {                                                                   \
        (pollfd_)->events |= (op_);                                     \
        (pollfd_)->fd = (pollinfo_)->fd;                                \
    }
#   define MPIDU_SOCKI_POLLFD_OP_CLEAR(pollfd_, pollinfo_, op_)         \
    {                                                                   \
        (pollfd_)->events &= ~(op_);                                    \
        (pollfd_)->revents &= ~(op_);                                   \
        if (!((pollfd_)->events & (POLLIN | POLLOUT)))                  \
        {                                                               \
            (pollfd_)->fd = -1;                                         \
        }                                                               \
    }

#endif /* MPICH_IS_THREADED */

/* Non-zero when any bit of op_ is set in the pollfd's requested events. */
#define MPIDU_SOCKI_POLLFD_OP_ISSET(pollfd_, pollinfo_, op_) ((pollfd_)->events & (op_))
122 
/*
 * MPIDU_SOCKI_GET_SOCKET_ERROR()
 *
 * Fetch the pending error of the socket behind pollinfo_ (SO_ERROR) into
 * os_errno_.  If getsockopt() itself fails, an error code is stored in
 * mpi_errno_ and control jumps to fail_label_:
 *   - ENOMEM / ENOBUFS -> recoverable MPIDU_SOCK_ERR_NOMEM
 *   - anything else    -> fatal MPIDU_SOCK_ERR_FAIL
 *
 *   pollinfo_   - pollinfo whose fd is queried
 *   os_errno_   - lvalue receiving the SO_ERROR value on success
 *   mpi_errno_  - lvalue receiving the error code on failure
 *   fail_label_ - label to jump to on failure
 *
 * When getsockopt() fails it reports the reason through errno and does not
 * fill in os_errno_, so the diagnostic is built from a saved copy of errno.
 * Only the macro parameters are referenced; the macro does not rely on
 * caller variables named 'mpi_errno' or 'pollinfo'.  FCNAME must be defined
 * at the point of use.
 *
 * FIXME: Low usage operations like this should be a function for
 * better readability, modularity, and code size
 */
#define MPIDU_SOCKI_GET_SOCKET_ERROR(pollinfo_, os_errno_, mpi_errno_, fail_label_)                                     \
{                                                                                                                       \
    int rc__;                                                                                                           \
    socklen_t sz__;                                                                                                     \
                                                                                                                        \
    sz__ = sizeof(os_errno_);                                                                                           \
    rc__ = getsockopt((pollinfo_)->fd, SOL_SOCKET, SO_ERROR, &(os_errno_), &sz__);                                      \
    if (rc__ != 0)                                                                                                      \
    {                                                                                                                   \
        /* save errno before any other call has a chance to change it */                                                \
        const int getsockopt_errno__ = errno;                                                                           \
                                                                                                                        \
        if (getsockopt_errno__ == ENOMEM || getsockopt_errno__ == ENOBUFS)                                              \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**sock|osnomem",            \
                "**sock|osnomem %s %d %d", "getsockopt", (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);              \
        }                                                                                                               \
        else                                                                                                            \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock|oserror",                   \
                "**sock|poll|oserror %s %d %d %d %s", "getsockopt", (pollinfo_)->sock_set->id, (pollinfo_)->sock_id,    \
                getsockopt_errno__, MPIU_Strerror(getsockopt_errno__));                                                 \
        }                                                                                                               \
                                                                                                                        \
        goto fail_label_;                                                                                               \
    }                                                                                                                   \
}
151 
152 
153 /*
154  * Validation tests
155  */
156 /* FIXME: Are these really optional?  Based on their definitions, it looks
157    like they should only be used when debugging the code.  */
158 #ifdef USE_SOCK_VERIFY
/*
 * Fail with MPIDU_SOCK_ERR_INIT (chained onto the current mpi_errno_) and
 * jump to fail_label_ unless MPIDU_Socki_initialized is positive.
 */
#define MPIDU_SOCKI_VERIFY_INIT(mpi_errno_, fail_label_)                                                \
{                                                                                                       \
    if (!(MPIDU_Socki_initialized > 0))                                                                 \
    {                                                                                                   \
        (mpi_errno_) = MPIR_Err_create_code((mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,       \
                                            MPIDU_SOCK_ERR_INIT, "**sock|uninit", NULL);                \
        goto fail_label_;                                                                               \
    }                                                                                                   \
}

/* Sock set validation currently performs no checks (empty expansion). */
#define MPIDU_SOCKI_VALIDATE_SOCK_SET(sock_set_, mpi_errno_, fail_label_)
171 
172 
/*
 * Check that sock_ refers to a plausible entry of its sock set: the sock and
 * its sock set are non-NULL, its element index lies within
 * [0, poll_array_elems), and the type/state stored in the matching pollinfo
 * lie strictly inside their valid ranges (the type must also be below
 * MPIDU_SOCKI_TYPE_INTERRUPTER).  Any violation yields MPIDU_SOCK_ERR_BAD_SOCK
 * (chained onto the current mpi_errno_) and a jump to fail_label_.
 *
 * The pollinfo is only looked up after the pointer and index checks pass.
 */
#define MPIDU_SOCKI_VALIDATE_SOCK(sock_, mpi_errno_, fail_label_)                                       \
{                                                                                                       \
    int bad_sock__ = ((sock_) == NULL || (sock_)->sock_set == NULL || (sock_)->elem < 0 ||              \
                      (sock_)->elem >= (sock_)->sock_set->poll_array_elems);                            \
                                                                                                        \
    if (!bad_sock__)                                                                                    \
    {                                                                                                   \
        const struct pollinfo * pollinfo__ = MPIDU_Socki_sock_get_pollinfo(sock_);                      \
                                                                                                        \
        bad_sock__ = (pollinfo__->type <= MPIDU_SOCKI_TYPE_FIRST ||                                     \
                      pollinfo__->type >= MPIDU_SOCKI_TYPE_INTERRUPTER ||                               \
                      pollinfo__->state <= MPIDU_SOCKI_STATE_FIRST ||                                   \
                      pollinfo__->state >= MPIDU_SOCKI_STATE_LAST);                                     \
    }                                                                                                   \
                                                                                                        \
    if (bad_sock__)                                                                                     \
    {                                                                                                   \
        (mpi_errno_) = MPIR_Err_create_code((mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,       \
                                            MPIDU_SOCK_ERR_BAD_SOCK, "**sock|badsock", NULL);           \
        goto fail_label_;                                                                               \
    }                                                                                                   \
}
195 
196 
/*
 * Check that the sock described by pollinfo_ may be read from.  On failure an
 * error code (chained onto the current mpi_errno_) is stored in mpi_errno_
 * and control jumps to fail_label_.
 *
 * Communication socks:
 *   CONNECTING                      -> MPIDU_SOCK_ERR_BAD_SOCK    (not connected)
 *   DISCONNECTED, os_errno == 0     -> MPIDU_SOCK_ERR_CONN_CLOSED
 *   DISCONNECTED, os_errno != 0     -> MPIDU_SOCK_ERR_CONN_FAILED
 *   CLOSING                         -> MPIDU_SOCK_ERR_INPROGRESS
 *   not CONNECTED_RW / CONNECTED_RO -> MPIDU_SOCK_ERR_BAD_SOCK
 * Listener socks always fail with MPIDU_SOCK_ERR_BAD_SOCK.
 * Other sock types pass unchecked.
 *
 * Every failing test ends in a goto, so the tests are written as a flat
 * sequence rather than an else-if ladder.
 */
#define MPIDU_SOCKI_VERIFY_CONNECTED_READABLE(pollinfo_, mpi_errno_, fail_label_)                                       \
{                                                                                                                       \
    if ((pollinfo_)->type == MPIDU_SOCKI_TYPE_COMMUNICATION)                                                            \
    {                                                                                                                   \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTING)                                                         \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                          \
                "**sock|notconnected", "**sock|notconnected %d %d",                                                     \
                (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                       \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_DISCONNECTED)                                                       \
        {                                                                                                               \
            if ((pollinfo_)->os_errno != 0)                                                                             \
            {                                                                                                           \
                (mpi_errno_) = MPIR_Err_create_code(                                                                    \
                    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED,                   \
                    "**sock|connfailed", "**sock|poll|connfailed %d %d %d %s",                                          \
                    (pollinfo_)->sock_set->id, (pollinfo_)->sock_id,                                                    \
                    (pollinfo_)->os_errno, MPIU_Strerror((pollinfo_)->os_errno));                                       \
            }                                                                                                           \
            else                                                                                                        \
            {                                                                                                           \
                (mpi_errno_) = MPIR_Err_create_code(                                                                    \
                    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_CLOSED,                   \
                    "**sock|connclosed", "**sock|connclosed %d %d",                                                     \
                    (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                   \
            }                                                                                                           \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_CLOSING)                                                            \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INPROGRESS,                        \
                "**sock|closing", "**sock|closing %d %d",                                                               \
                (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                       \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if (!((pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTED_RW ||                                                   \
              (pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTED_RO))                                                    \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                          \
                "**sock|badsock", NULL);                                                                                \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
    }                                                                                                                   \
    else if ((pollinfo_)->type == MPIDU_SOCKI_TYPE_LISTENER)                                                            \
    {                                                                                                                   \
        (mpi_errno_) = MPIR_Err_create_code(                                                                            \
            (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                              \
            "**sock|listener_read", "**sock|listener_read %d %d",                                                       \
            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                           \
        goto fail_label_;                                                                                               \
    }                                                                                                                   \
}
249 
250 
/*
 * Check that the sock described by pollinfo_ may be written to.  On failure
 * an error code (chained onto the current mpi_errno_) is stored in mpi_errno_
 * and control jumps to fail_label_.
 *
 * Communication socks:
 *   CONNECTING                                  -> MPIDU_SOCK_ERR_BAD_SOCK (not connected)
 *   DISCONNECTED or CONNECTED_RO, os_errno == 0 -> MPIDU_SOCK_ERR_CONN_CLOSED
 *   DISCONNECTED or CONNECTED_RO, os_errno != 0 -> MPIDU_SOCK_ERR_CONN_FAILED
 *   CLOSING                                     -> MPIDU_SOCK_ERR_INPROGRESS
 *   not CONNECTED_RW                            -> MPIDU_SOCK_ERR_BAD_SOCK
 * Listener socks always fail with MPIDU_SOCK_ERR_BAD_SOCK.
 * Other sock types pass unchecked.
 *
 * Every failing test ends in a goto, so the tests are written as a flat
 * sequence rather than an else-if ladder.
 */
#define MPIDU_SOCKI_VERIFY_CONNECTED_WRITABLE(pollinfo_, mpi_errno_, fail_label_)                                       \
{                                                                                                                       \
    if ((pollinfo_)->type == MPIDU_SOCKI_TYPE_COMMUNICATION)                                                            \
    {                                                                                                                   \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTING)                                                         \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                          \
                "**sock|notconnected", "**sock|notconnected %d %d",                                                     \
                (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                       \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_DISCONNECTED ||                                                     \
            (pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTED_RO)                                                       \
        {                                                                                                               \
            if ((pollinfo_)->os_errno != 0)                                                                             \
            {                                                                                                           \
                (mpi_errno_) = MPIR_Err_create_code(                                                                    \
                    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED,                   \
                    "**sock|connfailed", "**sock|poll|connfailed %d %d %d %s",                                          \
                    (pollinfo_)->sock_set->id, (pollinfo_)->sock_id,                                                    \
                    (pollinfo_)->os_errno, MPIU_Strerror((pollinfo_)->os_errno));                                       \
            }                                                                                                           \
            else                                                                                                        \
            {                                                                                                           \
                (mpi_errno_) = MPIR_Err_create_code(                                                                    \
                    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_CLOSED,                   \
                    "**sock|connclosed", "**sock|connclosed %d %d",                                                     \
                    (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                   \
            }                                                                                                           \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if ((pollinfo_)->state == MPIDU_SOCKI_STATE_CLOSING)                                                            \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INPROGRESS,                        \
                "**sock|closing", "**sock|closing %d %d",                                                               \
                (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                       \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
                                                                                                                        \
        if (!((pollinfo_)->state == MPIDU_SOCKI_STATE_CONNECTED_RW))                                                    \
        {                                                                                                               \
            (mpi_errno_) = MPIR_Err_create_code(                                                                        \
                (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                          \
                "**sock|badsock", NULL);                                                                                \
            goto fail_label_;                                                                                           \
        }                                                                                                               \
    }                                                                                                                   \
    else if ((pollinfo_)->type == MPIDU_SOCKI_TYPE_LISTENER)                                                            \
    {                                                                                                                   \
        (mpi_errno_) = MPIR_Err_create_code(                                                                            \
            (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                              \
            "**sock|listener_write", "**sock|listener_write %d %d",                                                     \
            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                           \
        goto fail_label_;                                                                                               \
    }                                                                                                                   \
}
303 
304 
/*
 * Fail with MPIDU_SOCK_ERR_BAD_SOCK and jump to fail_label_ when the
 * pollinfo's file descriptor is negative.
 */
#define MPIDU_SOCKI_VALIDATE_FD(pollinfo_, mpi_errno_, fail_label_)                                             \
{                                                                                                               \
    if (!((pollinfo_)->fd >= 0))                                                                                \
    {                                                                                                           \
        (mpi_errno_) = MPIR_Err_create_code(                                                                    \
            (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK,                      \
            "**sock|badhandle", "**sock|poll|badhandle %d %d %d",                                               \
            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id, (pollinfo_)->fd);                                  \
        goto fail_label_;                                                                                       \
    }                                                                                                           \
}


/*
 * Fail with MPIDU_SOCK_ERR_INPROGRESS and jump to fail_label_ when POLLIN is
 * already set in the pollfd's requested events.
 */
#define MPIDU_SOCKI_VERIFY_NO_POSTED_READ(pollfd_, pollinfo_, mpi_errno_, fail_label_)                          \
{                                                                                                               \
    if (MPIDU_SOCKI_POLLFD_OP_ISSET((pollfd_), (pollinfo_), POLLIN))                                            \
    {                                                                                                           \
        (mpi_errno_) = MPIR_Err_create_code(                                                                    \
            (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INPROGRESS,                    \
            "**sock|reads", "**sock|reads %d %d",                                                               \
            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                   \
        goto fail_label_;                                                                                       \
    }                                                                                                           \
}


/*
 * Fail with MPIDU_SOCK_ERR_INPROGRESS and jump to fail_label_ when POLLOUT is
 * already set in the pollfd's requested events.
 */
#define MPIDU_SOCKI_VERIFY_NO_POSTED_WRITE(pollfd_, pollinfo_, mpi_errno_, fail_label_)                         \
{                                                                                                               \
    if (MPIDU_SOCKI_POLLFD_OP_ISSET((pollfd_), (pollinfo_), POLLOUT))                                           \
    {                                                                                                           \
        (mpi_errno_) = MPIR_Err_create_code(                                                                    \
            (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INPROGRESS,                    \
            "**sock|writes", "**sock|writes %d %d",                                                             \
            (pollinfo_)->sock_set->id, (pollinfo_)->sock_id);                                                   \
        goto fail_label_;                                                                                       \
    }                                                                                                           \
}
339 #else
/*
 * USE_SOCK_VERIFY is not defined: every validation macro expands to nothing,
 * so no checking is performed.
 */
#define MPIDU_SOCKI_VERIFY_INIT(mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VALIDATE_SOCK_SET(sock_set_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VALIDATE_SOCK(sock_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VERIFY_CONNECTED_READABLE(pollinfo_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VERIFY_CONNECTED_WRITABLE(pollinfo_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VALIDATE_FD(pollinfo_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VERIFY_NO_POSTED_READ(pollfd_, pollinfo_, mpi_errno_, fail_label_)
#define MPIDU_SOCKI_VERIFY_NO_POSTED_WRITE(pollfd_, pollinfo_, mpi_errno_, fail_label_)
349 
350 #endif
351 
352 
353 #ifdef MPICH_IS_THREADED
354 
355 /*
356  * MPIDU_Socki_wakeup()
357  */
358 #undef FUNCNAME
359 #define FUNCNAME MPIDU_Socki_wakeup
360 #undef FCNAME
361 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDU_Socki_wakeup(struct MPIDU_Sock_set * sock_set)362 static int MPIDU_Socki_wakeup(struct MPIDU_Sock_set * sock_set)
363 {
364     MPIU_THREAD_CHECK_BEGIN
365     if (sock_set->wakeup_posted == FALSE)
366     {
367 	for(;;)
368 	{
369 	    int nb;
370 	    char c = 0;
371 
372 	    nb = write(sock_set->intr_fds[1], &c, 1);
373 	    if (nb == 1)
374 	    {
375 		break;
376 	    }
377 
378 	    MPIU_Assertp(nb == 0 || errno == EINTR);
379 	}
380 
381 	sock_set->wakeup_posted = TRUE;
382     }
383     MPIU_THREAD_CHECK_END
384     return MPIDU_SOCK_SUCCESS;
385 }
386 /* end MPIDU_Socki_wakeup() */
387 
388 #undef FUNCNAME
389 #define FUNCNAME MPIDI_Sock_update_sock_set
390 #undef FCNAME
391 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDI_Sock_update_sock_set(struct MPIDU_Sock_set * sock_set,int pollfds_active_elems)392 int MPIDI_Sock_update_sock_set( struct MPIDU_Sock_set *sock_set,
393 				int pollfds_active_elems )
394 {
395     int mpi_errno = MPI_SUCCESS;
396     int elem;
397     MPIDI_STATE_DECL(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
398 
399     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
400     for (elem = 0; elem < sock_set->poll_array_elems; elem++) {
401 	sock_set->pollfds[elem].events = sock_set->pollinfos[elem].pollfd_events;
402 	if ((sock_set->pollfds[elem].events & (POLLIN | POLLOUT)) != 0) {
403 	    sock_set->pollfds[elem].fd = sock_set->pollinfos[elem].fd;
404 	}
405 	else {
406 	    sock_set->pollfds[elem].fd = -1;
407 	}
408 
409 	if (elem < pollfds_active_elems) {
410 	    if (sock_set->pollfds_active == sock_set->pollfds) {
411 		sock_set->pollfds[elem].revents &= ~(POLLIN | POLLOUT) | sock_set->pollfds[elem].events;
412 	    }
413 	    else {
414 		sock_set->pollfds[elem].revents = sock_set->pollfds_active[elem].revents &
415 		    (~(POLLIN | POLLOUT) | sock_set->pollfds[elem].events);
416 	    }
417 	}
418 	else {
419 	    sock_set->pollfds[elem].revents = 0;
420 	}
421     }
422 
423     if (sock_set->pollfds_active != sock_set->pollfds) {
424 	MPIU_Free(sock_set->pollfds_active);
425     }
426 
427     sock_set->pollfds_updated = FALSE;
428 
429     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
430     return mpi_errno;
431 
432 }
433 
434 #endif /* (MPICH_IS_THREADED) */
435 
436 
437 /*
438  * MPIDU_Socki_os_to_mpi_errno()
439  *
440  * This routine assumes that no thread can change the state between state check before the nonblocking OS operation and the call
441  * to this routine.
442  */
443 #undef FUNCNAME
444 #define FUNCNAME MPIDU_Socki_os_to_mpi_errno
445 #undef FCNAME
446 #define FCNAME MPIU_QUOTE(FUNCNAME)
447 /* --BEGIN ERROR HANDLING-- */
MPIDU_Socki_os_to_mpi_errno(struct pollinfo * pollinfo,int os_errno,const char * fcname,int line,int * disconnected)448 static int MPIDU_Socki_os_to_mpi_errno(struct pollinfo * pollinfo, int os_errno, const char * fcname, int line, int * disconnected)
449 {
450     int mpi_errno;
451 
452     if (os_errno == ENOMEM || os_errno == ENOBUFS)
453     {
454 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_NOMEM,
455 					 "**sock|osnomem", "**sock|poll|osnomem %d %d %d %s",
456 					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
457 	*disconnected = FALSE;
458     }
459     else if (os_errno == EFAULT || os_errno == EINVAL)
460     {
461 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_BAD_BUF,
462 					 "**sock|badbuf", "**sock|poll|badbuf %d %d %d %s",
463 					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
464 	*disconnected = FALSE;
465     }
466     else if (os_errno == EPIPE)
467     {
468 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_CONN_CLOSED,
469 					 "**sock|connclosed", "**sock|poll|connclosed %d %d %d %s",
470 					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
471 	*disconnected = TRUE;
472     }
473     else if (os_errno == ECONNRESET || os_errno == ENOTCONN || os_errno == ETIMEDOUT)
474     {
475 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_CONN_FAILED,
476 					 "**sock|connfailed", "**sock|poll|connfailed %d %d %d %s",
477 					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
478 	pollinfo->os_errno = os_errno;
479 	*disconnected = TRUE;
480     }
481     else if (os_errno == EBADF)
482     {
483 	/*
484 	 * If we have a bad file descriptor, then either the sock was bad to
485 	 * start with and we didn't catch it in the preliminary
486 	 * checks, or a sock closure was finalized after the preliminary
487 	 * checks were performed.  The latter should not happen if
488 	 * the thread safety code is correctly implemented.  In any case,
489 	 * the data structures associated with the sock are no
490 	 * longer valid and should not be modified.  We indicate this by
491 	 * returning a fatal error.
492 	 */
493 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, fcname, line, MPIDU_SOCK_ERR_BAD_SOCK,
494 					 "**sock|badsock", NULL);
495 	*disconnected = FALSE;
496     }
497     else
498     {
499 	/*
500 	 * Unexpected OS error.
501 	 *
502 	 * FIXME: technically we should never reach this section of code.
503 	 * What's the right way to handle this situation?  Should
504 	 * we print an immediate message asking the user to report the errno
505 	 * so that we can plug the hole?
506 	 */
507 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, fcname, line, MPIDU_SOCK_ERR_CONN_FAILED,
508 					 "**sock|oserror", "**sock|poll|oserror %d %d %d %s",
509 					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
510 	pollinfo->os_errno = os_errno;
511 	*disconnected = TRUE;
512     }
513 
514     return mpi_errno;
515 }
516 /* --END ERROR HANDLING-- */
517 /* end MPIDU_Socki_os_to_mpi_errno() */
518 
519 
520 /*
521  * MPIDU_Socki_adjust_iov()
522  *
523  * Use the specified number of bytes (nb) to adjust the iovec and associated
524  * values.  If the iovec has been consumed, return
525  * true; otherwise return false.
526  *
527  * The input is an iov (MPID_IOV is just an iov) and the offset into which
528  * to start (start with entry iov[*offsetp]) and remove nb bytes from the iov.
529  * The use of the offsetp term allows use to remove values from the iov without
530  * making a copy to shift down elements when only part of the iov is
531  * consumed.
532  */
533 #undef FUNCNAME
534 #define FUNCNAME MPIDU_Socki_adjust_iov
535 #undef FCNAME
536 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDU_Socki_adjust_iov(ssize_t nb,MPID_IOV * const iov,const int count,int * const offsetp)537 static int MPIDU_Socki_adjust_iov(ssize_t nb, MPID_IOV * const iov, const int count, int * const offsetp)
538 {
539     int offset = *offsetp;
540 
541     while (offset < count)
542     {
543 	if (iov[offset].MPID_IOV_LEN <= nb)
544 	{
545 	    nb -= iov[offset].MPID_IOV_LEN;
546 	    offset++;
547 	}
548 	else
549 	{
550 	    iov[offset].MPID_IOV_BUF = (char *) iov[offset].MPID_IOV_BUF + nb;
551 	    iov[offset].MPID_IOV_LEN -= nb;
552 	    *offsetp = offset;
553 	    return FALSE;
554 	}
555     }
556 
557     *offsetp = offset;
558     return TRUE;
559 }
560 /* end MPIDU_Socki_adjust_iov() */
561 
562 
563 #undef FUNCNAME
564 #define FUNCNAME MPIDU_Socki_sock_alloc
565 #undef FCNAME
566 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set,struct MPIDU_Sock ** sockp)567 static int MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set, struct MPIDU_Sock ** sockp)
568 {
569     struct MPIDU_Sock * sock = NULL;
570     int avail_elem;
571     struct pollfd * pollfds = NULL;
572     struct pollinfo * pollinfos = NULL;
573     int mpi_errno = MPI_SUCCESS;
574     MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
575 
576     MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
577 
578     /* FIXME: Should this use the CHKPMEM macros (perm malloc)? */
579     sock = MPIU_Malloc(sizeof(struct MPIDU_Sock));
580     /* --BEGIN ERROR HANDLING-- */
581     if (sock == NULL)
582     {
583 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
584 	goto fn_fail;
585     }
586     /* --END ERROR HANDLING-- */
587 
588     /*
589      * Check existing poll structures for a free element.
590      */
591     for (avail_elem = 0; avail_elem < sock_set->poll_array_sz; avail_elem++)
592     {
593 	if (sock_set->pollinfos[avail_elem].sock_id == -1)
594 	{
595 	    if (avail_elem >= sock_set->poll_array_elems)
596 	    {
597 		sock_set->poll_array_elems = avail_elem + 1;
598 	    }
599 
600 	    break;
601 	}
602     }
603 
604     /*
605      * No free elements were found.  Larger pollfd and pollinfo arrays need to
606      * be allocated and the existing data transfered over.
607      */
608     if (avail_elem == sock_set->poll_array_sz)
609     {
610 	int elem;
611 
612 	pollfds = MPIU_Malloc((sock_set->poll_array_sz + MPIDU_SOCK_SET_DEFAULT_SIZE) * sizeof(struct pollfd));
613 	/* --BEGIN ERROR HANDLING-- */
614 	if (pollfds == NULL)
615 	{
616 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM,
617 					     "**nomem", 0);
618 	    goto fn_fail;
619 	}
620 	/* --END ERROR HANDLING-- */
621 	pollinfos = MPIU_Malloc((sock_set->poll_array_sz + MPIDU_SOCK_SET_DEFAULT_SIZE) * sizeof(struct pollinfo));
622 	/* --BEGIN ERROR HANDLING-- */
623 	if (pollinfos == NULL)
624 	{
625 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM,
626 					     "**nomem", 0);
627 	    goto fn_fail;
628 	}
629 	/* --END ERROR HANDLING-- */
630 
631 	if (sock_set->poll_array_sz > 0)
632 	{
633 	    /*
634 	     * Copy information from the old arrays and then free them.
635 	     *
636 	     * In the multi-threaded case, the pollfd array can only be copied
637 	     * if another thread is not already blocking in poll()
638 	     * and thus potentially modifying the array.  Furthermore, the
639 	     * pollfd array must not be freed if it is the one
640 	     * actively being used by pol().
641 	     */
642 #	    ifndef MPICH_IS_THREADED
643 	    {
644 		memcpy(pollfds, sock_set->pollfds, sock_set->poll_array_sz * sizeof(struct pollfd));
645 		MPIU_Free(sock_set->pollfds);
646 	    }
647 #	    else
648 	    {
649 		if (sock_set->pollfds_active == NULL)
650 		{
651 		    memcpy(pollfds, sock_set->pollfds, sock_set->poll_array_sz * sizeof(struct pollfd));
652 		}
653 		if  (sock_set->pollfds_active != sock_set->pollfds)
654 		{
655 		    MPIU_Free(sock_set->pollfds);
656 		}
657 	    }
658 #           endif
659 
660 	    memcpy(pollinfos, sock_set->pollinfos, sock_set->poll_array_sz * sizeof(struct pollinfo));
661 	    MPIU_Free(sock_set->pollinfos);
662 	}
663 
664 	sock_set->poll_array_elems = avail_elem + 1;
665 	sock_set->poll_array_sz += MPIDU_SOCK_SET_DEFAULT_SIZE;
666 	sock_set->pollfds = pollfds;
667 	sock_set->pollinfos = pollinfos;
668 
669 	/*
670 	 * Initialize new elements
671 	 */
672 	for (elem = avail_elem; elem < sock_set->poll_array_sz; elem++)
673 	{
674 	    pollfds[elem].fd = -1;
675 	    pollfds[elem].events = 0;
676 	    pollfds[elem].revents = 0;
677 	}
678 	for (elem = avail_elem; elem < sock_set->poll_array_sz; elem++)
679 	{
680 	    pollinfos[elem].fd = -1;
681 	    pollinfos[elem].sock_set = sock_set;
682 	    pollinfos[elem].elem = elem;
683 	    pollinfos[elem].sock = NULL;
684 	    pollinfos[elem].sock_id = -1;
685 	    pollinfos[elem].type  = MPIDU_SOCKI_TYPE_FIRST;
686 	    pollinfos[elem].state = MPIDU_SOCKI_STATE_FIRST;
687 #	    ifdef MPICH_IS_THREADED
688 	    {
689 		pollinfos[elem].pollfd_events = 0;
690 	    }
691 #	    endif
692 	}
693     }
694 
695     /*
696      * Verify that memory hasn't been messed up.
697      */
698     MPIU_Assert(sock_set->pollinfos[avail_elem].sock_set == sock_set);
699     MPIU_Assert(sock_set->pollinfos[avail_elem].elem == avail_elem);
700     MPIU_Assert(sock_set->pollinfos[avail_elem].fd == -1);
701     MPIU_Assert(sock_set->pollinfos[avail_elem].sock == NULL);
702     MPIU_Assert(sock_set->pollinfos[avail_elem].sock_id == -1);
703     MPIU_Assert(sock_set->pollinfos[avail_elem].type == MPIDU_SOCKI_TYPE_FIRST);
704     MPIU_Assert(sock_set->pollinfos[avail_elem].state == MPIDU_SOCKI_STATE_FIRST);
705 #   ifdef MPICH_IS_THREADED
706     {
707 	MPIU_Assert(sock_set->pollinfos[avail_elem].pollfd_events == 0);
708     }
709 #   endif
710 
711     /*
712      * Initialize newly allocated sock structure and associated poll structures
713      */
714     sock_set->pollinfos[avail_elem].sock_id = (sock_set->id << 24) | avail_elem;
715     sock_set->pollinfos[avail_elem].sock = sock;
716     sock->sock_set = sock_set;
717     sock->elem = avail_elem;
718 
719     sock_set->pollfds[avail_elem].fd = -1;
720     sock_set->pollfds[avail_elem].events = 0;
721     sock_set->pollfds[avail_elem].revents = 0;
722 
723 #   ifdef MPICH_IS_THREADED
724     {
725     MPIU_THREAD_CHECK_BEGIN
726 	if (sock_set->pollfds_active != NULL)
727 	{
728 	    sock_set->pollfds_updated = TRUE;
729 	}
730     MPIU_THREAD_CHECK_END
731     }
732 #   endif
733 
734     *sockp = sock;
735 
736   fn_exit:
737     MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
738     return mpi_errno;
739 
740     /* --BEGIN ERROR HANDLING-- */
741   fn_fail:
742     if (pollinfos != NULL)
743     {
744 	MPIU_Free(pollinfos);
745     }
746 
747     if (pollfds != NULL)
748     {
749 	MPIU_Free(pollfds);
750     }
751 
752     if (sock != NULL)
753     {
754 	MPIU_Free(sock);
755     }
756 
757     goto fn_exit;
758     /* --END ERROR HANDLING-- */
759 }
760 /* end MPIDU_Socki_sock_alloc() */
761 
762 
763 #undef FUNCNAME
764 #define FUNCNAME MPIDU_Socki_sock_free
765 #undef FCNAME
766 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)767 static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)
768 {
769     struct pollfd * pollfd = MPIDU_Socki_sock_get_pollfd(sock);
770     struct pollinfo * pollinfo = MPIDU_Socki_sock_get_pollinfo(sock);
771     struct MPIDU_Sock_set * sock_set = sock->sock_set;
772     MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
773 
774     MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
775 
776     /* FIXME: We need an abstraction for the thread sync operations */
777 #   ifdef MPICH_IS_THREADED
778     {
779 	/*
780 	 * Freeing a sock while Sock_wait() is blocked in poll() is not supported
781 	 */
782 	MPIU_Assert(sock_set->pollfds_active == NULL);
783     }
784 #   endif
785 
786     /*
787      * Compress poll array
788      */
789      /* FIXME: move last element into current position and update sock associated with last element.
790      */
791     if (sock->elem + 1 == sock_set->poll_array_elems)
792     {
793 	sock_set->poll_array_elems -= 1;
794 	if (sock_set->starting_elem >= sock_set->poll_array_elems)
795 	{
796 	    sock_set->starting_elem = 0;
797 	}
798     }
799 
800     /*
801      * Remove entry from the poll list and mark the entry as free
802      */
803     pollinfo->fd      = -1;
804     pollinfo->sock    = NULL;
805     pollinfo->sock_id = -1;
806     pollinfo->type    = MPIDU_SOCKI_TYPE_FIRST;
807     pollinfo->state   = MPIDU_SOCKI_STATE_FIRST;
808 #   ifdef MPICH_IS_THREADED
809     {
810 	pollinfo->pollfd_events = 0;
811     }
812 #   endif
813 
814     pollfd->fd = -1;
815     pollfd->events = 0;
816     pollfd->revents = 0;
817 
818     /*
819      * Mark the sock as invalid so that any future use might be caught
820      */
821     sock->sock_set = NULL;
822     sock->elem = -1;
823 
824     MPIU_Free(sock);
825 
826     MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
827 }
828 /* end MPIDU_Socki_sock_free() */
829 
830 
831 #undef FUNCNAME
832 #define FUNCNAME MPIDU_Socki_event_enqueue
833 #undef FCNAME
834 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Append an event for pollinfo's socket to the event queue of the sock set
   that owns it.

   The queue element is taken from the head of the MPIDU_Socki_eventq_pool
   free list.  When the free list is empty, a new table holding
   MPIDU_SOCK_EVENTQ_POOL_SIZE elements is allocated and pushed onto
   MPIDU_Socki_eventq_table_head; its first element is used for this event
   and the remaining elements become the new free list.

   op, num_bytes, user_ptr and error are stored unchanged in the queued
   event, together with pollinfo->elem.

   Returns MPI_SUCCESS, or the code produced by MPIR_Err_create_code() when
   a new table cannot be allocated (nothing is queued in that case). */
static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, MPIDU_Sock_op_t op, MPIU_Size_t num_bytes,
				     void * user_ptr, int error)
{
    struct MPIDU_Sock_set * set = pollinfo->sock_set;
    struct MPIDU_Socki_eventq_elem * elem;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SOCKI_EVENT_ENQUEUE);

    MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_EVENT_ENQUEUE);

    elem = MPIDU_Socki_eventq_pool;
    if (elem != NULL)
    {
        /* Fast path: pop the first free element. */
        MPIDU_Socki_eventq_pool = elem->next;
    }
    else
    {
        struct MPIDU_Socki_eventq_table * table;

        table = MPIU_Malloc(sizeof(struct MPIDU_Socki_eventq_table));
        /* --BEGIN ERROR HANDLING-- */
        if (table == NULL)
        {
            mpi_errno = MPIR_Err_create_code(errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**sock|poll|eqmalloc", 0);
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        /* Remember the table so that it can be released later. */
        table->next = MPIDU_Socki_eventq_table_head;
        MPIDU_Socki_eventq_table_head = table;

        /* Element 0 serves this request; elements 1 .. POOL_SIZE-1 are
           chained together and become the free list. */
        elem = &table->elems[0];
        if (MPIDU_SOCK_EVENTQ_POOL_SIZE > 1)
        {
            int k;

            for (k = 1; k < MPIDU_SOCK_EVENTQ_POOL_SIZE - 1; k++)
            {
                table->elems[k].next = &table->elems[k + 1];
            }
            table->elems[MPIDU_SOCK_EVENTQ_POOL_SIZE - 1].next = NULL;
            MPIDU_Socki_eventq_pool = &table->elems[1];
        }
    }

    /* Fill in the event. */
    elem->event.op_type   = op;
    elem->event.num_bytes = num_bytes;
    elem->event.user_ptr  = user_ptr;
    elem->event.error     = error;
    elem->set_elem        = pollinfo->elem;
    elem->next            = NULL;

    /* Link it in at the tail of the set's queue. */
    if (set->eventq_head != NULL)
    {
        set->eventq_tail->next = elem;
    }
    else
    {
        set->eventq_head = elem;
    }
    set->eventq_tail = elem;

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_EVENT_ENQUEUE);
    return mpi_errno;
}
901 /* end MPIDU_Socki_event_enqueue() */
902 
903 
904 #undef FUNCNAME
905 #define FUNCNAME MPIDU_Socki_event_dequeue
906 #undef FCNAME
907 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set,int * set_elem,struct MPIDU_Sock_event * eventp)908 static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set, int * set_elem, struct MPIDU_Sock_event * eventp)
909 {
910     struct MPIDU_Socki_eventq_elem * eventq_elem;
911     int mpi_errno = MPI_SUCCESS;
912     MPIDI_STATE_DECL(MPID_STATE_SOCKI_EVENT_DEQUEUE);
913 
914     MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_EVENT_DEQUEUE);
915 
916     if (sock_set->eventq_head != NULL)
917     {
918 	eventq_elem = sock_set->eventq_head;
919 
920 	sock_set->eventq_head = eventq_elem->next;
921 	if (eventq_elem->next == NULL)
922 	{
923 	    sock_set->eventq_tail = NULL;
924 	}
925 
926 	*eventp = eventq_elem->event;
927 	*set_elem = eventq_elem->set_elem;
928 
929 	eventq_elem->next = MPIDU_Socki_eventq_pool;
930 	MPIDU_Socki_eventq_pool = eventq_elem;
931     }
932     /* --BEGIN ERROR HANDLING-- */
933     else
934     {
935 	/* FIXME: Shouldn't this be an mpi error code? */
936 	mpi_errno = MPIDU_SOCK_ERR_FAIL;
937     }
938     /* --END ERROR HANDLING-- */
939 
940     MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_EVENT_DEQUEUE);
941     return mpi_errno;
942 }
943 /* end MPIDU_Socki_event_dequeue() */
944 
945 
946 /* FIXME: Who allocates eventq tables?  Should there be a check that these
947    tables are empty first? */
948 #undef FUNCNAME
949 #define FUNCNAME MPIDU_Socki_free_eventq_mem
950 #undef FCNAME
951 #define FCNAME "MPIDU_Socki_free_eventq_mem"
MPIDU_Socki_free_eventq_mem(void)952 static void MPIDU_Socki_free_eventq_mem(void)
953 {
954     struct MPIDU_Socki_eventq_table *eventq_table, *eventq_table_next;
955     MPIDI_STATE_DECL(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
956 
957     MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
958 
959     eventq_table = MPIDU_Socki_eventq_table_head;
960     while (eventq_table) {
961         eventq_table_next = eventq_table->next;
962         MPIU_Free(eventq_table);
963         eventq_table = eventq_table_next;
964     }
965     MPIDU_Socki_eventq_table_head = NULL;
966 
967     MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
968 }
969 
970 /* Provide a standard mechanism for setting the socket buffer size.
971    The value is -1 if the default size hasn't been set, 0 if no size
972    should be set, and > 0 if that size should be used */
973 static int sockBufSize = -1;
974 
975 /* Set the socket buffer sizes on fd to the standard values (this is controlled
976    by the parameter MPICH_SOCK_BUFSIZE).  If "firm" is true, require that the
977    sockets actually accept that buffer size.  */
MPIDU_Sock_SetSockBufferSize(int fd,int firm)978 int MPIDU_Sock_SetSockBufferSize( int fd, int firm )
979 {
980     int mpi_errno = MPI_SUCCESS;
981     int rc;
982 
983     /* Get the socket buffer size if we haven't yet acquired it */
984     if (sockBufSize < 0) {
985 	/* FIXME: Is this the name that we want to use (this was chosen
986 	   to match the original, undocumented name) */
987 	rc = MPL_env2int( "MPICH_SOCKET_BUFFER_SIZE", &sockBufSize );
988 	if (rc <= 0) {
989 	    sockBufSize = 0;
990 	}
991 	MPIU_DBG_MSG_D(CH3_CONNECT,TYPICAL,"Sock buf size = %d\n",sockBufSize);
992     }
993 
994     if (sockBufSize > 0) {
995 	int bufsz;
996 	socklen_t bufsz_len;
997 
998 	bufsz     = sockBufSize;
999 	bufsz_len = sizeof(bufsz);
1000 	rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bufsz, bufsz_len);
1001 	if (rc == -1) {
1002 	    MPIU_ERR_SETANDJUMP3(mpi_errno,MPIDU_SOCK_ERR_FAIL,
1003 				 "**sock|poll|setsndbufsz",
1004 				 "**sock|poll|setsndbufsz %d %d %s",
1005 				 bufsz, errno, MPIU_Strerror(errno));
1006 	}
1007 	bufsz     = sockBufSize;
1008 	bufsz_len = sizeof(bufsz);
1009 	rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsz, bufsz_len);
1010 	if (rc == -1) {
1011 	    MPIU_ERR_SETANDJUMP3(mpi_errno,MPIDU_SOCK_ERR_FAIL,
1012 				 "**sock|poll|setrcvbufsz",
1013 				 "**sock|poll|setrcvbufsz %d %d %s",
1014 				 bufsz, errno, MPIU_Strerror(errno));
1015 	}
1016 	bufsz_len = sizeof(bufsz);
1017 
1018 	if (firm) {
1019 	    rc = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bufsz, &bufsz_len);
1020 	    /* --BEGIN ERROR HANDLING-- */
1021 	    if (rc == 0) {
1022 		if (bufsz < sockBufSize * 0.9) {
1023 		MPIU_Msg_printf("WARNING: send socket buffer size differs from requested size (requested=%d, actual=%d)\n",
1024 				sockBufSize, bufsz);
1025 		}
1026 	    }
1027 	    /* --END ERROR HANDLING-- */
1028 
1029 	    bufsz_len = sizeof(bufsz);
1030 	    rc = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsz, &bufsz_len);
1031 	    /* --BEGIN ERROR HANDLING-- */
1032 	    if (rc == 0) {
1033 		if (bufsz < sockBufSize * 0.9) {
1034 		    MPIU_Msg_printf("WARNING: receive socket buffer size differs from requested size (requested=%d, actual=%d)\n",
1035 				    sockBufSize, bufsz);
1036 		}
1037 	    }
1038 	    /* --END ERROR HANDLING-- */
1039 	}
1040     }
1041  fn_fail:
1042     return mpi_errno;
1043 }
1044 
1045 /* This routine provides a string version of the address. */
MPIDU_Sock_AddrToStr(MPIDU_Sock_ifaddr_t * ifaddr,char * str,int maxlen)1046 int MPIDU_Sock_AddrToStr( MPIDU_Sock_ifaddr_t *ifaddr, char *str, int maxlen )
1047 {
1048     int i;
1049     unsigned char *p = ifaddr->ifaddr;
1050     for (i=0; i<ifaddr->len && maxlen > 4; i++) {
1051 	snprintf( str, maxlen, "%.3d.", *p++ );
1052 	str += 4;
1053 	maxlen -= 4;
1054     }
1055     /* Change the last period to a null; but be careful in case len was zero */
1056     if (i > 0) *--str = 0;
1057     else       *str = 0;
1058     return 0;
1059 }
1060