1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpidimpl.h"
7 
8 /* This file contains two types of routines associated with requests:
9  * Routines to allocate and free requests
10  * Routines to manage iovs on requests
11  *
12  * Note that there are a number of macros that also manage requests defined
13  * in mpidimpl.h ; these are intended to optimize request creation for
14  * specific types of requests.  See the comments in mpidimpl.h for more
15  * details.
16  */
17 
18 /* Routines and data structures for request allocation and deallocation */
19 
20 /* Max depth of recursive calls of MPID_Request_complete */
21 #define REQUEST_CB_DEPTH 2
22 #define MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET (-1)
23 
24 /* See the comments above about request creation.  Some routines will
25    use macros in mpidimpl.h *instead* of this routine */
MPID_Request_create_hook(MPIR_Request * req)26 void MPID_Request_create_hook(MPIR_Request *req)
27 {
28     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_REQUEST_INIT);
29 
30     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_REQUEST_INIT);
31 
32     req->dev.datatype_ptr	   = NULL;
33     req->dev.msg_offset         = 0;
34     /* Masks and pkt_flags for channel device state in an MPIR_Request */
35     req->dev.state		   = 0;
36     req->dev.cancel_pending	   = FALSE;
37     /* FIXME: RMA ops shouldn't need to be set except when creating a
38      * request for RMA operations */
39     req->dev.target_win_handle = MPI_WIN_NULL;
40     req->dev.source_win_handle = MPI_WIN_NULL;
41     req->dev.target_lock_queue_entry = NULL;
42     req->dev.flattened_type = NULL;
43     req->dev.iov_offset        = 0;
44     req->dev.pkt_flags             = MPIDI_CH3_PKT_FLAG_NONE;
45     req->dev.resp_request_handle = MPI_REQUEST_NULL;
46     req->dev.user_buf          = NULL;
47     req->dev.OnDataAvail       = NULL;
48     req->dev.OnFinal           = NULL;
49     req->dev.user_buf          = NULL;
50     req->dev.drop_data         = FALSE;
51     req->dev.tmpbuf            = NULL;
52     req->dev.ext_hdr_ptr       = NULL;
53     req->dev.ext_hdr_sz        = 0;
54     req->dev.rma_target_ptr    = NULL;
55     req->dev.request_handle    = MPI_REQUEST_NULL;
56     req->dev.orig_msg_offset = MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET;
57 
58     req->dev.request_completed_cb  = NULL;
59 #ifdef MPIDI_CH3_REQUEST_INIT
60     MPIDI_CH3_REQUEST_INIT(req);
61 #endif
62 }
63 
64 
65 /* ------------------------------------------------------------------------- */
66 /* Here are the routines to manipulate the iovs in the requests              */
67 /* ------------------------------------------------------------------------- */
68 
69 
70 
71 /*
72  * MPIDI_CH3U_Request_load_send_iov()
73  *
74  * Fill the provided IOV with the next (or remaining) portion of data described
75  * by the segment contained in the request structure.
76  * If the density of IOV is not sufficient, pack the data into a send/receive
77  * buffer and point the IOV at the buffer.
78  *
79  * Expects sreq->dev.OnFinal to be initialized (even if it's NULL).
80  */
MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq,struct iovec * const iov,int * const iov_n)81 int MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq,
82 				     struct iovec * const iov, int * const iov_n)
83 {
84     MPI_Aint last;
85     int mpi_errno = MPI_SUCCESS;
86     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
87 
88     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
89 
90     last = sreq->dev.msgsize;
91     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
92      "pre-pv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
93 		      sreq->dev.msg_offset, last, *iov_n));
94     MPIR_Assert(sreq->dev.msg_offset < last);
95     MPIR_Assert(last > 0);
96     MPIR_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
97 
98     int max_iov_len = *iov_n;
99     MPI_Aint actual_iov_bytes, actual_iov_len;
100     MPIR_Typerep_to_iov(sreq->dev.user_buf, sreq->dev.user_count, sreq->dev.datatype,
101                      sreq->dev.msg_offset, iov, (MPI_Aint) max_iov_len,
102                      sreq->dev.msgsize - sreq->dev.msg_offset, &actual_iov_len, &actual_iov_bytes);
103     *iov_n = (int) actual_iov_len;
104     last = sreq->dev.msg_offset + actual_iov_bytes;
105 
106     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
107     "post-pv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
108 		      sreq->dev.msg_offset, last, *iov_n));
109     MPIR_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
110 
111     if (last == sreq->dev.msgsize)
112     {
113 	MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"remaining data loaded into IOV");
114 	sreq->dev.OnDataAvail = sreq->dev.OnFinal;
115     }
116     else if ((last - sreq->dev.msg_offset) / *iov_n >= MPIDI_IOV_DENSITY_MIN)
117     {
118 	MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"more data loaded into IOV");
119 	sreq->dev.msg_offset = last;
120 	sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
121     }
122     else
123     {
124 	intptr_t data_sz;
125 	int i, iov_data_copied;
126 
127 	MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"low density.  using SRBuf.");
128 
129 	data_sz = sreq->dev.msgsize - sreq->dev.msg_offset;
130 	if (!MPIDI_Request_get_srbuf_flag(sreq))
131 	{
132 	    MPIDI_CH3U_SRBuf_alloc(sreq, data_sz);
133 	    /* --BEGIN ERROR HANDLING-- */
134 	    if (sreq->dev.tmpbuf_sz == 0)
135 	    {
136 		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,"SRBuf allocation failure");
137 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
138                                 __func__, __LINE__, MPI_ERR_OTHER, "**nomem",
139 						 "**nomem %d", data_sz);
140 		sreq->status.MPI_ERROR = mpi_errno;
141 		goto fn_exit;
142 	    }
143 	    /* --END ERROR HANDLING-- */
144 	}
145 
146 	iov_data_copied = 0;
147 	for (i = 0; i < *iov_n; i++) {
148 	    MPIR_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied,
149 		   iov[i].iov_base, iov[i].iov_len);
150 	    iov_data_copied += iov[i].iov_len;
151 	}
152 	sreq->dev.msg_offset = last;
153 
154         MPI_Aint max_pack_bytes;
155         MPI_Aint actual_pack_bytes;
156 
157         if (data_sz > sreq->dev.tmpbuf_sz - iov_data_copied)
158             max_pack_bytes = sreq->dev.tmpbuf_sz - iov_data_copied;
159         else
160             max_pack_bytes = sreq->dev.msgsize - sreq->dev.msg_offset;
161 
162         MPIR_Typerep_pack(sreq->dev.user_buf, sreq->dev.user_count, sreq->dev.datatype,
163                        sreq->dev.msg_offset, (char*) sreq->dev.tmpbuf + iov_data_copied,
164                        max_pack_bytes, &actual_pack_bytes);
165         last = sreq->dev.msg_offset + actual_pack_bytes;
166 
167 	iov[0].iov_base = (void *)sreq->dev.tmpbuf;
168 	iov[0].iov_len = actual_pack_bytes + iov_data_copied;
169 	*iov_n = 1;
170 	if (last == sreq->dev.msgsize)
171 	{
172 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"remaining data packed into SRBuf");
173 	    sreq->dev.OnDataAvail = sreq->dev.OnFinal;
174 	}
175 	else
176 	{
177 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"more data packed into SRBuf");
178 	    sreq->dev.msg_offset = last;
179 	    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
180 	}
181     }
182 
183   fn_exit:
184     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
185     return mpi_errno;
186 
187   fn_fail:
188     goto fn_exit;
189 }
190 
191 
192 /*
193  * MPIDI_CH3U_Request_load_recv_iov()
194  *
195  * Fill the request's IOV with the next (or remaining) portion of data
196  * described by the segment (also contained in the request
197  * structure).  If the density of IOV is not sufficient, allocate a
198  * send/receive buffer and point the IOV at the buffer.
199  */
MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)200 int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
201 {
202     MPI_Aint last;
203     int mpi_errno = MPI_SUCCESS;
204     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
205 
206     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
207 
208     if (rreq->dev.orig_msg_offset == MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET) {
209         rreq->dev.orig_msg_offset = rreq->dev.msg_offset;
210     }
211 
212     if (rreq->dev.msg_offset < rreq->dev.msgsize)
213     {
214 	/* still reading data that needs to go into the user buffer */
215 
216 	if (MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_ACCUM_RECV &&
217             MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_ACCUM_RECV &&
218             MPIDI_Request_get_srbuf_flag(rreq))
219 	{
220 	    intptr_t data_sz;
221 	    intptr_t tmpbuf_sz;
222 
223 	    /* Once a SRBuf is in use, we continue to use it since a small
224 	       amount of data may already be present at the beginning
225 	       of the buffer.  This data is left over from the previous unpack,
226 	       most like a result of alignment issues.  NOTE: we
227 	       could force the use of the SRBuf only
228 	       when (rreq->dev.tmpbuf_off > 0)... */
229 
230 	    data_sz = rreq->dev.msgsize - rreq->dev.msg_offset -
231 		rreq->dev.tmpbuf_off;
232 	    MPIR_Assert(data_sz > 0);
233 	    tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
234 	    if (data_sz > tmpbuf_sz)
235 	    {
236 		data_sz = tmpbuf_sz;
237 	    }
238 	    rreq->dev.iov[0].iov_base =
239 		(void *)((char *) rreq->dev.tmpbuf +
240 				    rreq->dev.tmpbuf_off);
241 	    rreq->dev.iov[0].iov_len = data_sz;
242             rreq->dev.iov_offset = 0;
243 	    rreq->dev.iov_count = 1;
244 	    MPIR_Assert(rreq->dev.msg_offset - rreq->dev.orig_msg_offset + data_sz +
245 			rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
246 	    if (rreq->dev.msg_offset - rreq->dev.orig_msg_offset + data_sz + rreq->dev.tmpbuf_off ==
247 		rreq->dev.recv_data_sz)
248 	    {
249 		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
250 		  "updating rreq to read the remaining data into the SRBuf");
251 		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufComplete;
252                 rreq->dev.orig_msg_offset = MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET;
253 	    }
254 	    else
255 	    {
256 		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
257 		       "updating rreq to read more data into the SRBuf");
258 		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV;
259 	    }
260 	    goto fn_exit;
261 	}
262 
263 	last = rreq->dev.msgsize;
264 	rreq->dev.iov_count = MPL_IOV_LIMIT;
265 	rreq->dev.iov_offset = 0;
266 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
267    "pre-upv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
268 			  rreq->dev.msg_offset, last, rreq->dev.iov_count));
269 	MPIR_Assert(rreq->dev.msg_offset < last);
270 	MPIR_Assert(last > 0);
271 
272         MPI_Aint actual_iov_bytes, actual_iov_len;
273         MPIR_Typerep_to_iov(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype,
274                          rreq->dev.msg_offset, &rreq->dev.iov[0], MPL_IOV_LIMIT,
275                          rreq->dev.msgsize - rreq->dev.msg_offset,
276                          &actual_iov_len, &actual_iov_bytes);
277         rreq->dev.iov_count = (int) actual_iov_len;
278         last = rreq->dev.msg_offset + actual_iov_bytes;
279 
280 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
281    "post-upv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d, iov_offset=%lld",
282 			  rreq->dev.msg_offset, last, rreq->dev.iov_count, (long long)rreq->dev.iov_offset));
283 	MPIR_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <=
284 		    MPL_IOV_LIMIT);
285 
286 	/* --BEGIN ERROR HANDLING-- */
287 	if (rreq->dev.iov_count == 0)
288 	{
289 	    /* If the data can't be unpacked, the we have a mis-match between
290 	       the datatype and the amount of data received.  Adjust
291 	       the segment info so that the remaining data is received and
292 	       thrown away. */
293 	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
294 		       MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_TYPE,
295 		       "**dtypemismatch", 0);
296             MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.msg_offset);
297 	    rreq->dev.msgsize = rreq->dev.msg_offset;
298 	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
299 	    goto fn_exit;
300 	}
301         else
302         {
303             MPIR_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
304         }
305 	/* --END ERROR HANDLING-- */
306 
307 	if (last == rreq->dev.recv_data_sz + rreq->dev.orig_msg_offset)
308 	{
309 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
310      "updating rreq to read the remaining data directly into the user buffer");
311 	    /* Eventually, use OnFinal for this instead */
312 	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
313             rreq->dev.orig_msg_offset = MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET;
314 	}
315 	else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV ||
316                  MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV ||
317                  (last == rreq->dev.msgsize ||
318                   (last - rreq->dev.msg_offset) / rreq->dev.iov_count >= MPIDI_IOV_DENSITY_MIN))
319 	{
320 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
321 	     "updating rreq to read more data directly into the user buffer");
322 	    rreq->dev.msg_offset = last;
323 	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
324 	}
325 	else
326 	{
327 	    /* Too little data would have been received using an IOV.
328 	       We will start receiving data into a SRBuf and unpacking it
329 	       later. */
330 	    MPIR_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);
331 
332 	    MPIDI_CH3U_SRBuf_alloc(rreq,
333 			    rreq->dev.msgsize - rreq->dev.msg_offset);
334 	    rreq->dev.tmpbuf_off = 0;
335 	    /* --BEGIN ERROR HANDLING-- */
336 	    if (rreq->dev.tmpbuf_sz == 0)
337 	    {
338 		/* FIXME - we should drain the data off the pipe here, but we
339 		   don't have a buffer to drain it into.  should this be
340 		   a fatal error? */
341 		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"SRBuf allocation failure");
342 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
343 			      __func__, __LINE__, MPI_ERR_OTHER, "**nomem",
344 			 "**nomem %d",
345 			 rreq->dev.msgsize - rreq->dev.msg_offset);
346 		rreq->status.MPI_ERROR = mpi_errno;
347 		goto fn_exit;
348 	    }
349 	    /* --END ERROR HANDLING-- */
350 
351 	    /* fill in the IOV using a recursive call */
352 	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
353 	}
354     }
355     else
356     {
357 	/* receive and toss any extra data that does not fit in the user's
358 	   buffer */
359 	intptr_t data_sz;
360 
361 	data_sz = rreq->dev.recv_data_sz - rreq->dev.msg_offset;
362 	if (!MPIDI_Request_get_srbuf_flag(rreq))
363 	{
364 	    MPIDI_CH3U_SRBuf_alloc(rreq, data_sz);
365 	    /* --BEGIN ERROR HANDLING-- */
366 	    if (rreq->dev.tmpbuf_sz == 0)
367 	    {
368 		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,"SRBuf allocation failure");
369 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
370 			       __func__, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
371 		rreq->status.MPI_ERROR = mpi_errno;
372 		goto fn_exit;
373 	    }
374 	    /* --END ERROR HANDLING-- */
375 	}
376 
377 	if (data_sz <= rreq->dev.tmpbuf_sz)
378 	{
379 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
380 	    "updating rreq to read overflow data into the SRBuf and complete");
381 	    rreq->dev.iov[0].iov_len = data_sz;
382 	    MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
383 	    /* Eventually, use OnFinal for this instead */
384 	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
385             rreq->dev.orig_msg_offset = MPIDI_LOAD_RECV_IOV_ORIG_MSG_OFFSET_UNSET;
386 	}
387 	else
388 	{
389 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
390 	  "updating rreq to read overflow data into the SRBuf and reload IOV");
391 	    rreq->dev.iov[0].iov_len = rreq->dev.tmpbuf_sz;
392 	    rreq->dev.msg_offset += rreq->dev.tmpbuf_sz;
393 	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
394 	}
395 
396 	rreq->dev.iov[0].iov_base = (void *)rreq->dev.tmpbuf;
397 	rreq->dev.iov_count = 1;
398     }
399 
400   fn_exit:
401     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
402     return mpi_errno;
403 }
404 
405 /*
406  * MPIDI_CH3U_Request_unpack_srbuf
407  *
408  * Unpack data from a send/receive buffer into the user buffer.
409  */
MPIDI_CH3U_Request_unpack_srbuf(MPIR_Request * rreq)410 int MPIDI_CH3U_Request_unpack_srbuf(MPIR_Request * rreq)
411 {
412     MPI_Aint last;
413     int tmpbuf_last;
414     int mpi_errno = MPI_SUCCESS;
415     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
416 
417     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
418 
419     tmpbuf_last = (int)(rreq->dev.msg_offset + rreq->dev.tmpbuf_sz);
420     if (rreq->dev.msgsize < tmpbuf_last)
421     {
422 	tmpbuf_last = (int)rreq->dev.msgsize;
423     }
424 
425     MPI_Aint actual_unpack_bytes;
426     MPIR_Typerep_unpack(rreq->dev.tmpbuf, tmpbuf_last - rreq->dev.msg_offset,
427                      rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype,
428                      rreq->dev.msg_offset, &actual_unpack_bytes);
429     last = rreq->dev.msg_offset + actual_unpack_bytes;
430 
431     if (last == 0 || last == rreq->dev.msg_offset)
432     {
433 	/* --BEGIN ERROR HANDLING-- */
434 	/* If no data can be unpacked, then we have a datatype processing
435 	   problem.  Adjust the segment info so that the remaining
436 	   data is received and thrown away. */
437 	MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.msg_offset);
438 	rreq->dev.msgsize = rreq->dev.msg_offset;
439 	rreq->dev.msg_offset += tmpbuf_last;
440 	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
441 		       MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_TYPE,
442 		       "**dtypemismatch", 0);
443 	/* --END ERROR HANDLING-- */
444     }
445     else if (tmpbuf_last == rreq->dev.msgsize)
446     {
447 	/* --BEGIN ERROR HANDLING-- */
448 	if (last != tmpbuf_last)
449 	{
450 	    /* received data was not entirely consumed by unpack() because too
451 	       few bytes remained to fill the next basic datatype.
452 	       Note: the msg_offset field is set to segment_last so that if
453 	       this is a truncated message, extra data will be read
454 	       off the pipe. */
455 	    MPIR_STATUS_SET_COUNT(rreq->status, last);
456 	    rreq->dev.msgsize = last;
457 	    rreq->dev.msg_offset = tmpbuf_last;
458 	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
459 		  MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_TYPE,
460 							  "**dtypemismatch", 0);
461 	}
462 	/* --END ERROR HANDLING-- */
463     }
464     else
465     {
466 	rreq->dev.tmpbuf_off = (int)(tmpbuf_last - last);
467 	if (rreq->dev.tmpbuf_off > 0)
468 	{
469 	    /* move any remaining data to the beginning of the buffer.
470 	       Note: memmove() is used since the data regions could
471                overlap. */
472 	    memmove(rreq->dev.tmpbuf, (char *) rreq->dev.tmpbuf +
473 		    (last - rreq->dev.msg_offset), rreq->dev.tmpbuf_off);
474 	}
475 	rreq->dev.msg_offset = last;
476     }
477 
478     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
479     return mpi_errno;
480 }
481 
482 /*
483  * MPIDI_CH3U_Request_unpack_uebuf
484  *
485  * Copy/unpack data from an "unexpected eager buffer" into the user buffer.
486  */
MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq)487 int MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq)
488 {
489     int dt_contig;
490     MPI_Aint dt_true_lb;
491     intptr_t userbuf_sz;
492     MPIR_Datatype * dt_ptr;
493     intptr_t unpack_sz;
494     int mpi_errno = MPI_SUCCESS;
495     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
496     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
497 
498     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
499 
500     MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
501 			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
502 
503     if (rreq->dev.recv_data_sz <= userbuf_sz)
504     {
505 	unpack_sz = rreq->dev.recv_data_sz;
506     }
507     else
508     {
509 	/* --BEGIN ERROR HANDLING-- */
510 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
511       "receive buffer overflow; message truncated, msg_sz=%" PRIdPTR
512 	      ", buf_sz=%" PRIdPTR,
513                 rreq->dev.recv_data_sz, userbuf_sz));
514 	unpack_sz = userbuf_sz;
515 	MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
516 	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
517 		 MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_TRUNCATE,
518 		 "**truncate", "**truncate %d %d",
519                  rreq->dev.recv_data_sz, userbuf_sz);
520 	/* --END ERROR HANDLING-- */
521     }
522 
523     if (unpack_sz > 0)
524     {
525 	if (dt_contig)
526 	{
527 	    /* TODO - check that amount of data is consistent with
528 	       datatype.  If not we should return an error (unless
529 	       configured with --enable-fast) */
530 	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
531 	    MPIR_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf,
532 		   unpack_sz);
533 	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
534 	}
535 	else
536 	{
537 	    MPI_Aint actual_unpack_bytes;
538 	    MPIR_Typerep_unpack(rreq->dev.tmpbuf, unpack_sz,
539 			     rreq->dev.user_buf, rreq->dev.user_count,
540 			     rreq->dev.datatype, 0, &actual_unpack_bytes);
541 
542 	    if (actual_unpack_bytes != unpack_sz)
543 	    {
544 		/* --BEGIN ERROR HANDLING-- */
545 		/* received data was not entirely consumed by unpack()
546 		   because too few bytes remained to fill the next basic
547 		   datatype */
548 		MPIR_STATUS_SET_COUNT(rreq->status, actual_unpack_bytes);
549 		rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
550                          MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_TYPE,
551 			 "**dtypemismatch", 0);
552 		/* --END ERROR HANDLING-- */
553 	    }
554 	}
555     }
556 
557     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
558     return mpi_errno;
559 }
560 
/*
 * MPID_Request_complete
 *
 * Decrement the completion counter of the request.  When the counter shows
 * the request is no longer incomplete, also decrement the counter referenced
 * by req->completion_notification (if one is attached) and call
 * MPIR_Request_free() on the request.
 *
 * Always returns MPI_SUCCESS.
 */
int MPID_Request_complete(MPIR_Request *req)
{
    int incomplete;

    MPIDI_CH3U_Request_decrement_cc(req, &incomplete);
    if (incomplete) {
        /* other parts of the operation are still outstanding */
        return MPI_SUCCESS;
    }

    /* decrement completion_notification counter */
    if (req->completion_notification) {
        int notify_counter;     /* receives the decrement result; not inspected */
        MPIR_cc_decr(req->completion_notification, &notify_counter);
    }

    MPIR_Request_free(req);

    return MPI_SUCCESS;
}
580 
/*
 * MPID_Request_free_hook
 *
 * If the request carries a request_completed_cb and is complete, invoke the
 * callback once and clear it; then signal completion to the progress engine.
 *
 * A function-static counter tracks how deeply this hook is currently nested
 * and asserts that the nesting does not go past REQUEST_CB_DEPTH.
 */
void MPID_Request_free_hook(MPIR_Request *req)
{
    static int called_cnt = 0;

    MPIR_Assert(called_cnt <= REQUEST_CB_DEPTH);
    called_cnt += 1;

    /* trigger request_completed callback function */
    if (req->dev.request_completed_cb != NULL) {
        if (MPIR_Request_is_complete(req)) {
            int mpi_errno = req->dev.request_completed_cb(req);
            MPIR_Assert(mpi_errno == MPI_SUCCESS);

            /* cleared after the call so it cannot be triggered again */
            req->dev.request_completed_cb = NULL;
        }
    }

    MPIDI_CH3_Progress_signal_completion();

    called_cnt -= 1;
}
600 
/*
 * MPID_Request_destroy_hook
 *
 * Release the device-level resources attached to a request: the datatype
 * reference (if any), the send/receive buffer (if the SRBuf flag is set),
 * the extended header buffer and the flattened datatype buffer.
 */
void MPID_Request_destroy_hook(MPIR_Request *req)
{
    if (req->dev.datatype_ptr) {
        MPIR_Datatype_ptr_release(req->dev.datatype_ptr);
    }

    if (MPIDI_Request_get_srbuf_flag(req)) {
        MPIDI_CH3U_SRBuf_free(req);
    }

    /* both are passed to MPL_free() without a prior NULL check */
    MPL_free(req->dev.ext_hdr_ptr);
    MPL_free(req->dev.flattened_type);
}
614