/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2010 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2010-2012 Sandia National Laboratories.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */


#include "ompi_config.h"

#include "opal/class/opal_list.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "ompi/message/message.h"
#include "opal/mca/timer/base/base.h"

#include "mtl_portals4.h"
#include "mtl_portals4_endpoint.h"
#include "mtl_portals4_request.h"
#include "mtl_portals4_recv_short.h"
#include "mtl_portals4_message.h"


39 static int
40 ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
41                                 ompi_mtl_portals4_base_request_t* ptl_base_request);
42 static int
43 ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
44                                          ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag);
45 
46 static int
read_msg(void * start,ptl_size_t length,ptl_process_t target,ptl_match_bits_t match_bits,ptl_size_t remote_offset,ompi_mtl_portals4_recv_request_t * request)47 read_msg(void *start, ptl_size_t length, ptl_process_t target,
48          ptl_match_bits_t match_bits, ptl_size_t remote_offset,
49          ompi_mtl_portals4_recv_request_t *request)
50 {
51     int ret, i;
52     ptl_size_t rest = length, asked = 0;
53     int32_t frag_count;
54 
55 #if OMPI_MTL_PORTALS4_FLOW_CONTROL
56     while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
57         OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
58         ompi_mtl_portals4_progress();
59     }
60 #endif
61 
62     frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
63     ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);
64 
65     for (i = 0 ; i < frag_count ; i++) {
66         opal_free_list_item_t *tmp;
67         ompi_mtl_portals4_rndv_get_frag_t* frag;
68 
69         tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag);
70         if (NULL == tmp) return OMPI_ERR_OUT_OF_RESOURCE;
71 
72         frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp;
73 
74         frag->request = request;
75 #if OPAL_ENABLE_DEBUG
76         frag->frag_num = i;
77 #endif
78         frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl;
79         frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest;
80         frag->frag_target = target;
81         frag->frag_match_bits = match_bits;
82         frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl;
83 
84         frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress;
85         frag->frag_abs_timeout_usec = 0;
86 
87         OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send",
88                              i + 1, frag_count, frag->frag_length));
89 
90         ret = PtlGet(ompi_mtl_portals4.send_md_h,
91                      (ptl_size_t) frag->frag_start,
92                      frag->frag_length,
93                      frag->frag_target,
94                      ompi_mtl_portals4.read_idx,
95                      frag->frag_match_bits,
96                      frag->frag_remote_offset,
97                      frag);
98         if (OPAL_UNLIKELY(PTL_OK != ret)) {
99             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
100                                 "%s:%d: PtlGet failed: %d",
101                                 __FILE__, __LINE__, ret);
102             return OMPI_ERR_OUT_OF_RESOURCE;
103         }
104         rest -= frag->frag_length;
105         asked += frag->frag_length;
106     }
107 
108     return OMPI_SUCCESS;
109 }
110 
111 
112 /* called when a receive should be progressed */
113 static int
ompi_mtl_portals4_recv_progress(ptl_event_t * ev,ompi_mtl_portals4_base_request_t * ptl_base_request)114 ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
115                                 ompi_mtl_portals4_base_request_t* ptl_base_request)
116 {
117     int ret;
118     ompi_mtl_portals4_recv_request_t* ptl_request =
119         (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
120     size_t msg_length = 0;
121 
122     /* as soon as we've seen any event associated with a request, it's
123        started */
124     ptl_request->req_started = true;
125 
126     switch (ev->type) {
127     case PTL_EVENT_PUT:
128         OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
129                              "Recv %lu (0x%lx) got put event",
130                              ptl_request->opcount, ev->hdr_data));
131 
132         if (ev->ni_fail_type != PTL_NI_OK) {
133             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
134                                 "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
135                                 __FILE__, __LINE__, ev->ni_fail_type);
136             ret = PTL_FAIL;
137             goto callback_error;
138         }
139 
140         ptl_request->me_h = PTL_INVALID_HANDLE;
141 
142         msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
143         ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
144             MTL_PORTALS4_GET_SOURCE(ev->match_bits);
145         ptl_request->super.super.ompi_req->req_status.MPI_TAG =
146             MTL_PORTALS4_GET_TAG(ev->match_bits);
147         if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
148             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
149                                 "truncate expected: %ld %ld",
150                                 msg_length, ptl_request->delivery_len);
151             ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
152         }
153 
154         if (ev->mlength < msg_length)
155              OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "Truncated message, some PtlGet are required (protocol = %d)",
156                                  ompi_mtl_portals4.protocol));
157 
158 #if OPAL_ENABLE_DEBUG
159         ptl_request->hdr_data = ev->hdr_data;
160 #endif
161 
162         ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
163         if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && msg_length > ev->mlength) {
164             /* If it's not a short message and we're doing rndv and the message is not complete,  we
165                only have the first part of the message.  Issue the get
166                to pull the second part of the message. */
167             ret = read_msg((char*)ptl_request->delivery_ptr + ev->mlength,
168                            ((msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length) - ev->mlength,
169                            ev->initiator,
170                            ev->hdr_data,
171                            ev->mlength,
172                            ptl_request);
173             if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
174                 if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
175                 goto callback_error;
176             }
177         } else {
178             /* If we're either using the eager protocol or were a
179                short message, all data has been received, so complete
180                the message. */
181             ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
182                                            ev->start,
183                                            ev->mlength);
184             if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
185                 opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
186                                     "%s:%d: ompi_mtl_datatype_unpack failed: %d",
187                                     __FILE__, __LINE__, ret);
188                 ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
189             }
190             OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
191                                  "Recv %lu (0x%lx) completed, expected",
192                                  ptl_request->opcount, ptl_request->hdr_data));
193             ptl_request->super.super.completion_callback(&ptl_request->super.super);
194         }
195         break;
196 
197     case PTL_EVENT_PUT_OVERFLOW:
198         OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
199                              "Recv %lu (0x%lx) got put_overflow event",
200                              ptl_request->opcount, ev->hdr_data));
201 
202         if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
203             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
204                                 "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
205                                 __FILE__, __LINE__, ev->ni_fail_type);
206             ret = PTL_FAIL;
207             goto callback_error;
208         }
209 
210         ptl_request->me_h = PTL_INVALID_HANDLE;
211 
212         msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
213         ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
214             MTL_PORTALS4_GET_SOURCE(ev->match_bits);
215         ptl_request->super.super.ompi_req->req_status.MPI_TAG =
216             MTL_PORTALS4_GET_TAG(ev->match_bits);
217         if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
218             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
219                                 "truncate unexpected: %ld %ld %d",
220                                 msg_length, ptl_request->delivery_len,
221                                 MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
222             ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
223         }
224 
225 #if OPAL_ENABLE_DEBUG
226         ptl_request->hdr_data = ev->hdr_data;
227 #endif
228 
229         /* overflow case.  Short messages have the buffer stashed
230            somewhere.  Long messages left in buffer at the source */
231         if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
232             ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
233             if (ev->mlength > 0) {
234                 struct iovec iov;
235                 uint32_t iov_count = 1;
236                 size_t max_data;
237                 iov.iov_base = (char*) ev->start;
238                 iov.iov_len = ev->mlength;
239                 max_data = iov.iov_len;
240 
241                 ret = opal_convertor_unpack(ptl_request->convertor,
242                                             &iov, &iov_count,
243                                             &max_data );
244                 if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
245                 if (OPAL_UNLIKELY(ret < 0)) {
246                     opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
247                                         "%s:%d: opal_convertor_unpack failed: %d",
248                                         __FILE__, __LINE__, ret);
249                     goto callback_error;
250                 }
251             }
252             /* if it's a sync, send the ack */
253             if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
254                 OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
255                                      "Recv %lu (0x%lx) sending sync ack",
256                                      ptl_request->opcount, ptl_request->hdr_data));
257                 ret = PtlPut(ompi_mtl_portals4.zero_md_h,
258                              0,
259                              0,
260                              PTL_NO_ACK_REQ,
261                              ev->initiator,
262                              ompi_mtl_portals4.read_idx,
263                              ev->hdr_data,
264                              0,
265                              NULL,
266                              0);
267                 if (OPAL_UNLIKELY(PTL_OK != ret)) {
268                     opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
269                                         "%s:%d: PtlPut failed: %d",
270                                         __FILE__, __LINE__, ret);
271                     goto callback_error;
272                 }
273             }
274 
275             OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
276                                  "Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
277                                  ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
278             ptl_request->super.super.completion_callback(&ptl_request->super.super);
279 
280         } else {
281 
282             /* For long messages in the overflow list, ev->mlength = 0 */
283             ptl_request->super.super.ompi_req->req_status._ucount = 0;
284 
285             ret = read_msg((char*)ptl_request->delivery_ptr,
286                            (msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length,
287                            ev->initiator,
288                            ev->hdr_data,
289                            0,
290                            ptl_request);
291             if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
292                 if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
293                 goto callback_error;
294             }
295         }
296 
297         break;
298 
299     case PTL_EVENT_LINK:
300         break;
301 
302     default:
303         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
304                             "Unhandled receive callback with event type %d",
305                             ev->type);
306         return OMPI_ERROR;
307     }
308 
309     return OMPI_SUCCESS;
310 
311  callback_error:
312     ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
313         ompi_mtl_portals4_get_error(ret);
314     ptl_request->super.super.completion_callback(&ptl_request->super.super);
315     return OMPI_SUCCESS;
316 }
317 
318 
319 static int
ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t * ev,ompi_mtl_portals4_rndv_get_frag_t * rndv_get_frag)320 ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
321                                          ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag)
322 {
323     int ret;
324     ompi_mtl_portals4_recv_request_t* ptl_request =
325         (ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request;
326 
327     assert(PTL_EVENT_REPLY == ev->type);
328 
329     OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
330         "Recv %lu (0x%lx) got reply event",
331         ptl_request->opcount, ptl_request->hdr_data));
332 
333 
334     if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
335         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
336                             "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
337                             __FILE__, __LINE__, ev->ni_fail_type);
338 
339         if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) {
340             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
341                                 "PTL_EVENT_REPLY with ni_fail_type: %u => cannot retry",
342                                 (uint32_t)ev->ni_fail_type);
343             ret = PTL_FAIL;
344             goto callback_error;
345         }
346 
347         if (0 == rndv_get_frag->frag_abs_timeout_usec) {
348             /* this is the first retry of the frag.  start the timer. */
349             /* instead of recording the start time, record the end time
350              * and avoid addition on each retry. */
351             rndv_get_frag->frag_abs_timeout_usec = opal_timer_base_get_usec() + ompi_mtl_portals4.get_retransmit_timeout;
352             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
353                                 "setting frag timeout at %lu",
354                                 rndv_get_frag->frag_abs_timeout_usec);
355         } else if (opal_timer_base_get_usec() >= rndv_get_frag->frag_abs_timeout_usec) {
356             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
357                                 "timeout retrying GET");
358             ret = PTL_FAIL;
359             goto callback_error;
360         }
361 
362         OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
363             "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num));
364 
365         ret = PtlGet(ompi_mtl_portals4.send_md_h,
366                      (ptl_size_t) rndv_get_frag->frag_start,
367                      rndv_get_frag->frag_length,
368                      rndv_get_frag->frag_target,
369                      ompi_mtl_portals4.read_idx,
370                      rndv_get_frag->frag_match_bits,
371                      rndv_get_frag->frag_remote_offset,
372                      rndv_get_frag);
373         if (OPAL_UNLIKELY(PTL_OK != ret)) {
374             if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
375             goto callback_error;
376         }
377         return OMPI_SUCCESS;
378     }
379 
380     /* set the received length in the status, now that we know
381            exactly how much data was sent. */
382     ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength;
383 
384     /* this frag is complete.  return to freelist. */
385     opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
386                            &rndv_get_frag->super);
387 
388     ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
389     if (ret > 0) {
390         return OMPI_SUCCESS;
391     }
392     assert(ptl_request->pending_reply == 0);
393 
394 #if OMPI_MTL_PORTALS4_FLOW_CONTROL
395     OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
396 #endif
397 
398     /* make sure the data is in the right place.  Use _ucount for
399            the total length because it will be set correctly for all
400            three protocols. mlength is only correct for eager, and
401            delivery_len is the length of the buffer, not the length of
402            the send. */
403     ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
404                                    ptl_request->delivery_ptr,
405                                    ptl_request->super.super.ompi_req->req_status._ucount);
406     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
407         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
408                             "%s:%d: ompi_mtl_datatype_unpack failed: %d",
409                             __FILE__, __LINE__, ret);
410         ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
411     }
412 
413     OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
414         "Recv %lu (0x%lx) completed , reply (pending_reply: %d)",
415         ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply));
416     ptl_request->super.super.completion_callback(&ptl_request->super.super);
417 
418     return OMPI_SUCCESS;
419 
420  callback_error:
421     ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
422         ompi_mtl_portals4_get_error(ret);
423     ptl_request->super.super.completion_callback(&ptl_request->super.super);
424     return OMPI_SUCCESS;
425 }
426 
427 
428 int
ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t * mtl,struct ompi_communicator_t * comm,int src,int tag,struct opal_convertor_t * convertor,mca_mtl_request_t * mtl_request)429 ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
430                         struct ompi_communicator_t *comm,
431                         int src,
432                         int tag,
433                         struct opal_convertor_t *convertor,
434                         mca_mtl_request_t *mtl_request)
435 {
436     ptl_match_bits_t match_bits, ignore_bits;
437     int ret = OMPI_SUCCESS;
438     ptl_process_t remote_proc;
439     ompi_mtl_portals4_recv_request_t *ptl_request =
440         (ompi_mtl_portals4_recv_request_t*) mtl_request;
441     void *start;
442     size_t length;
443     bool free_after;
444     ptl_me_t me;
445 
446     if  (MPI_ANY_SOURCE == src) {
447         if (ompi_mtl_portals4.use_logical) {
448             remote_proc.rank = PTL_RANK_ANY;
449         } else {
450             remote_proc.phys.nid = PTL_NID_ANY;
451             remote_proc.phys.pid = PTL_PID_ANY;
452         }
453     } else if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
454         remote_proc.rank = src;
455     } else {
456         ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
457         remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
458     }
459 
460     MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid,
461                                src, tag);
462 
463     ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
464     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
465         return ret;
466     }
467 
468     ptl_request->super.type = portals4_req_recv;
469     ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
470 #if OPAL_ENABLE_DEBUG
471     ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
472     ptl_request->hdr_data = 0;
473 #endif
474     ptl_request->buffer_ptr = (free_after) ? start : NULL;
475     ptl_request->convertor = convertor;
476     ptl_request->delivery_ptr = start;
477     ptl_request->delivery_len = length;
478     ptl_request->req_started = false;
479     ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
480     ptl_request->pending_reply = 0;
481 
482     OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
483                          "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
484                          ptl_request->opcount,
485                          remote_proc.phys.nid, remote_proc.phys.pid,
486                          (int64_t)length, match_bits, ignore_bits, (unsigned long) ptl_request));
487 
488     me.start = start;
489     me.length = length;
490     me.ct_handle = PTL_CT_NONE;
491     me.min_free = 0;
492     me.uid = ompi_mtl_portals4.uid;
493     me.options =
494         PTL_ME_OP_PUT |
495         PTL_ME_USE_ONCE |
496         PTL_ME_EVENT_UNLINK_DISABLE;
497     if (length <= ompi_mtl_portals4.short_limit) {
498         me.options |= PTL_ME_EVENT_LINK_DISABLE;
499     }
500     me.match_id = remote_proc;
501     me.match_bits = match_bits;
502     me.ignore_bits = ignore_bits;
503 
504     ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
505                       ompi_mtl_portals4.recv_idx,
506                       &me,
507                       PTL_PRIORITY_LIST,
508                       ptl_request,
509                       &ptl_request->me_h);
510     if (OPAL_UNLIKELY(PTL_OK != ret)) {
511         if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
512         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
513                             "%s:%d: PtlMEAppend failed: %d",
514                             __FILE__, __LINE__, ret);
515         return ompi_mtl_portals4_get_error(ret);
516     }
517 
518     /* if a long message, spin until we either have a comm event or a
519        link event, guaranteeing progress for long unexpected
520        messages. */
521     if (length > ompi_mtl_portals4.short_limit) {
522         while (true != ptl_request->req_started) {
523             ompi_mtl_portals4_progress();
524         }
525     }
526 
527     return OMPI_SUCCESS;
528 }
529 
530 
531 int
ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t * mtl,struct opal_convertor_t * convertor,struct ompi_message_t ** message,struct mca_mtl_request_t * mtl_request)532 ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
533                          struct opal_convertor_t *convertor,
534                          struct ompi_message_t **message,
535                          struct mca_mtl_request_t *mtl_request)
536 {
537     ompi_mtl_portals4_recv_request_t *ptl_request =
538         (ompi_mtl_portals4_recv_request_t*) mtl_request;
539     void *start;
540     size_t length;
541     bool free_after;
542     int ret;
543     ompi_mtl_portals4_message_t *ptl_message =
544         (ompi_mtl_portals4_message_t*) (*message)->req_ptr;
545 
546     ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
547     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
548         return ret;
549     }
550 
551 #if OPAL_ENABLE_DEBUG
552     ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
553     ptl_request->hdr_data = 0;
554 #endif
555     ptl_request->super.type = portals4_req_recv;
556     ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
557     ptl_request->buffer_ptr = (free_after) ? start : NULL;
558     ptl_request->convertor = convertor;
559     ptl_request->delivery_ptr = start;
560     ptl_request->delivery_len = length;
561     ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
562     ptl_request->pending_reply = 0;
563 
564     OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
565                          "Mrecv %lu of length %ld (0x%lx)\n",
566                          ptl_request->opcount,
567                          (int64_t)length, (unsigned long) ptl_request));
568 
569     (*message) = MPI_MESSAGE_NULL;
570 
571     return ompi_mtl_portals4_recv_progress(&(ptl_message->ev), &ptl_request->super);
572 }
573