1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2015 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
16 * Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
17 * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
18 * Copyright (c) 2016-2017 Research Organization for Information Science
19 * and Technology (RIST). All rights reserved.
20 * $COPYRIGHT$
21 *
22 * Additional copyrights may follow
23 *
24 * $HEADER$
25 */
26
27 #include "ompi_config.h"
28 #include "ompi/request/request.h"
29 #include "pml_ob1_recvreq.h"
30 #include "pml_ob1_recvfrag.h"
31 #include "ompi/peruse/peruse-internal.h"
32 #include "ompi/message/message.h"
33 #include "ompi/memchecker.h"
34
35 /**
36 * Single usage request. As we allow recursive calls to recv
37 * (from the request completion callback), we cannot rely on
38 * using a global request. Thus, once a recv acquires ownership
39 * this global request, it should set it to NULL to prevent
40 * the reuse until the first user completes.
41 */
42 mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq = NULL;
43
mca_pml_ob1_irecv_init(void * addr,size_t count,ompi_datatype_t * datatype,int src,int tag,struct ompi_communicator_t * comm,struct ompi_request_t ** request)44 int mca_pml_ob1_irecv_init(void *addr,
45 size_t count,
46 ompi_datatype_t * datatype,
47 int src,
48 int tag,
49 struct ompi_communicator_t *comm,
50 struct ompi_request_t **request)
51 {
52 mca_pml_ob1_recv_request_t *recvreq;
53 MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq);
54 if (NULL == recvreq)
55 return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
56
57 recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
58 MCA_PML_OB1_RECV_REQUEST_INIT(recvreq,
59 addr,
60 count, datatype, src, tag, comm, true);
61
62 PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
63 &((recvreq)->req_recv.req_base),
64 PERUSE_RECV);
65
66 /* Work around a leak in start by marking this request as complete. The
67 * problem occured because we do not have a way to differentiate an
68 * inital request and an incomplete pml request in start. This line
69 * allows us to detect this state. */
70 recvreq->req_recv.req_base.req_pml_complete = true;
71
72 *request = (ompi_request_t *) recvreq;
73 return OMPI_SUCCESS;
74 }
75
mca_pml_ob1_irecv(void * addr,size_t count,ompi_datatype_t * datatype,int src,int tag,struct ompi_communicator_t * comm,struct ompi_request_t ** request)76 int mca_pml_ob1_irecv(void *addr,
77 size_t count,
78 ompi_datatype_t * datatype,
79 int src,
80 int tag,
81 struct ompi_communicator_t *comm,
82 struct ompi_request_t **request)
83 {
84 mca_pml_ob1_recv_request_t *recvreq;
85 MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq);
86 if (NULL == recvreq)
87 return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
88
89 recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
90 MCA_PML_OB1_RECV_REQUEST_INIT(recvreq,
91 addr,
92 count, datatype, src, tag, comm, false);
93
94 PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
95 &((recvreq)->req_recv.req_base),
96 PERUSE_RECV);
97
98 MCA_PML_OB1_RECV_REQUEST_START(recvreq);
99 *request = (ompi_request_t *) recvreq;
100 return OMPI_SUCCESS;
101 }
102
103
mca_pml_ob1_recv(void * addr,size_t count,ompi_datatype_t * datatype,int src,int tag,struct ompi_communicator_t * comm,ompi_status_public_t * status)104 int mca_pml_ob1_recv(void *addr,
105 size_t count,
106 ompi_datatype_t * datatype,
107 int src,
108 int tag,
109 struct ompi_communicator_t *comm,
110 ompi_status_public_t * status)
111 {
112 mca_pml_ob1_recv_request_t *recvreq = NULL;
113 int rc;
114
115 if (OPAL_LIKELY(!ompi_mpi_thread_multiple)) {
116 recvreq = mca_pml_ob1_recvreq;
117 mca_pml_ob1_recvreq = NULL;
118 }
119
120 if( OPAL_UNLIKELY(NULL == recvreq) ) {
121 MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq);
122 if (NULL == recvreq)
123 return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
124 }
125
126 recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
127 MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype,
128 src, tag, comm, false);
129
130 PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
131 &(recvreq->req_recv.req_base),
132 PERUSE_RECV);
133
134 MCA_PML_OB1_RECV_REQUEST_START(recvreq);
135 ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
136
137 if( true == recvreq->req_recv.req_base.req_pml_complete ) {
138 /* make buffer defined when the request is compeleted */
139 MEMCHECKER(
140 memchecker_call(&opal_memchecker_base_mem_defined,
141 recvreq->req_recv.req_base.req_addr,
142 recvreq->req_recv.req_base.req_count,
143 recvreq->req_recv.req_base.req_datatype);
144 );
145 }
146
147 if (NULL != status) { /* return status */
148 *status = recvreq->req_recv.req_base.req_ompi.req_status;
149 }
150
151 rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
152
153 if (recvreq->req_recv.req_base.req_pml_complete) {
154 /* make buffer defined when the request is compeleted,
155 and before releasing the objects. */
156 MEMCHECKER(
157 memchecker_call(&opal_memchecker_base_mem_defined,
158 recvreq->req_recv.req_base.req_addr,
159 recvreq->req_recv.req_base.req_count,
160 recvreq->req_recv.req_base.req_datatype);
161 );
162 }
163
164 if (OPAL_UNLIKELY(ompi_mpi_thread_multiple || NULL != mca_pml_ob1_recvreq)) {
165 MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq);
166 } else {
167 mca_pml_ob1_recv_request_fini (recvreq);
168 mca_pml_ob1_recvreq = recvreq;
169 }
170
171 return rc;
172 }
173
174
175 int
mca_pml_ob1_imrecv(void * buf,size_t count,ompi_datatype_t * datatype,struct ompi_message_t ** message,struct ompi_request_t ** request)176 mca_pml_ob1_imrecv( void *buf,
177 size_t count,
178 ompi_datatype_t *datatype,
179 struct ompi_message_t **message,
180 struct ompi_request_t **request )
181 {
182 mca_pml_ob1_recv_frag_t* frag;
183 mca_pml_ob1_recv_request_t *recvreq;
184 mca_pml_ob1_hdr_t *hdr;
185 int src, tag;
186 ompi_communicator_t *comm;
187 mca_pml_ob1_comm_proc_t* proc;
188 uint64_t seq;
189
190 /* get the request from the message and the frag from the request
191 before we overwrite everything */
192 recvreq = (mca_pml_ob1_recv_request_t*) (*message)->req_ptr;
193 frag = (mca_pml_ob1_recv_frag_t*) recvreq->req_recv.req_base.req_addr;
194 src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
195 tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
196 comm = (*message)->comm;
197 seq = recvreq->req_recv.req_base.req_sequence;
198
199 /* make the request a recv request again */
200 /* The old request kept pointers to comm and the char datatype.
201 We're about to release those, but need to make sure comm
202 doesn't go out of scope (we don't care about the char datatype
203 anymore). So retain comm, then release the frag, then reinit
204 the frag (which will retain comm), then release comm (but the
205 frag still has it's ref, so it'll stay in scope). Make
206 sense? */
207 OBJ_RETAIN(comm);
208 MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
209 recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
210 MCA_PML_OB1_RECV_REQUEST_INIT(recvreq,
211 buf,
212 count, datatype,
213 src, tag, comm, false);
214 OBJ_RELEASE(comm);
215
216 PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
217 &((recvreq)->req_recv.req_base),
218 PERUSE_RECV);
219
220 /* init/re-init the request */
221 recvreq->req_lock = 0;
222 recvreq->req_pipeline_depth = 0;
223 recvreq->req_bytes_received = 0;
224 /* What about req_rdma_cnt ? */
225 recvreq->req_rdma_idx = 0;
226 recvreq->req_pending = false;
227 recvreq->req_ack_sent = false;
228
229 MCA_PML_BASE_RECV_START(&recvreq->req_recv);
230
231 /* Note - sequence number already assigned */
232 recvreq->req_recv.req_base.req_sequence = seq;
233
234 proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
235 recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
236 prepare_recv_req_converter(recvreq);
237
238 /* we can't go through the match, since we already have the match.
239 Cheat and do what REQUEST_START does, but without the frag
240 search */
241 hdr = (mca_pml_ob1_hdr_t*)frag->segments->seg_addr.pval;
242 switch(hdr->hdr_common.hdr_type) {
243 case MCA_PML_OB1_HDR_TYPE_MATCH:
244 mca_pml_ob1_recv_request_progress_match(recvreq, frag->btl, frag->segments,
245 frag->num_segments);
246 break;
247 case MCA_PML_OB1_HDR_TYPE_RNDV:
248 mca_pml_ob1_recv_request_progress_rndv(recvreq, frag->btl, frag->segments,
249 frag->num_segments);
250 break;
251 case MCA_PML_OB1_HDR_TYPE_RGET:
252 mca_pml_ob1_recv_request_progress_rget(recvreq, frag->btl, frag->segments,
253 frag->num_segments);
254 break;
255 default:
256 assert(0);
257 }
258 MCA_PML_OB1_RECV_FRAG_RETURN(frag);
259
260 ompi_message_return(*message);
261 *message = MPI_MESSAGE_NULL;
262 *request = (ompi_request_t *) recvreq;
263
264 return OMPI_SUCCESS;
265 }
266
267
268 int
mca_pml_ob1_mrecv(void * buf,size_t count,ompi_datatype_t * datatype,struct ompi_message_t ** message,ompi_status_public_t * status)269 mca_pml_ob1_mrecv( void *buf,
270 size_t count,
271 ompi_datatype_t *datatype,
272 struct ompi_message_t **message,
273 ompi_status_public_t* status )
274 {
275 mca_pml_ob1_recv_frag_t* frag;
276 mca_pml_ob1_recv_request_t *recvreq;
277 mca_pml_ob1_hdr_t *hdr;
278 int src, tag, rc;
279 ompi_communicator_t *comm;
280 mca_pml_ob1_comm_proc_t* proc;
281 uint64_t seq;
282
283 /* get the request from the message and the frag from the request
284 before we overwrite everything */
285 comm = (*message)->comm;
286 recvreq = (mca_pml_ob1_recv_request_t*) (*message)->req_ptr;
287 frag = (mca_pml_ob1_recv_frag_t*) recvreq->req_recv.req_base.req_addr;
288 src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
289 tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
290 seq = recvreq->req_recv.req_base.req_sequence;
291
292 /* make the request a recv request again */
293 /* The old request kept pointers to comm and the char datatype.
294 We're about to release those, but need to make sure comm
295 doesn't go out of scope (we don't care about the char datatype
296 anymore). So retain comm, then release the frag, then reinit
297 the frag (which will retain comm), then release comm (but the
298 frag still has it's ref, so it'll stay in scope). Make
299 sense? */
300 OBJ_RETAIN(comm);
301 MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
302 recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
303 MCA_PML_OB1_RECV_REQUEST_INIT(recvreq,
304 buf,
305 count, datatype,
306 src, tag, comm, false);
307 OBJ_RELEASE(comm);
308
309 PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
310 &((recvreq)->req_recv.req_base),
311 PERUSE_RECV);
312
313 /* init/re-init the request */
314 recvreq->req_lock = 0;
315 recvreq->req_pipeline_depth = 0;
316 recvreq->req_bytes_received = 0;
317 recvreq->req_rdma_cnt = 0;
318 recvreq->req_rdma_idx = 0;
319 recvreq->req_pending = false;
320
321 MCA_PML_BASE_RECV_START(&recvreq->req_recv);
322
323 /* Note - sequence number already assigned */
324 recvreq->req_recv.req_base.req_sequence = seq;
325
326 proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
327 recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
328 prepare_recv_req_converter(recvreq);
329
330 /* we can't go through the match, since we already have the match.
331 Cheat and do what REQUEST_START does, but without the frag
332 search */
333 hdr = (mca_pml_ob1_hdr_t*)frag->segments->seg_addr.pval;
334 switch(hdr->hdr_common.hdr_type) {
335 case MCA_PML_OB1_HDR_TYPE_MATCH:
336 mca_pml_ob1_recv_request_progress_match(recvreq, frag->btl, frag->segments,
337 frag->num_segments);
338 break;
339 case MCA_PML_OB1_HDR_TYPE_RNDV:
340 mca_pml_ob1_recv_request_progress_rndv(recvreq, frag->btl, frag->segments,
341 frag->num_segments);
342 break;
343 case MCA_PML_OB1_HDR_TYPE_RGET:
344 mca_pml_ob1_recv_request_progress_rget(recvreq, frag->btl, frag->segments,
345 frag->num_segments);
346 break;
347 default:
348 assert(0);
349 }
350
351 ompi_message_return(*message);
352 *message = MPI_MESSAGE_NULL;
353 ompi_request_wait_completion(&(recvreq->req_recv.req_base.req_ompi));
354
355 MCA_PML_OB1_RECV_FRAG_RETURN(frag);
356
357 if (NULL != status) { /* return status */
358 *status = recvreq->req_recv.req_base.req_ompi.req_status;
359 }
360 rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
361 ompi_request_free( (ompi_request_t**)&recvreq );
362 return rc;
363 }
364
365