1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3 * (C) 2010 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 #include "mpiimpl.h"
8
9 /* -- Begin Profiling Symbol Block for routine MPIX_Iscatter */
10 #if defined(HAVE_PRAGMA_WEAK)
11 #pragma weak MPIX_Iscatter = PMPIX_Iscatter
12 #elif defined(HAVE_PRAGMA_HP_SEC_DEF)
13 #pragma _HP_SECONDARY_DEF PMPIX_Iscatter MPIX_Iscatter
14 #elif defined(HAVE_PRAGMA_CRI_DUP)
15 #pragma _CRI duplicate MPIX_Iscatter as PMPIX_Iscatter
16 #endif
17 /* -- End Profiling Symbol Block */
18
19 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
20 the MPI routines */
21 #ifndef MPICH_MPI_FROM_PMPI
22 #undef MPIX_Iscatter
23 #define MPIX_Iscatter PMPIX_Iscatter
24
25 /* helper callbacks and associated state structures */
/* State shared between the schedule callbacks below.  One instance is
   allocated per scatter operation (via MPIR_SCHED_CHKPMEM_MALLOC) and is
   freed by the schedule machinery when the schedule completes. */
struct shared_state {
    int sendcount;          /* caller's per-receiver element count (root's view) */
    int curr_count;         /* number of bytes this process currently holds */
    int send_subtree_count; /* number of bytes to forward to the next subtree */
    int nbytes;             /* packed size in bytes of one receiver's chunk */
    MPI_Status status;      /* status of the most recent tmp_buf receive */
};
/* Sched callback: after a receive into tmp_buf completes, record how many
   bytes actually arrived in curr_count. */
static int get_count(MPID_Comm *comm, int tag, void *state)
{
    struct shared_state *st = state;

    MPIR_Get_count_impl(&st->status, MPI_BYTE, &st->curr_count);
    return MPI_SUCCESS;
}
/* Sched callback (root, root==0 case): compute the number of elements to
   forward to the subtree of size `mask` (passed via state2). */
static int calc_send_count_root(MPID_Comm *comm, int tag, void *state, void *state2)
{
    struct shared_state *st = state;
    int subtree_size = (int)(size_t)state2;

    st->send_subtree_count = st->curr_count - st->sendcount * subtree_size;
    return MPI_SUCCESS;
}
/* Sched callback (non-root, or non-zero root): compute the number of BYTES
   to forward to the subtree of size `mask` (passed via state2). */
static int calc_send_count_non_root(MPID_Comm *comm, int tag, void *state, void *state2)
{
    struct shared_state *st = state;
    int subtree_size = (int)(size_t)state2;

    st->send_subtree_count = st->curr_count - st->nbytes * subtree_size;
    return MPI_SUCCESS;
}
/* Sched callback: after forwarding a subtree's data, deduct the forwarded
   amount from what this process still holds. */
static int calc_curr_count(MPID_Comm *comm, int tag, void *state)
{
    struct shared_state *st = state;

    st->curr_count -= st->send_subtree_count;
    return MPI_SUCCESS;
}
59
60 /* any non-MPI functions go here, especially non-static ones */
61
62 /* This is the default implementation of scatter. The algorithm is:
63
64 Algorithm: MPI_Scatter
65
66 We use a binomial tree algorithm for both short and
67 long messages. At nodes other than leaf nodes we need to allocate
68 a temporary buffer to store the incoming message. If the root is
69 not rank 0, we reorder the sendbuf in order of relative ranks by
70 copying it into a temporary buffer, so that all the sends from the
71 root are contiguous and in the right order. In the heterogeneous
72 case, we first pack the buffer by using MPI_Pack and then do the
73 scatter.
74
75 Cost = lgp.alpha + n.((p-1)/p).beta
76 where n is the total size of the data to be scattered from the root.
77
78 Possible improvements:
79
80 End Algorithm: MPI_Scatter
81 */
82 #undef FUNCNAME
83 #define FUNCNAME MPIR_Iscatter_intra
84 #undef FCNAME
85 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Build the schedule for a nonblocking binomial-tree scatter on an
 * intracommunicator (see the "Algorithm: MPI_Scatter" comment above).
 *
 * The schedule is: the root (reordering its sendbuf into a byte-packed
 * tmp_buf first if root != 0) sends half its data to the process mask away
 * in relative-rank space, then a quarter, and so on; interior (even
 * relative-rank) nodes receive into a tmp_buf and forward; leaf (odd
 * relative-rank) nodes receive directly into recvbuf.  Because actual
 * received sizes are only known at schedule-execution time, counts are
 * carried in a heap-allocated shared_state and adjusted via sched
 * callbacks, with sends issued via MPID_Sched_send_defer.
 *
 * Returns MPI_SUCCESS or an MPI error code; on failure all CHKPMEM
 * allocations are reaped. */
int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
                        int root, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint extent = 0;
    int rank, comm_size, is_homogeneous, sendtype_size;
    int relative_rank;
    int mask, recvtype_size=0, src, dst;
    int tmp_buf_size = 0;
    void *tmp_buf = NULL;
    struct shared_state *ss = NULL;
    MPIR_SCHED_CHKPMEM_DECL(4);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* nothing to do when the significant count is zero */
    if (((rank == root) && (sendcount == 0)) || ((rank != root) && (recvcount == 0)))
        goto fn_exit;

    is_homogeneous = 1;
#ifdef MPID_HAS_HETERO
    if (comm_ptr->is_hetero)
        is_homogeneous = 0;
#endif

    /* Use binomial tree algorithm */

    /* shared_state outlives this function; the sched callbacks read/write it
       while the schedule executes */
    MPIR_SCHED_CHKPMEM_MALLOC(ss, struct shared_state *, sizeof(struct shared_state), mpi_errno, "shared_state");
    ss->sendcount = sendcount;

    /* extent stays 0 on non-root ranks; only the root addresses sendbuf */
    if (rank == root)
        MPID_Datatype_get_extent_macro(sendtype, extent);

    relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;

    if (is_homogeneous) {
        /* communicator is homogeneous */
        if (rank == root) {
            /* We separate the two cases (root and non-root) because
               in the event of recvbuf=MPI_IN_PLACE on the root,
               recvcount and recvtype are not valid */
            MPID_Datatype_get_size_macro(sendtype, sendtype_size);
            MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                             extent*sendcount*comm_size);

            ss->nbytes = sendtype_size * sendcount;
        }
        else {
            MPID_Datatype_get_size_macro(recvtype, recvtype_size);
            /* NOTE(review): extent is still 0 here (only set when rank==root
               above), so this check is vacuous on non-root ranks -- confirm
               whether the recvtype extent was intended instead */
            MPID_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
            ss->nbytes = recvtype_size * recvcount;
        }

        ss->curr_count = 0;

        /* all even nodes other than root need a temporary buffer to
           receive data of max size (ss->nbytes*comm_size)/2 */
        if (relative_rank && !(relative_rank % 2)) {
            tmp_buf_size = (ss->nbytes*comm_size)/2;
            MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
        }

        /* if the root is not rank 0, we reorder the sendbuf in order of
           relative ranks and copy it into a temporary buffer, so that
           all the sends from the root are contiguous and in the right
           order. */
        if (rank == root) {
            if (root != 0) {
                tmp_buf_size = ss->nbytes*comm_size;
                MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");

                /* copy the chunks for ranks root..comm_size-1 to the front
                   of tmp_buf; with MPI_IN_PLACE the root's own chunk stays
                   in recvbuf, so skip it (hence the rank+1 / ss->nbytes
                   offset variant) */
                if (recvbuf != MPI_IN_PLACE)
                    mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*rank),
                                                sendcount*(comm_size-rank), sendtype,
                                                tmp_buf, ss->nbytes*(comm_size-rank), MPI_BYTE, s);
                else
                    mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*(rank+1)),
                                                sendcount*(comm_size-rank-1), sendtype,
                                                ((char *)tmp_buf + ss->nbytes),
                                                ss->nbytes*(comm_size-rank-1), MPI_BYTE, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);

                /* chunks for ranks 0..root-1 wrap around to the tail */
                mpi_errno = MPID_Sched_copy(sendbuf, sendcount*rank, sendtype,
                                            ((char *) tmp_buf + ss->nbytes*(comm_size-rank)),
                                            ss->nbytes*rank, MPI_BYTE, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);

                MPID_SCHED_BARRIER(s);
                ss->curr_count = ss->nbytes*comm_size;
            }
            else
                ss->curr_count = sendcount*comm_size;
        }

        /* root has all the data; others have zero so far */

        /* receive phase: find the lowest set bit of relative_rank; that
           determines which ancestor sends to this process */
        mask = 0x1;
        while (mask < comm_size) {
            if (relative_rank & mask) {
                src = rank - mask;
                if (src < 0) src += comm_size;

                /* The leaf nodes receive directly into recvbuf because
                   they don't have to forward data to anyone. Others
                   receive data into a temporary buffer. */
                if (relative_rank % 2) {
                    mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, src, comm_ptr, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                }
                else {

                    /* the recv size is larger than what may be sent in
                       some cases. query amount of data actually received */
                    mpi_errno = MPID_Sched_recv_status(tmp_buf, tmp_buf_size, MPI_BYTE, src, comm_ptr, &ss->status, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                    /* get_count runs after the recv completes and stores the
                       true byte count in ss->curr_count */
                    mpi_errno = MPID_Sched_cb(&get_count, ss, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                }
                break;
            }
            mask <<= 1;
        }

        /* This process is responsible for all processes that have bits
           set from the LSB upto (but not including) mask. Because of
           the "not including", we start by shifting mask back down
           one. */

        /* send phase: forward halves of the held data to descendants,
           largest subtree first */
        mask >>= 1;
        while (mask > 0) {
            if (relative_rank + mask < comm_size) {
                dst = rank + mask;
                if (dst >= comm_size) dst -= comm_size;

                if ((rank == root) && (root == 0))
                {
#if 0
                    /* FIXME how can this be right? shouldn't (sendcount*mask)
                     * be the amount sent and curr_cnt be reduced by that?  Or
                     * is it always true the (curr_cnt/2==sendcount*mask)? */
                    send_subtree_cnt = curr_cnt - sendcount * mask;
#endif
                    /* root==0 sends straight from sendbuf in sendtype units;
                       the count is computed at execution time by the callback
                       and consumed via the deferred send */
                    mpi_errno = MPID_Sched_cb2(&calc_send_count_root, ss, ((void *)(size_t)mask), s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);

                    /* mask is also the size of this process's subtree */
                    mpi_errno = MPID_Sched_send_defer(((char *)sendbuf + extent*sendcount*mask),
                                                      &ss->send_subtree_count, sendtype, dst,
                                                      comm_ptr, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                }
                else
                {
                    /* non-zero root and others */
                    mpi_errno = MPID_Sched_cb2(&calc_send_count_non_root, ss, ((void *)(size_t)mask), s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);

                    /* mask is also the size of this process's subtree */
                    mpi_errno = MPID_Sched_send_defer(((char *)tmp_buf + ss->nbytes*mask),
                                                      &ss->send_subtree_count, MPI_BYTE, dst,
                                                      comm_ptr, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                }
                /* deduct the forwarded bytes/elements from curr_count */
                mpi_errno = MPID_Sched_cb(&calc_curr_count, ss, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }
            mask >>= 1;
        }

        if ((rank == root) && (root == 0) && (recvbuf != MPI_IN_PLACE)) {
            /* for root=0, put root's data in recvbuf if not MPI_IN_PLACE */
            mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
                                        recvbuf, recvcount, recvtype, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }
        else if (!(relative_rank % 2) && (recvbuf != MPI_IN_PLACE)) {
            /* for non-zero root and non-leaf nodes, copy from tmp_buf
               into recvbuf */
            mpi_errno = MPID_Sched_copy(tmp_buf, ss->nbytes, MPI_BYTE,
                                        recvbuf, recvcount, recvtype, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }

    }
#ifdef MPID_HAS_HETERO
    else { /* communicator is heterogeneous */
        /* NOTE(review): this branch is blocking-scatter code that has not
           been converted to the nonblocking schedule API; it references
           identifiers (nbytes, curr_cnt, status, errflag, comm, ...) that
           are not declared in this function and cannot compile as-is.  It
           is gated by the assertion below. */
        int position;
        MPIU_Assertp(FALSE); /* hetero case not yet implemented */

        if (rank == root) {
            MPIR_Pack_size_impl(sendcount*comm_size, sendtype, &tmp_buf_size);

            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");

            /* calculate the value of nbytes, the number of bytes in packed
               representation that each process receives. We can't
               accurately calculate that from tmp_buf_size because
               MPI_Pack_size returns an upper bound on the amount of memory
               required. (For example, for a single integer, MPICH-1 returns
               pack_size=12.) Therefore, we actually pack some data into
               tmp_buf and see by how much 'position' is incremented. */

            position = 0;
            mpi_errno = MPIR_Pack_impl(sendbuf, 1, sendtype, tmp_buf, tmp_buf_size, &position);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);

            nbytes = position*sendcount;

            curr_cnt = nbytes*comm_size;

            if (root == 0) {
                if (recvbuf != MPI_IN_PLACE) {
                    position = 0;
                    mpi_errno = MPIR_Pack_impl(sendbuf, sendcount*comm_size, sendtype, tmp_buf,
                                               tmp_buf_size, &position);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                }
                else {
                    position = nbytes;
                    mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount),
                                               sendcount*(comm_size-1), sendtype, tmp_buf,
                                               tmp_buf_size, &position);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                }
            }
            else {
                if (recvbuf != MPI_IN_PLACE) {
                    position = 0;
                    mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount*rank),
                                               sendcount*(comm_size-rank), sendtype, tmp_buf,
                                               tmp_buf_size, &position);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                }
                else {
                    position = nbytes;
                    mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount*(rank+1)),
                                               sendcount*(comm_size-rank-1), sendtype, tmp_buf,
                                               tmp_buf_size, &position);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                }
                mpi_errno = MPIR_Pack_impl(sendbuf, sendcount*rank, sendtype, tmp_buf,
                                           tmp_buf_size, &position);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
        }
        else {
            MPIR_Pack_size_impl(recvcount*(comm_size/2), recvtype, &tmp_buf_size);
            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");

            /* calculate nbytes */
            position = 0;
            mpi_errno = MPIR_Pack_impl(recvbuf, 1, recvtype, tmp_buf, tmp_buf_size, &position);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            nbytes = position*recvcount;

            curr_cnt = 0;
        }

        mask = 0x1;
        while (mask < comm_size) {
            if (relative_rank & mask) {
                src = rank - mask;
                if (src < 0) src += comm_size;

                mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
                                         MPIR_SCATTER_TAG, comm, &status, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error but continue */
                    *errflag = TRUE;
                    MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                    curr_cnt = 0;
                } else
                    /* the recv size is larger than what may be sent in
                       some cases. query amount of data actually received */
                    MPIR_Get_count_impl(&status, MPI_BYTE, &curr_cnt);
                break;
            }
            mask <<= 1;
        }

        /* This process is responsible for all processes that have bits
           set from the LSB upto (but not including) mask. Because of
           the "not including", we start by shifting mask back down
           one. */

        mask >>= 1;
        while (mask > 0) {
            if (relative_rank + mask < comm_size) {
                dst = rank + mask;
                if (dst >= comm_size) dst -= comm_size;

                send_subtree_cnt = curr_cnt - nbytes * mask;
                /* mask is also the size of this process's subtree */
                mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
                                         send_subtree_cnt, MPI_BYTE, dst,
                                         MPIR_SCATTER_TAG, comm, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error but continue */
                    *errflag = TRUE;
                    MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
                curr_cnt -= send_subtree_cnt;
            }
            mask >>= 1;
        }

        /* copy local data into recvbuf */
        position = 0;
        if (recvbuf != MPI_IN_PLACE) {
            mpi_errno = MPIR_Unpack_impl(tmp_buf, tmp_buf_size, &position, recvbuf,
                                         recvcount, recvtype);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }
#endif /* MPID_HAS_HETERO */


    MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
    return mpi_errno;
 fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
423
424 #undef FUNCNAME
425 #define FUNCNAME MPIR_Iscatter_inter
426 #undef FCNAME
427 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Build the schedule for a nonblocking scatter on an intercommunicator.
 *
 * Short messages: the root sends all data to rank 0 of the remote group,
 * which then runs an intracommunicator Iscatter over the remote group's
 * local communicator.  Long messages: the root sends one message per
 * remote process (linear), avoiding the extra n.beta of the relay.
 * The cutoff is MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE.
 *
 * Returns MPI_SUCCESS or an MPI error code; on failure all CHKPMEM
 * allocations are reaped. */
int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
                        int root, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
/*  Intercommunicator scatter.
    For short messages, root sends to rank 0 in remote group. rank 0
    does local intracommunicator scatter (binomial tree).
    Cost: (lgp+1).alpha + n.((p-1)/p).beta + n.beta

    For long messages, we use linear scatter to avoid the extra n.beta.
    Cost: p.alpha + n.beta
*/
    int mpi_errno = MPI_SUCCESS;
    int rank, local_size, remote_size;
    int i, nbytes, sendtype_size, recvtype_size;
    MPI_Aint extent, true_extent, true_lb = 0;
    void *tmp_buf = NULL;
    MPID_Comm *newcomm_ptr = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    if (root == MPI_PROC_NULL) {
        /* local processes other than root do nothing */
        goto fn_exit;
    }

    remote_size = comm_ptr->remote_size;
    local_size  = comm_ptr->local_size;

    /* compute total payload size to pick the short- vs long-message path */
    if (root == MPI_ROOT) {
        MPID_Datatype_get_size_macro(sendtype, sendtype_size);
        nbytes = sendtype_size * sendcount * remote_size;
    }
    else {
        /* remote side */
        MPID_Datatype_get_size_macro(recvtype, recvtype_size);
        nbytes = recvtype_size * recvcount * local_size;
    }

    if (nbytes < MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE) {
        if (root == MPI_ROOT) {
            /* root sends all data to rank 0 on remote group and returns */
            mpi_errno = MPID_Sched_send(sendbuf, sendcount*remote_size, sendtype, 0, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
            goto fn_exit;
        }
        else {
            /* remote group. rank 0 receives data from root. need to
               allocate temporary buffer to store this data. */
            rank = comm_ptr->rank;

            if (rank == 0) {
                MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &true_extent);

                MPID_Datatype_get_extent_macro(recvtype, extent);
                MPID_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
                MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                                 sendcount*remote_size*extent);

                MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, recvcount*local_size*(MPIR_MAX(extent,true_extent)),
                                          mpi_errno, "tmp_buf");

                /* adjust for potential negative lower bound in datatype */
                tmp_buf = (void *)((char*)tmp_buf - true_lb);

                mpi_errno = MPID_Sched_recv(tmp_buf, recvcount*local_size, recvtype, root, comm_ptr, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }

            /* Get the local intracommunicator */
            if (!comm_ptr->local_comm)
                MPIR_Setup_intercomm_localcomm(comm_ptr);

            newcomm_ptr = comm_ptr->local_comm;

            /* now do the usual scatter on this intracommunicator; on
               ranks != 0 tmp_buf is NULL, which is fine since the scatter
               root (rank 0) is the only one whose sendbuf is significant */
            MPIU_Assert(newcomm_ptr->coll_fns != NULL);
            MPIU_Assert(newcomm_ptr->coll_fns->Iscatter != NULL);
            mpi_errno = newcomm_ptr->coll_fns->Iscatter(tmp_buf, recvcount, recvtype,
                                                        recvbuf, recvcount, recvtype,
                                                        0, newcomm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }
    }
    else {
        /* long message. use linear algorithm. */
        if (root == MPI_ROOT) {
            MPID_Datatype_get_extent_macro(sendtype, extent);
            for (i = 0; i < remote_size; i++) {
                mpi_errno = MPID_Sched_send(((char *)sendbuf+sendcount*i*extent),
                                            sendcount, sendtype, i, comm_ptr, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
            MPID_SCHED_BARRIER(s);
        }
        else {
            mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }
    }


    MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
    return mpi_errno;
 fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
540
541 #undef FUNCNAME
542 #define FUNCNAME MPIR_Iscatter_impl
543 #undef FCNAME
544 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Common implementation entry point: allocate a schedule, let the
 * communicator's collective-function table populate it, and start it.
 * On success *request holds the handle of the schedule's request (or
 * MPI_REQUEST_NULL if none was produced). */
int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    int sched_tag = -1;
    MPID_Request *req_ptr = NULL;
    MPID_Sched_t sched = MPID_SCHED_NULL;

    *request = MPI_REQUEST_NULL;

    /* reserve a tag for this operation's point-to-point traffic, then
       create an empty schedule to fill in */
    mpi_errno = MPID_Sched_next_tag(comm_ptr, &sched_tag);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_create(&sched);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* dispatch through the coll_fns table (intra vs inter variant) */
    MPIU_Assert(comm_ptr->coll_fns != NULL);
    MPIU_Assert(comm_ptr->coll_fns->Iscatter != NULL);
    mpi_errno = comm_ptr->coll_fns->Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, sched);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* kick off the schedule; capture the request handle even if start
       reported an error, then check the error */
    mpi_errno = MPID_Sched_start(&sched, comm_ptr, sched_tag, &req_ptr);
    if (req_ptr)
        *request = req_ptr->handle;
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
574
575 #endif /* MPICH_MPI_FROM_PMPI */
576
577 #undef FUNCNAME
578 #define FUNCNAME MPIX_Iscatter
579 #undef FCNAME
580 #define FCNAME MPIU_QUOTE(FUNCNAME)
581 /*@
582 MPIX_Iscatter - XXX description here
583
584 Input Parameters:
585 + sendbuf - address of send buffer (significant only at root) (choice)
586 . sendcount - number of elements sent to each process (significant only at root) (non-negative integer)
587 . sendtype - data type of send buffer elements (significant only at root) (handle)
588 . recvcount - number of elements in receive buffer (non-negative integer)
589 . recvtype - data type of receive buffer elements (handle)
590 . root - rank of sending process (integer)
591 - comm - communicator (handle)
592
593 Output Parameters:
594 + recvbuf - starting address of the receive buffer (choice)
595 - request - communication request (handle)
596
597 .N ThreadSafe
598
599 .N Fortran
600
601 .N Errors
602 @*/
/* Public MPI entry point: validate arguments, convert the communicator
 * handle, and delegate to MPIR_Iscatter_impl.  Error-checking is compiled
 * in only under HAVE_ERROR_CHECKING; failures are routed through the
 * communicator's error handler via MPIR_Err_return_comm. */
int MPIX_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL;
    MPID_Datatype *sendtype_ptr, *recvtype_ptr;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_ISCATTER);

    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_ISCATTER);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            MPIR_ERRTEST_COMM(comm, mpi_errno);

            /* TODO more checks may be appropriate */
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr(comm, comm_ptr);

    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            MPID_Comm_valid_ptr(comm_ptr, mpi_errno);
            if (comm_ptr->comm_kind == MPID_INTRACOMM) {
                MPIR_ERRTEST_INTRA_ROOT(comm_ptr, root, mpi_errno);

                /* send arguments are significant only at the root */
                if (comm_ptr->rank == root) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPID_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                        MPID_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                    }
                    MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);

                    /* catch common aliasing cases */
                    if (recvbuf != MPI_IN_PLACE && sendtype == recvtype && sendcount == recvcount && recvcount != 0)
                        MPIR_ERRTEST_ALIAS_COLL(sendbuf,recvbuf,mpi_errno);
                }
                else
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);

                /* recv arguments are not significant at the root when
                   MPI_IN_PLACE is given */
                if (recvbuf != MPI_IN_PLACE) {
                    MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPID_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                        MPID_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                    }
                    MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
                }
            }

            /* intercommunicator: root is MPI_ROOT / MPI_PROC_NULL / a
               remote rank, so the significant-argument rules differ */
            if (comm_ptr->comm_kind == MPID_INTERCOMM) {
                MPIR_ERRTEST_INTER_ROOT(comm_ptr, root, mpi_errno);

                if (root == MPI_ROOT) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPID_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                        MPID_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                    }
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
                }
                else if (root != MPI_PROC_NULL) {
                    MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPID_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                        MPID_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                    }
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
                }
            }
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, request);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* ... end of body of routine ... */

fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_ISCATTER);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpix_iscatter", "**mpix_iscatter %p %d %D %p %d %D %d %C %p", sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
    goto fn_exit;
}
733