1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3  *  (C) 2010 by Argonne National Laboratory.
4  *      See COPYRIGHT in top-level directory.
5  */
6 
7 #include "mpiimpl.h"
8 
9 /* -- Begin Profiling Symbol Block for routine MPIX_Iscatter */
10 #if defined(HAVE_PRAGMA_WEAK)
11 #pragma weak MPIX_Iscatter = PMPIX_Iscatter
12 #elif defined(HAVE_PRAGMA_HP_SEC_DEF)
13 #pragma _HP_SECONDARY_DEF PMPIX_Iscatter  MPIX_Iscatter
14 #elif defined(HAVE_PRAGMA_CRI_DUP)
15 #pragma _CRI duplicate MPIX_Iscatter as PMPIX_Iscatter
16 #endif
17 /* -- End Profiling Symbol Block */
18 
19 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
20    the MPI routines */
21 #ifndef MPICH_MPI_FROM_PMPI
22 #undef MPIX_Iscatter
23 #define MPIX_Iscatter PMPIX_Iscatter
24 
25 /* helper callbacks and associated state structures */
26 struct shared_state {
27     int sendcount;
28     int curr_count;
29     int send_subtree_count;
30     int nbytes;
31     MPI_Status status;
32 };
get_count(MPID_Comm * comm,int tag,void * state)33 static int get_count(MPID_Comm *comm, int tag, void *state)
34 {
35     struct shared_state *ss = state;
36     MPIR_Get_count_impl(&ss->status, MPI_BYTE, &ss->curr_count);
37     return MPI_SUCCESS;
38 }
calc_send_count_root(MPID_Comm * comm,int tag,void * state,void * state2)39 static int calc_send_count_root(MPID_Comm *comm, int tag, void *state, void *state2)
40 {
41     struct shared_state *ss = state;
42     int mask = (int)(size_t)state2;
43     ss->send_subtree_count = ss->curr_count - ss->sendcount * mask;
44     return MPI_SUCCESS;
45 }
calc_send_count_non_root(MPID_Comm * comm,int tag,void * state,void * state2)46 static int calc_send_count_non_root(MPID_Comm *comm, int tag, void *state, void *state2)
47 {
48     struct shared_state *ss = state;
49     int mask = (int)(size_t)state2;
50     ss->send_subtree_count = ss->curr_count - ss->nbytes * mask;
51     return MPI_SUCCESS;
52 }
calc_curr_count(MPID_Comm * comm,int tag,void * state)53 static int calc_curr_count(MPID_Comm *comm, int tag, void *state)
54 {
55     struct shared_state *ss = state;
56     ss->curr_count -= ss->send_subtree_count;
57     return MPI_SUCCESS;
58 }
59 
60 /* any non-MPI functions go here, especially non-static ones */
61 
62 /* This is the default implementation of scatter. The algorithm is:
63 
64    Algorithm: MPI_Scatter
65 
66    We use a binomial tree algorithm for both short and
67    long messages. At nodes other than leaf nodes we need to allocate
68    a temporary buffer to store the incoming message. If the root is
69    not rank 0, we reorder the sendbuf in order of relative ranks by
70    copying it into a temporary buffer, so that all the sends from the
71    root are contiguous and in the right order. In the heterogeneous
72    case, we first pack the buffer by using MPI_Pack and then do the
73    scatter.
74 
75    Cost = lgp.alpha + n.((p-1)/p).beta
76    where n is the total size of the data to be scattered from the root.
77 
78    Possible improvements:
79 
80    End Algorithm: MPI_Scatter
81 */
82 #undef FUNCNAME
83 #define FUNCNAME MPIR_Iscatter_intra
84 #undef FCNAME
85 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Iscatter_intra(const void * sendbuf,int sendcount,MPI_Datatype sendtype,void * recvbuf,int recvcount,MPI_Datatype recvtype,int root,MPID_Comm * comm_ptr,MPID_Sched_t s)86 int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
87                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
88                         int root, MPID_Comm *comm_ptr, MPID_Sched_t s)
89 {
90     int mpi_errno = MPI_SUCCESS;
91     MPI_Aint extent = 0;
92     int rank, comm_size, is_homogeneous, sendtype_size;
93     int relative_rank;
94     int mask, recvtype_size=0, src, dst;
95     int tmp_buf_size = 0;
96     void *tmp_buf = NULL;
97     struct shared_state *ss = NULL;
98     MPIR_SCHED_CHKPMEM_DECL(4);
99 
100     comm_size = comm_ptr->local_size;
101     rank = comm_ptr->rank;
102 
103     if (((rank == root) && (sendcount == 0)) || ((rank != root) && (recvcount == 0)))
104         goto fn_exit;
105 
106     is_homogeneous = 1;
107 #ifdef MPID_HAS_HETERO
108     if (comm_ptr->is_hetero)
109         is_homogeneous = 0;
110 #endif
111 
112 /* Use binomial tree algorithm */
113 
114     MPIR_SCHED_CHKPMEM_MALLOC(ss, struct shared_state *, sizeof(struct shared_state), mpi_errno, "shared_state");
115     ss->sendcount = sendcount;
116 
117     if (rank == root)
118         MPID_Datatype_get_extent_macro(sendtype, extent);
119 
120     relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;
121 
122     if (is_homogeneous) {
123         /* communicator is homogeneous */
124         if (rank == root) {
125             /* We separate the two cases (root and non-root) because
126                in the event of recvbuf=MPI_IN_PLACE on the root,
127                recvcount and recvtype are not valid */
128             MPID_Datatype_get_size_macro(sendtype, sendtype_size);
129             MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
130                                              extent*sendcount*comm_size);
131 
132             ss->nbytes = sendtype_size * sendcount;
133         }
134         else {
135             MPID_Datatype_get_size_macro(recvtype, recvtype_size);
136             MPID_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
137             ss->nbytes = recvtype_size * recvcount;
138         }
139 
140         ss->curr_count = 0;
141 
142         /* all even nodes other than root need a temporary buffer to
143            receive data of max size (ss->nbytes*comm_size)/2 */
144         if (relative_rank && !(relative_rank % 2)) {
145             tmp_buf_size = (ss->nbytes*comm_size)/2;
146             MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
147         }
148 
149         /* if the root is not rank 0, we reorder the sendbuf in order of
150            relative ranks and copy it into a temporary buffer, so that
151            all the sends from the root are contiguous and in the right
152            order. */
153         if (rank == root) {
154             if (root != 0) {
155                 tmp_buf_size = ss->nbytes*comm_size;
156                 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
157 
158                 if (recvbuf != MPI_IN_PLACE)
159                     mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*rank),
160                                                 sendcount*(comm_size-rank), sendtype,
161                                                 tmp_buf, ss->nbytes*(comm_size-rank), MPI_BYTE, s);
162                 else
163                     mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*(rank+1)),
164                                                 sendcount*(comm_size-rank-1), sendtype,
165                                                 ((char *)tmp_buf + ss->nbytes),
166                                                 ss->nbytes*(comm_size-rank-1), MPI_BYTE, s);
167                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
168 
169                 mpi_errno = MPID_Sched_copy(sendbuf, sendcount*rank, sendtype,
170                                             ((char *) tmp_buf + ss->nbytes*(comm_size-rank)),
171                                             ss->nbytes*rank, MPI_BYTE, s);
172                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
173 
174                 MPID_SCHED_BARRIER(s);
175                 ss->curr_count = ss->nbytes*comm_size;
176             }
177             else
178                 ss->curr_count = sendcount*comm_size;
179         }
180 
181         /* root has all the data; others have zero so far */
182 
183         mask = 0x1;
184         while (mask < comm_size) {
185             if (relative_rank & mask) {
186                 src = rank - mask;
187                 if (src < 0) src += comm_size;
188 
189                 /* The leaf nodes receive directly into recvbuf because
190                    they don't have to forward data to anyone. Others
191                    receive data into a temporary buffer. */
192                 if (relative_rank % 2) {
193                     mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, src, comm_ptr, s);
194                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
195                     MPID_SCHED_BARRIER(s);
196                 }
197                 else {
198 
199                     /* the recv size is larger than what may be sent in
200                        some cases. query amount of data actually received */
201                     mpi_errno = MPID_Sched_recv_status(tmp_buf, tmp_buf_size, MPI_BYTE, src, comm_ptr, &ss->status, s);
202                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
203                     MPID_SCHED_BARRIER(s);
204                     mpi_errno = MPID_Sched_cb(&get_count, ss, s);
205                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
206                     MPID_SCHED_BARRIER(s);
207                 }
208                 break;
209             }
210             mask <<= 1;
211         }
212 
213         /* This process is responsible for all processes that have bits
214            set from the LSB upto (but not including) mask.  Because of
215            the "not including", we start by shifting mask back down
216            one. */
217 
218         mask >>= 1;
219         while (mask > 0) {
220             if (relative_rank + mask < comm_size) {
221                 dst = rank + mask;
222                 if (dst >= comm_size) dst -= comm_size;
223 
224                 if ((rank == root) && (root == 0))
225                 {
226 #if 0
227                     /* FIXME how can this be right? shouldn't (sendcount*mask)
228                      * be the amount sent and curr_cnt be reduced by that?  Or
229                      * is it always true the (curr_cnt/2==sendcount*mask)? */
230                     send_subtree_cnt = curr_cnt - sendcount * mask;
231 #endif
232                     mpi_errno = MPID_Sched_cb2(&calc_send_count_root, ss, ((void *)(size_t)mask), s);
233                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
234                     MPID_SCHED_BARRIER(s);
235 
236                     /* mask is also the size of this process's subtree */
237                     mpi_errno = MPID_Sched_send_defer(((char *)sendbuf + extent*sendcount*mask),
238                                                       &ss->send_subtree_count, sendtype, dst,
239                                                       comm_ptr, s);
240                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
241                     MPID_SCHED_BARRIER(s);
242                 }
243                 else
244                 {
245                     /* non-zero root and others */
246                     mpi_errno = MPID_Sched_cb2(&calc_send_count_non_root, ss, ((void *)(size_t)mask), s);
247                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
248                     MPID_SCHED_BARRIER(s);
249 
250                     /* mask is also the size of this process's subtree */
251                     mpi_errno = MPID_Sched_send_defer(((char *)tmp_buf + ss->nbytes*mask),
252                                                       &ss->send_subtree_count, MPI_BYTE, dst,
253                                                       comm_ptr, s);
254                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
255                     MPID_SCHED_BARRIER(s);
256                 }
257                 mpi_errno = MPID_Sched_cb(&calc_curr_count, ss, s);
258                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
259                 MPID_SCHED_BARRIER(s);
260             }
261             mask >>= 1;
262         }
263 
264         if ((rank == root) && (root == 0) && (recvbuf != MPI_IN_PLACE)) {
265             /* for root=0, put root's data in recvbuf if not MPI_IN_PLACE */
266             mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
267                                         recvbuf, recvcount, recvtype, s);
268             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
269             MPID_SCHED_BARRIER(s);
270         }
271         else if (!(relative_rank % 2) && (recvbuf != MPI_IN_PLACE)) {
272             /* for non-zero root and non-leaf nodes, copy from tmp_buf
273                into recvbuf */
274             mpi_errno = MPID_Sched_copy(tmp_buf, ss->nbytes, MPI_BYTE,
275                                         recvbuf, recvcount, recvtype, s);
276             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
277             MPID_SCHED_BARRIER(s);
278         }
279 
280     }
281 #ifdef MPID_HAS_HETERO
282     else { /* communicator is heterogeneous */
283         int position;
284         MPIU_Assertp(FALSE); /* hetero case not yet implemented */
285 
286         if (rank == root) {
287             MPIR_Pack_size_impl(sendcount*comm_size, sendtype, &tmp_buf_size);
288 
289             MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
290 
291           /* calculate the value of nbytes, the number of bytes in packed
292              representation that each process receives. We can't
293              accurately calculate that from tmp_buf_size because
294              MPI_Pack_size returns an upper bound on the amount of memory
295              required. (For example, for a single integer, MPICH-1 returns
296              pack_size=12.) Therefore, we actually pack some data into
297              tmp_buf and see by how much 'position' is incremented. */
298 
299             position = 0;
300             mpi_errno = MPIR_Pack_impl(sendbuf, 1, sendtype, tmp_buf, tmp_buf_size, &position);
301             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
302 
303             nbytes = position*sendcount;
304 
305             curr_cnt = nbytes*comm_size;
306 
307             if (root == 0) {
308                 if (recvbuf != MPI_IN_PLACE) {
309                     position = 0;
310                     mpi_errno = MPIR_Pack_impl(sendbuf, sendcount*comm_size, sendtype, tmp_buf,
311                                                tmp_buf_size, &position);
312                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
313                 }
314                 else {
315                     position = nbytes;
316                     mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount),
317                                                sendcount*(comm_size-1), sendtype, tmp_buf,
318                                                tmp_buf_size, &position);
319                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
320                 }
321             }
322             else {
323                 if (recvbuf != MPI_IN_PLACE) {
324                     position = 0;
325                     mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount*rank),
326                                                sendcount*(comm_size-rank), sendtype, tmp_buf,
327                                                tmp_buf_size, &position);
328                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
329                 }
330                 else {
331                     position = nbytes;
332                     mpi_errno = MPIR_Pack_impl(((char *) sendbuf + extent*sendcount*(rank+1)),
333                                                sendcount*(comm_size-rank-1), sendtype, tmp_buf,
334                                                tmp_buf_size, &position);
335                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
336                 }
337                 mpi_errno = MPIR_Pack_impl(sendbuf, sendcount*rank, sendtype, tmp_buf,
338                                            tmp_buf_size, &position);
339                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
340             }
341         }
342         else {
343             MPIR_Pack_size_impl(recvcount*(comm_size/2), recvtype, &tmp_buf_size);
344             MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
345 
346             /* calculate nbytes */
347             position = 0;
348             mpi_errno = MPIR_Pack_impl(recvbuf, 1, recvtype, tmp_buf, tmp_buf_size, &position);
349             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
350             nbytes = position*recvcount;
351 
352             curr_cnt = 0;
353         }
354 
355         mask = 0x1;
356         while (mask < comm_size) {
357             if (relative_rank & mask) {
358                 src = rank - mask;
359                 if (src < 0) src += comm_size;
360 
361                 mpi_errno = MPIC_Recv_ft(tmp_buf, tmp_buf_size, MPI_BYTE, src,
362                                          MPIR_SCATTER_TAG, comm, &status, errflag);
363                 if (mpi_errno) {
364                     /* for communication errors, just record the error but continue */
365                     *errflag = TRUE;
366                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
367                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
368                     curr_cnt = 0;
369                 } else
370                     /* the recv size is larger than what may be sent in
371                        some cases. query amount of data actually received */
372                     MPIR_Get_count_impl(&status, MPI_BYTE, &curr_cnt);
373                 break;
374             }
375             mask <<= 1;
376         }
377 
378         /* This process is responsible for all processes that have bits
379            set from the LSB upto (but not including) mask.  Because of
380            the "not including", we start by shifting mask back down
381            one. */
382 
383         mask >>= 1;
384         while (mask > 0) {
385             if (relative_rank + mask < comm_size) {
386                 dst = rank + mask;
387                 if (dst >= comm_size) dst -= comm_size;
388 
389                 send_subtree_cnt = curr_cnt - nbytes * mask;
390                 /* mask is also the size of this process's subtree */
391                 mpi_errno = MPIC_Send_ft(((char *)tmp_buf + nbytes*mask),
392                                          send_subtree_cnt, MPI_BYTE, dst,
393                                          MPIR_SCATTER_TAG, comm, errflag);
394                 if (mpi_errno) {
395                     /* for communication errors, just record the error but continue */
396                     *errflag = TRUE;
397                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
398                     MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
399                 }
400                 curr_cnt -= send_subtree_cnt;
401             }
402             mask >>= 1;
403         }
404 
405         /* copy local data into recvbuf */
406         position = 0;
407         if (recvbuf != MPI_IN_PLACE) {
408             mpi_errno = MPIR_Unpack_impl(tmp_buf, tmp_buf_size, &position, recvbuf,
409                                          recvcount, recvtype);
410             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
411         }
412     }
413 #endif /* MPID_HAS_HETERO */
414 
415 
416     MPIR_SCHED_CHKPMEM_COMMIT(s);
417  fn_exit:
418     return mpi_errno;
419  fn_fail:
420     MPIR_SCHED_CHKPMEM_REAP(s);
421     goto fn_exit;
422 }
423 
424 #undef FUNCNAME
425 #define FUNCNAME MPIR_Iscatter_inter
426 #undef FCNAME
427 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Iscatter_inter(const void * sendbuf,int sendcount,MPI_Datatype sendtype,void * recvbuf,int recvcount,MPI_Datatype recvtype,int root,MPID_Comm * comm_ptr,MPID_Sched_t s)428 int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
429                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
430                         int root, MPID_Comm *comm_ptr, MPID_Sched_t s)
431 {
432 /*  Intercommunicator scatter.
433     For short messages, root sends to rank 0 in remote group. rank 0
434     does local intracommunicator scatter (binomial tree).
435     Cost: (lgp+1).alpha + n.((p-1)/p).beta + n.beta
436 
437     For long messages, we use linear scatter to avoid the extra n.beta.
438     Cost: p.alpha + n.beta
439 */
440     int mpi_errno = MPI_SUCCESS;
441     int rank, local_size, remote_size;
442     int i, nbytes, sendtype_size, recvtype_size;
443     MPI_Aint extent, true_extent, true_lb = 0;
444     void *tmp_buf = NULL;
445     MPID_Comm *newcomm_ptr = NULL;
446     MPIR_SCHED_CHKPMEM_DECL(1);
447 
448     if (root == MPI_PROC_NULL) {
449         /* local processes other than root do nothing */
450         goto fn_exit;
451     }
452 
453     remote_size = comm_ptr->remote_size;
454     local_size  = comm_ptr->local_size;
455 
456     if (root == MPI_ROOT) {
457         MPID_Datatype_get_size_macro(sendtype, sendtype_size);
458         nbytes = sendtype_size * sendcount * remote_size;
459     }
460     else {
461         /* remote side */
462         MPID_Datatype_get_size_macro(recvtype, recvtype_size);
463         nbytes = recvtype_size * recvcount * local_size;
464     }
465 
466     if (nbytes < MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE) {
467         if (root == MPI_ROOT) {
468             /* root sends all data to rank 0 on remote group and returns */
469             mpi_errno = MPID_Sched_send(sendbuf, sendcount*remote_size, sendtype, 0, comm_ptr, s);
470             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
471             MPID_SCHED_BARRIER(s);
472             goto fn_exit;
473         }
474         else {
475             /* remote group. rank 0 receives data from root. need to
476                allocate temporary buffer to store this data. */
477             rank = comm_ptr->rank;
478 
479             if (rank == 0) {
480                 MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &true_extent);
481 
482                 MPID_Datatype_get_extent_macro(recvtype, extent);
483                 MPID_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
484                 MPID_Ensure_Aint_fits_in_pointer(MPI_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
485                                                  sendcount*remote_size*extent);
486 
487                 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, recvcount*local_size*(MPIR_MAX(extent,true_extent)),
488                                           mpi_errno, "tmp_buf");
489 
490                 /* adjust for potential negative lower bound in datatype */
491                 tmp_buf = (void *)((char*)tmp_buf - true_lb);
492 
493                 mpi_errno = MPID_Sched_recv(tmp_buf, recvcount*local_size, recvtype, root, comm_ptr, s);
494                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
495                 MPID_SCHED_BARRIER(s);
496             }
497 
498             /* Get the local intracommunicator */
499             if (!comm_ptr->local_comm)
500                 MPIR_Setup_intercomm_localcomm(comm_ptr);
501 
502             newcomm_ptr = comm_ptr->local_comm;
503 
504             /* now do the usual scatter on this intracommunicator */
505             MPIU_Assert(newcomm_ptr->coll_fns != NULL);
506             MPIU_Assert(newcomm_ptr->coll_fns->Iscatter != NULL);
507             mpi_errno = newcomm_ptr->coll_fns->Iscatter(tmp_buf, recvcount, recvtype,
508                                                         recvbuf, recvcount, recvtype,
509                                                         0, newcomm_ptr, s);
510             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
511             MPID_SCHED_BARRIER(s);
512         }
513     }
514     else {
515         /* long message. use linear algorithm. */
516         if (root == MPI_ROOT) {
517             MPID_Datatype_get_extent_macro(sendtype, extent);
518             for (i = 0; i < remote_size; i++) {
519                 mpi_errno = MPID_Sched_send(((char *)sendbuf+sendcount*i*extent),
520                                             sendcount, sendtype, i, comm_ptr, s);
521                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
522             }
523             MPID_SCHED_BARRIER(s);
524         }
525         else {
526             mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
527             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
528             MPID_SCHED_BARRIER(s);
529         }
530     }
531 
532 
533     MPIR_SCHED_CHKPMEM_COMMIT(s);
534 fn_exit:
535     return mpi_errno;
536 fn_fail:
537     MPIR_SCHED_CHKPMEM_REAP(s);
538     goto fn_exit;
539 }
540 
541 #undef FUNCNAME
542 #define FUNCNAME MPIR_Iscatter_impl
543 #undef FCNAME
544 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Iscatter_impl(const void * sendbuf,int sendcount,MPI_Datatype sendtype,void * recvbuf,int recvcount,MPI_Datatype recvtype,int root,MPID_Comm * comm_ptr,MPI_Request * request)545 int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
546 {
547     int mpi_errno = MPI_SUCCESS;
548     int tag = -1;
549     MPID_Request *reqp = NULL;
550     MPID_Sched_t s = MPID_SCHED_NULL;
551 
552     *request = MPI_REQUEST_NULL;
553 
554     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
555     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
556     mpi_errno = MPID_Sched_create(&s);
557     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
558 
559     MPIU_Assert(comm_ptr->coll_fns != NULL);
560     MPIU_Assert(comm_ptr->coll_fns->Iscatter != NULL);
561     mpi_errno = comm_ptr->coll_fns->Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
562     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
563 
564     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
565     if (reqp)
566         *request = reqp->handle;
567     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
568 
569 fn_exit:
570     return mpi_errno;
571 fn_fail:
572     goto fn_exit;
573 }
574 
575 #endif /* MPICH_MPI_FROM_PMPI */
576 
577 #undef FUNCNAME
578 #define FUNCNAME MPIX_Iscatter
579 #undef FCNAME
580 #define FCNAME MPIU_QUOTE(FUNCNAME)
581 /*@
582 MPIX_Iscatter - XXX description here
583 
584 Input Parameters:
585 + sendbuf - address of send buffer (significant only at root) (choice)
586 . sendcount - number of elements sent to each process (significant only at root) (non-negative integer)
587 . sendtype - data type of send buffer elements (significant only at root) (handle)
588 . recvcount - number of elements in receive buffer (non-negative integer)
589 . recvtype - data type of receive buffer elements (handle)
590 . root - rank of sending process (integer)
591 - comm - communicator (handle)
592 
593 Output Parameters:
594 + recvbuf - starting address of the receive buffer (choice)
595 - request - communication request (handle)
596 
597 .N ThreadSafe
598 
599 .N Fortran
600 
601 .N Errors
602 @*/
MPIX_Iscatter(const void * sendbuf,int sendcount,MPI_Datatype sendtype,void * recvbuf,int recvcount,MPI_Datatype recvtype,int root,MPI_Comm comm,MPI_Request * request)603 int MPIX_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
604 {
605     int mpi_errno = MPI_SUCCESS;
606     MPID_Comm *comm_ptr = NULL;
607     MPID_Datatype *sendtype_ptr, *recvtype_ptr;
608     MPID_MPI_STATE_DECL(MPID_STATE_MPIX_ISCATTER);
609 
610     MPIU_THREAD_CS_ENTER(ALLFUNC,);
611     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_ISCATTER);
612 
613     /* Validate parameters, especially handles needing to be converted */
614 #   ifdef HAVE_ERROR_CHECKING
615     {
616         MPID_BEGIN_ERROR_CHECKS
617         {
618             MPIR_ERRTEST_COMM(comm, mpi_errno);
619 
620             /* TODO more checks may be appropriate */
621         }
622         MPID_END_ERROR_CHECKS
623     }
624 #   endif /* HAVE_ERROR_CHECKING */
625 
626     /* Convert MPI object handles to object pointers */
627     MPID_Comm_get_ptr(comm, comm_ptr);
628 
629     /* Validate parameters and objects (post conversion) */
630 #   ifdef HAVE_ERROR_CHECKING
631     {
632         MPID_BEGIN_ERROR_CHECKS
633         {
634             MPID_Comm_valid_ptr(comm_ptr, mpi_errno);
635             if (comm_ptr->comm_kind == MPID_INTRACOMM) {
636                 MPIR_ERRTEST_INTRA_ROOT(comm_ptr, root, mpi_errno);
637 
638                 if (comm_ptr->rank == root) {
639                     MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
640                     MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
641                     if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
642                         MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
643                         MPID_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
644                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
645                         MPID_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
646                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
647                     }
648                     MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
649                     MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
650 
651                     /* catch common aliasing cases */
652                     if (recvbuf != MPI_IN_PLACE && sendtype == recvtype && sendcount == recvcount && recvcount != 0)
653                         MPIR_ERRTEST_ALIAS_COLL(sendbuf,recvbuf,mpi_errno);
654                 }
655                 else
656                     MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
657 
658                 if (recvbuf != MPI_IN_PLACE) {
659                     MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
660                     MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
661                     if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
662                         MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
663                         MPID_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
664                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
665                         MPID_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
666                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
667                     }
668                     MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
669                 }
670             }
671 
672             if (comm_ptr->comm_kind == MPID_INTERCOMM) {
673                 MPIR_ERRTEST_INTER_ROOT(comm_ptr, root, mpi_errno);
674 
675                 if (root == MPI_ROOT) {
676                     MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
677                     MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
678                     if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
679                         MPID_Datatype_get_ptr(sendtype, sendtype_ptr);
680                         MPID_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
681                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
682                         MPID_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
683                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
684                     }
685                     MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
686                     MPIR_ERRTEST_USERBUFFER(sendbuf,sendcount,sendtype,mpi_errno);
687                 }
688                 else if (root != MPI_PROC_NULL) {
689                     MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
690                     MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
691                     if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
692                         MPID_Datatype_get_ptr(recvtype, recvtype_ptr);
693                         MPID_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
694                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
695                         MPID_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
696                         if (mpi_errno != MPI_SUCCESS) goto fn_fail;
697                     }
698                     MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
699                     MPIR_ERRTEST_USERBUFFER(recvbuf,recvcount,recvtype,mpi_errno);
700                 }
701             }
702         }
703         MPID_END_ERROR_CHECKS
704     }
705 #   endif /* HAVE_ERROR_CHECKING */
706 
707     /* ... body of routine ...  */
708 
709     mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, request);
710     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
711 
712     /* ... end of body of routine ... */
713 
714 fn_exit:
715     MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_ISCATTER);
716     MPIU_THREAD_CS_EXIT(ALLFUNC,);
717     return mpi_errno;
718 
719 fn_fail:
720     /* --BEGIN ERROR HANDLING-- */
721 #   ifdef HAVE_ERROR_CHECKING
722     {
723         mpi_errno = MPIR_Err_create_code(
724             mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
725             "**mpix_iscatter", "**mpix_iscatter %p %d %D %p %d %D %d %C %p", sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request);
726     }
727 #   endif
728     mpi_errno = MPIR_Err_return_comm(comm_ptr, FCNAME, mpi_errno);
729     goto fn_exit;
730     /* --END ERROR HANDLING-- */
731     goto fn_exit;
732 }
733