1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3 * (C) 2010 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 #include "mpiimpl.h"
8 #include "collutil.h"
9
10 /* -- Begin Profiling Symbol Block for routine MPIX_Ireduce_scatter_block */
11 #if defined(HAVE_PRAGMA_WEAK)
12 #pragma weak MPIX_Ireduce_scatter_block = PMPIX_Ireduce_scatter_block
13 #elif defined(HAVE_PRAGMA_HP_SEC_DEF)
14 #pragma _HP_SECONDARY_DEF PMPIX_Ireduce_scatter_block MPIX_Ireduce_scatter_block
15 #elif defined(HAVE_PRAGMA_CRI_DUP)
16 #pragma _CRI duplicate MPIX_Ireduce_scatter_block as PMPIX_Ireduce_scatter_block
17 #endif
18 /* -- End Profiling Symbol Block */
19
20 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
21 the MPI routines */
22 #ifndef MPICH_MPI_FROM_PMPI
23 #undef MPIX_Ireduce_scatter_block
24 #define MPIX_Ireduce_scatter_block PMPIX_Ireduce_scatter_block
25
26 /* any non-MPI functions go here, especially non-static ones */
27
28 /* A recursive halving MPIX_Ireduce_scatter_block algorithm. Requires that op is
29 * commutative. Typically yields better performance for shorter messages. */
30 #undef FUNCNAME
31 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_hlv
32 #undef FCNAME
33 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Schedule a recursive-halving nonblocking reduce_scatter_block.
 *
 * sendbuf   - source data, or MPI_IN_PLACE (input is then taken from recvbuf)
 * recvbuf   - receives this process's block of the reduced result
 * recvcount - number of elements delivered to every process
 * datatype  - element datatype
 * op        - reduction operation; must be commutative (asserted below)
 * comm_ptr  - intracommunicator
 * s         - schedule to which operations are appended
 *
 * Returns MPI_SUCCESS or an MPI error code. On failure, memory registered
 * with the schedule is reaped via MPIR_SCHED_CHKPMEM_REAP. */
int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank, comm_size, i;
    MPI_Aint extent, true_extent, true_lb;
    int *disps;
    void *tmp_recvbuf, *tmp_results;
    int type_size ATTRIBUTE((unused)), total_count, dst;
    int mask;
    int *newcnts, *newdisps, rem, newdst, send_idx, recv_idx,
        last_idx, send_cnt, recv_cnt;
    int pof2, old_i, newrank;
    MPIR_SCHED_CHKPMEM_DECL(5);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPID_Datatype_get_extent_macro(datatype, extent);
    MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);

    MPIU_Assert(MPIR_Op_is_commutative(op));

    MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");

    /* every process contributes the same block size, so the displacement
       of block i is simply i*recvcount */
    total_count = 0;
    for (i=0; i<comm_size; i++) {
        disps[i] = total_count;
        total_count += recvcount;
    }

    if (total_count == 0) {
        goto fn_exit;
    }

    MPID_Datatype_get_size_macro(datatype, type_size);

    /* allocate temp. buffer to receive incoming data */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
    /* adjust for potential negative lower bound in datatype */
    tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);

    /* need to allocate another temporary buffer to accumulate
       results because recvbuf may not be big enough */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_results, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_results");
    /* adjust for potential negative lower bound in datatype */
    tmp_results = (void *)((char*)tmp_results - true_lb);

    /* copy sendbuf into tmp_results */
    if (sendbuf != MPI_IN_PLACE)
        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    else
        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);

    /* pof2 = largest power of two <= comm_size */
    pof2 = 1;
    while (pof2 <= comm_size) pof2 <<= 1;
    pof2 >>=1;

    rem = comm_size - pof2;

    /* In the non-power-of-two case, all even-numbered
       processes of rank < 2*rem send their data to
       (rank+1). These even-numbered processes no longer
       participate in the algorithm until the very end. The
       remaining processes form a nice power-of-two. */

    if (rank < 2*rem) {
        if (rank % 2 == 0) { /* even */
            mpi_errno = MPID_Sched_send(tmp_results, total_count, datatype, rank+1, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);

            /* temporarily set the rank to -1 so that this
               process does not pariticipate in recursive
               doubling */
            newrank = -1;
        }
        else { /* odd */
            mpi_errno = MPID_Sched_recv(tmp_recvbuf, total_count, datatype, rank-1, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);

            /* do the reduction on received data. since the
               ordering is right, it doesn't matter whether
               the operation is commutative or not. */
            mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);

            /* change the rank */
            newrank = rank / 2;
        }
    }
    else  /* rank >= 2*rem */
        newrank = rank - rem;

    if (newrank != -1) {
        /* recalculate the recvcnts and disps arrays because the
           even-numbered processes who no longer participate will
           have their result calculated by the process to their
           right (rank+1). */

        MPIR_SCHED_CHKPMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts");
        MPIR_SCHED_CHKPMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps");

        for (i = 0; i < pof2; i++) {
            /* what does i map to in the old ranking? */
            old_i = (i < rem) ? i*2 + 1 : i + rem;
            if (old_i < 2*rem) {
                /* This process has to also do its left neighbor's
                   work */
                newcnts[i] = 2 * recvcount;
            }
            else
                newcnts[i] = recvcount;
        }

        newdisps[0] = 0;
        for (i=1; i<pof2; i++)
            newdisps[i] = newdisps[i-1] + newcnts[i-1];

        /* recursive halving: in each round exchange half of the remaining
           index range with the partner and reduce the received half */
        mask = pof2 >> 1;
        send_idx = recv_idx = 0;
        last_idx = pof2;
        while (mask > 0) {
            newdst = newrank ^ mask;
            /* find real rank of dest */
            dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem;

            send_cnt = recv_cnt = 0;
            if (newrank < newdst) {
                send_idx = recv_idx + mask;
                for (i=send_idx; i<last_idx; i++)
                    send_cnt += newcnts[i];
                for (i=recv_idx; i<send_idx; i++)
                    recv_cnt += newcnts[i];
            }
            else {
                recv_idx = send_idx + mask;
                for (i=send_idx; i<recv_idx; i++)
                    send_cnt += newcnts[i];
                for (i=recv_idx; i<last_idx; i++)
                    recv_cnt += newcnts[i];
            }

            /* Send data from tmp_results. Recv into tmp_recvbuf */
            {
                /* avoid sending and receiving pointless 0-byte messages */
                int send_dst = (send_cnt ? dst : MPI_PROC_NULL);
                int recv_dst = (recv_cnt ? dst : MPI_PROC_NULL);

                mpi_errno = MPID_Sched_send(((char *)tmp_results + newdisps[send_idx]*extent),
                                            send_cnt, datatype, send_dst, comm_ptr, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                mpi_errno = MPID_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx]*extent),
                                            recv_cnt, datatype, recv_dst, comm_ptr, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }

            /* tmp_recvbuf contains data received in this step.
               tmp_results contains data accumulated so far */
            if (recv_cnt) {
                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + newdisps[recv_idx]*extent),
                                              ((char *)tmp_results + newdisps[recv_idx]*extent),
                                              recv_cnt, datatype, op, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }

            /* update send_idx for next iteration */
            send_idx = recv_idx;
            last_idx = recv_idx + mask;
            mask >>= 1;
        }

        /* copy this process's result from tmp_results to recvbuf */
        mpi_errno = MPID_Sched_copy(((char *)tmp_results + disps[rank]*extent),
                                    recvcount, datatype,
                                    recvbuf, recvcount, datatype, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(s);

    }

    /* In the non-power-of-two case, all odd-numbered
       processes of rank < 2*rem send to (rank-1) the result they
       calculated for that process */
    if (rank < 2*rem) {
        if (rank % 2) { /* odd */
            mpi_errno = MPID_Sched_send(((char *)tmp_results + disps[rank-1]*extent),
                                        recvcount, datatype, rank-1, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }
        else  { /* even */
            mpi_errno = MPID_Sched_recv(recvbuf, recvcount, datatype, rank+1, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
        }
    }


    MPIR_SCHED_CHKPMEM_COMMIT(s);
fn_exit:
    return mpi_errno;
fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
247
248 /* A pairwise exchange algorithm for MPI_Ireduce_scatter_block. Requires a
249 * commutative op and is intended for use with large messages. */
250 #undef FUNCNAME
251 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_pairwise
252 #undef FCNAME
253 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Ireduce_scatter_block_pairwise(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPID_Comm * comm_ptr,MPID_Sched_t s)254 int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
255 {
256 int mpi_errno = MPI_SUCCESS;
257 int rank, comm_size, i;
258 MPI_Aint extent, true_extent, true_lb;
259 int *disps;
260 void *tmp_recvbuf;
261 int src, dst;
262 int is_commutative;
263 int total_count;
264 MPIR_SCHED_CHKPMEM_DECL(2);
265
266 comm_size = comm_ptr->local_size;
267 rank = comm_ptr->rank;
268
269 MPID_Datatype_get_extent_macro(datatype, extent);
270 MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
271
272 is_commutative = MPIR_Op_is_commutative(op);
273 MPIU_Assert(is_commutative);
274
275 MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
276
277 total_count = 0;
278 for (i=0; i<comm_size; i++) {
279 disps[i] = total_count;
280 total_count += recvcount;
281 }
282
283 if (total_count == 0) {
284 goto fn_exit;
285 }
286 /* total_count*extent eventually gets malloced. it isn't added to
287 * a user-passed in buffer */
288 MPID_Ensure_Aint_fits_in_pointer(total_count * MPIR_MAX(true_extent, extent));
289
290 if (sendbuf != MPI_IN_PLACE) {
291 /* copy local data into recvbuf */
292 mpi_errno = MPID_Sched_copy(((char *)sendbuf+disps[rank]*extent),
293 recvcount, datatype,
294 recvbuf, recvcount, datatype, s);
295 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
296 MPID_SCHED_BARRIER(s);
297 }
298
299 /* allocate temporary buffer to store incoming data */
300 MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, recvcount*(MPIR_MAX(true_extent,extent))+1, mpi_errno, "tmp_recvbuf");
301 /* adjust for potential negative lower bound in datatype */
302 tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
303
304 for (i=1; i<comm_size; i++) {
305 src = (rank - i + comm_size) % comm_size;
306 dst = (rank + i) % comm_size;
307
308 /* send the data that dst needs. recv data that this process
309 needs from src into tmp_recvbuf */
310 if (sendbuf != MPI_IN_PLACE) {
311 mpi_errno = MPID_Sched_send(((char *)sendbuf+disps[dst]*extent),
312 recvcount, datatype, dst, comm_ptr, s);
313 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
314 }
315 else {
316 mpi_errno = MPID_Sched_send(((char *)recvbuf+disps[dst]*extent),
317 recvcount, datatype, dst, comm_ptr, s);
318 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
319 }
320 mpi_errno = MPID_Sched_recv(tmp_recvbuf, recvcount, datatype, src, comm_ptr, s);
321 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
322 MPID_SCHED_BARRIER(s);
323
324 /* FIXME does this algorithm actually work correctly for noncommutative ops?
325 * If so, relax restriction in assert and comments... */
326 if (is_commutative || (src < rank)) {
327 if (sendbuf != MPI_IN_PLACE) {
328 mpi_errno = MPID_Sched_reduce(tmp_recvbuf, recvbuf, recvcount, datatype, op, s);
329 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
330 }
331 else {
332 mpi_errno = MPID_Sched_reduce(tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
333 recvcount, datatype, op, s);
334 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
335 /* We can't store the result at the beginning of
336 recvbuf right here because there is useful data there that
337 other process/processes need. At the end we will copy back
338 the result to the beginning of recvbuf. */
339 }
340 MPID_SCHED_BARRIER(s);
341 }
342 else {
343 if (sendbuf != MPI_IN_PLACE) {
344 mpi_errno = MPID_Sched_reduce(recvbuf, tmp_recvbuf, recvcount, datatype, op, s);
345 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
346 MPID_SCHED_BARRIER(s);
347 /* copy result back into recvbuf */
348 mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcount, datatype,
349 recvbuf, recvcount, datatype, s);
350 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
351 }
352 else {
353 mpi_errno = MPID_Sched_reduce(((char *)recvbuf+disps[rank]*extent),
354 tmp_recvbuf, recvcount, datatype, op, s);
355 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
356 MPID_SCHED_BARRIER(s);
357 /* copy result back into recvbuf */
358 mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcount, datatype,
359 ((char *)recvbuf + disps[rank]*extent),
360 recvcount, datatype, s);
361 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
362 }
363 MPID_SCHED_BARRIER(s);
364 }
365 }
366
367 /* if MPI_IN_PLACE, move output data to the beginning of
368 recvbuf. already done for rank 0. */
369 if ((sendbuf == MPI_IN_PLACE) && (rank != 0)) {
370 mpi_errno = MPID_Sched_copy(((char *)recvbuf + disps[rank]*extent),
371 recvcount, datatype,
372 recvbuf, recvcount, datatype, s);
373 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
374 MPID_SCHED_BARRIER(s);
375 }
376
377 MPIR_SCHED_CHKPMEM_COMMIT(s);
378 fn_exit:
379 return mpi_errno;
380 fn_fail:
381 MPIR_SCHED_CHKPMEM_REAP(s);
382 goto fn_exit;
383 }
384
385 /* A recursive doubling algorithm for MPI_Ireduce_scatter_block, suitable for
386 * noncommutative and (non-pof2 or block irregular). */
387 #undef FUNCNAME
388 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_dbl
389 #undef FCNAME
390 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Schedule a recursive-doubling nonblocking reduce_scatter_block.
 *
 * Used for noncommutative ops on non-power-of-two communicators; derived
 * indexed datatypes describe the shrinking data range exchanged per step.
 *
 * sendbuf   - source data, or MPI_IN_PLACE (input is then taken from recvbuf)
 * recvbuf   - receives this process's block of the reduced result
 * recvcount - number of elements delivered to every process
 * datatype  - element datatype
 * op        - reduction operation (need not be commutative)
 * comm_ptr  - intracommunicator
 * s         - schedule to which operations are appended
 *
 * Returns MPI_SUCCESS or an MPI error code. */
int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank, comm_size, i;
    MPI_Aint extent, true_extent, true_lb;
    int *disps;
    void *tmp_recvbuf, *tmp_results;
    int type_size ATTRIBUTE((unused)), dis[2], blklens[2], total_count, dst;
    int mask, dst_tree_root, my_tree_root, j, k;
    int received;
    MPI_Datatype sendtype, recvtype;
    int nprocs_completed, tmp_mask, tree_root, is_commutative;
    MPIR_SCHED_CHKPMEM_DECL(5);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPID_Datatype_get_extent_macro(datatype, extent);
    MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
    is_commutative = MPIR_Op_is_commutative(op);

    MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");

    /* every process contributes the same block size, so the displacement
       of block i is simply i*recvcount */
    total_count = 0;
    for (i=0; i<comm_size; i++) {
        disps[i] = total_count;
        total_count += recvcount;
    }

    if (total_count == 0) {
        goto fn_exit;
    }

    MPID_Datatype_get_size_macro(datatype, type_size);

    /* total_count*extent eventually gets malloced. it isn't added to
     * a user-passed in buffer */
    MPID_Ensure_Aint_fits_in_pointer(total_count * MPIR_MAX(true_extent, extent));


    /* need to allocate temporary buffer to receive incoming data*/
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
    /* adjust for potential negative lower bound in datatype */
    tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);

    /* need to allocate another temporary buffer to accumulate
       results */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_results, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_results");
    /* adjust for potential negative lower bound in datatype */
    tmp_results = (void *)((char*)tmp_results - true_lb);

    /* copy sendbuf into tmp_results */
    if (sendbuf != MPI_IN_PLACE)
        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    else
        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);

    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);

    mask = 0x1;
    i = 0;
    while (mask < comm_size) {
        dst = rank ^ mask;

        dst_tree_root = dst >> i;
        dst_tree_root <<= i;

        my_tree_root = rank >> i;
        my_tree_root <<= i;

        /* At step 1, processes exchange (n-n/p) amount of
           data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p)
           amount of data, and so forth. We use derived datatypes for this.

           At each step, a process does not need to send data
           indexed from my_tree_root to
           my_tree_root+mask-1. Similarly, a process won't receive
           data indexed from dst_tree_root to dst_tree_root+mask-1. */

        /* calculate sendtype */
        blklens[0] = my_tree_root * recvcount;
        blklens[1] = (comm_size - (my_tree_root + mask)) * recvcount;
        if (blklens[1] < 0)
            blklens[1] = 0;

        dis[0] = 0;
        dis[1] = blklens[0] + recvcount * (MPIR_MIN((my_tree_root + mask), comm_size) - my_tree_root);

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &sendtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&sendtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* calculate recvtype */
        blklens[0] = recvcount * MPIR_MIN(dst_tree_root, comm_size);
        blklens[1] = recvcount * (comm_size - (dst_tree_root + mask));
        if (blklens[1] < 0)
            blklens[1] = 0;

        dis[0] = 0;
        /* NOTE(review): the first assignment below is a dead store that is
           immediately overwritten; kept for fidelity with the original. */
        dis[1] = blklens[0];
        dis[1] = blklens[0] + recvcount * (MPIR_MIN((dst_tree_root + mask), comm_size) - dst_tree_root);

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &recvtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&recvtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        received = 0;
        if (dst < comm_size) {
            /* tmp_results contains data to be sent in each step. Data is
               received in tmp_recvbuf and then accumulated into
               tmp_results. accumulation is done later below.   */
            mpi_errno = MPID_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(s);
            received = 1;
        }

        /* if some processes in this process's subtree in this step
           did not have any destination process to communicate with
           because of non-power-of-two, we need to send them the
           result. We use a logarithmic recursive-halfing algorithm
           for this. */

        if (dst_tree_root + mask > comm_size) {
            nprocs_completed = comm_size - my_tree_root - mask;
            /* nprocs_completed is the number of processes in this
               subtree that have all the data. Send data to others
               in a tree fashion. First find root of current tree
               that is being divided into two. k is the number of
               least-significant bits in this process's rank that
               must be zeroed out to find the rank of the root */
            j = mask;
            k = 0;
            while (j) {
                j >>= 1;
                k++;
            }
            k--;

            tmp_mask = mask >> 1;
            while (tmp_mask) {
                dst = rank ^ tmp_mask;

                tree_root = rank >> k;
                tree_root <<= k;

                /* send only if this proc has data and destination
                   doesn't have data. at any step, multiple processes
                   can send if they have the data */
                if ((dst > rank) &&
                    (rank < tree_root + nprocs_completed)
                    && (dst >= tree_root + nprocs_completed))
                {
                    /* send the current result */
                    mpi_errno = MPID_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                }
                /* recv only if this proc. doesn't have data and sender
                   has data */
                else if ((dst < rank) &&
                         (dst < tree_root + nprocs_completed) &&
                         (rank >= tree_root + nprocs_completed))
                {
                    mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                    MPID_SCHED_BARRIER(s);
                    received = 1;
                }
                tmp_mask >>= 1;
                k--;
            }
        }

        /* N.B. The following comment comes from the FT version of
         * MPI_Reduce_scatter.  It does not currently apply to this code, but
         * will in the future when we update the NBC code to be fault-tolerant
         * in roughly the same fashion. [goodell@ 2011-03-03] */
        /* The following reduction is done here instead of after
           the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
           because to do it above, in the noncommutative
           case, we would need an extra temp buffer so as not to
           overwrite temp_recvbuf, because temp_recvbuf may have
           to be communicated to other processes in the
           non-power-of-two case. To avoid that extra allocation,
           we do the reduce here. */
        if (received) {
            if (is_commutative || (dst_tree_root < my_tree_root)) {
                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
                                              ((char *)tmp_results + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }
            else {
                mpi_errno = MPID_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                mpi_errno = MPID_Sched_reduce(((char *)tmp_results + dis[1]*extent),
                                              ((char *)tmp_recvbuf + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);

                /* copy result back into tmp_results */
                mpi_errno = MPID_Sched_copy(tmp_recvbuf, 1, recvtype,
                                            tmp_results, 1, recvtype, s);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                MPID_SCHED_BARRIER(s);
            }
        }

        MPIR_Type_free_impl(&sendtype);
        MPIR_Type_free_impl(&recvtype);

        mask <<= 1;
        i++;
    }

    /* now copy final results from tmp_results to recvbuf */
    mpi_errno = MPID_Sched_copy(((char *)tmp_results+disps[rank]*extent),
                                recvcount, datatype,
                                recvbuf, recvcount, datatype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);

    MPIR_SCHED_CHKPMEM_COMMIT(s);
fn_exit:
    return mpi_errno;
fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
634
635 /* Implements the reduce-scatter butterfly algorithm described in J. L. Traff's
636 * "An Improved Algorithm for (Non-commutative) Reduce-Scatter with an Application"
637 * from EuroPVM/MPI 2005. This function currently only implements support for
638 * the power-of-2 case. */
639 #undef FUNCNAME
640 #define FUNCNAME MPIR_Reduce_scatter_block_noncomm
641 #undef FCNAME
642 #define FCNAME MPIU_QUOTE(FUNCNAME)
MPIR_Ireduce_scatter_block_noncomm(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPID_Comm * comm_ptr,MPID_Sched_t s)643 int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
644 {
645 int mpi_errno = MPI_SUCCESS;
646 int comm_size = comm_ptr->local_size;
647 int rank = comm_ptr->rank;
648 int pof2;
649 int log2_comm_size;
650 int i, k;
651 int recv_offset, send_offset;
652 int block_size, total_count, size;
653 MPI_Aint true_extent, true_lb;
654 int buf0_was_inout;
655 void *tmp_buf0;
656 void *tmp_buf1;
657 void *result_ptr;
658 MPIR_SCHED_CHKPMEM_DECL(2);
659
660 MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
661
662 pof2 = 1;
663 log2_comm_size = 0;
664 while (pof2 < comm_size) {
665 pof2 <<= 1;
666 ++log2_comm_size;
667 }
668
669 /* begin error checking */
670 MPIU_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
671 /* end error checking */
672
673 /* size of a block (count of datatype per block, NOT bytes per block) */
674 block_size = recvcount;
675 total_count = block_size * comm_size;
676
677 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf0, void *, true_extent * total_count, mpi_errno, "tmp_buf0");
678 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf1, void *, true_extent * total_count, mpi_errno, "tmp_buf1");
679 /* adjust for potential negative lower bound in datatype */
680 tmp_buf0 = (void *)((char*)tmp_buf0 - true_lb);
681 tmp_buf1 = (void *)((char*)tmp_buf1 - true_lb);
682
683 /* Copy our send data to tmp_buf0. We do this one block at a time and
684 permute the blocks as we go according to the mirror permutation. */
685 for (i = 0; i < comm_size; ++i) {
686 mpi_errno = MPID_Sched_copy(((char *)(sendbuf == MPI_IN_PLACE ? recvbuf : sendbuf) + (i * true_extent * block_size)),
687 block_size, datatype,
688 ((char *)tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size)),
689 block_size, datatype, s);
690 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
691 }
692 MPID_SCHED_BARRIER(s);
693 buf0_was_inout = 1;
694
695 send_offset = 0;
696 recv_offset = 0;
697 size = total_count;
698 for (k = 0; k < log2_comm_size; ++k) {
699 /* use a double-buffering scheme to avoid local copies */
700 char *incoming_data = (buf0_was_inout ? tmp_buf1 : tmp_buf0);
701 char *outgoing_data = (buf0_was_inout ? tmp_buf0 : tmp_buf1);
702 int peer = rank ^ (0x1 << k);
703 size /= 2;
704
705 if (rank > peer) {
706 /* we have the higher rank: send top half, recv bottom half */
707 recv_offset += size;
708 }
709 else {
710 /* we have the lower rank: recv top half, send bottom half */
711 send_offset += size;
712 }
713
714 mpi_errno = MPID_Sched_send((outgoing_data + send_offset*true_extent),
715 size, datatype, peer, comm_ptr, s);
716 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
717 mpi_errno = MPID_Sched_recv((incoming_data + recv_offset*true_extent),
718 size, datatype, peer, comm_ptr, s);
719 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
720 MPID_SCHED_BARRIER(s);
721
722 /* always perform the reduction at recv_offset, the data at send_offset
723 is now our peer's responsibility */
724 if (rank > peer) {
725 /* higher ranked value so need to call op(received_data, my_data) */
726 mpi_errno = MPID_Sched_reduce((incoming_data + recv_offset*true_extent),
727 (outgoing_data + recv_offset*true_extent),
728 size, datatype, op, s);
729 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
730 buf0_was_inout = buf0_was_inout;
731 }
732 else {
733 /* lower ranked value so need to call op(my_data, received_data) */
734 mpi_errno = MPID_Sched_reduce((outgoing_data + recv_offset*true_extent),
735 (incoming_data + recv_offset*true_extent),
736 size, datatype, op, s);
737 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
738 buf0_was_inout = !buf0_was_inout;
739 }
740 MPID_SCHED_BARRIER(s);
741
742 /* the next round of send/recv needs to happen within the block (of size
743 "size") that we just received and reduced */
744 send_offset = recv_offset;
745 }
746
747 MPIU_Assert(size == recvcount);
748
749 /* copy the reduced data to the recvbuf */
750 result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
751 mpi_errno = MPID_Sched_copy(result_ptr, size, datatype,
752 recvbuf, size, datatype, s);
753 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
754
755 MPIR_SCHED_CHKPMEM_COMMIT(s);
756 fn_exit:
757 return mpi_errno;
758 fn_fail:
759 MPIR_SCHED_CHKPMEM_REAP(s);
760 goto fn_exit;
761 }
762
763 #undef FUNCNAME
764 #define FUNCNAME MPIR_Ireduce_scatter_block_intra
765 #undef FCNAME
766 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Intracommunicator dispatcher for nonblocking reduce_scatter_block.
 *
 * Selects one of the four scheduling algorithms above:
 *   - commutative op, short message  -> recursive halving
 *   - commutative op, long message   -> pairwise exchange
 *   - noncommutative, pof2 size      -> butterfly (noncomm)
 *   - noncommutative, non-pof2 size  -> recursive doubling
 * The short/long cutoff is MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE
 * compared against the total payload in bytes.
 *
 * Interface matches the algorithm functions; returns MPI_SUCCESS or an
 * MPI error code. */
int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int is_commutative;
    int total_count, type_size, nbytes;
    int comm_size;

    is_commutative = MPIR_Op_is_commutative(op);

    comm_size = comm_ptr->local_size;
    total_count = recvcount * comm_size;
    if (total_count == 0) {
        /* nothing to communicate or reduce */
        goto fn_exit;
    }
    MPID_Datatype_get_size_macro(datatype, type_size);
    nbytes = total_count * type_size;

    /* select an appropriate algorithm based on commutivity and message size */
    if (is_commutative && (nbytes < MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno = MPIR_Ireduce_scatter_block_rec_hlv(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else if (is_commutative && (nbytes >= MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno = MPIR_Ireduce_scatter_block_pairwise(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else /* (!is_commutative) */ {
        if (MPIU_is_pof2(comm_size, NULL)) {
            /* noncommutative, pof2 size */
            mpi_errno = MPIR_Ireduce_scatter_block_noncomm(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
        else {
            /* noncommutative and non-pof2, use recursive doubling. */
            mpi_errno = MPIR_Ireduce_scatter_block_rec_dbl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
811
812
813 #undef FUNCNAME
814 #define FUNCNAME MPIR_Ireduce_scatter_block_inter
815 #undef FCNAME
816 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Intercommunicator nonblocking reduce_scatter_block.
 *
 * Strategy: an intercommunicator Ireduce to rank 0 of each group (the two
 * reductions may proceed concurrently), then a local intracommunicator
 * Iscatter from rank 0 within each group.
 *
 * sendbuf   - source data
 * recvbuf   - receives this process's block of the reduced result
 * recvcount - number of elements delivered to every process
 * datatype  - element datatype
 * op        - reduction operation
 * comm_ptr  - intercommunicator
 * s         - schedule to which operations are appended
 *
 * Returns MPI_SUCCESS or an MPI error code. */
int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s)
{
/* Intercommunicator Ireduce_scatter_block.
   We first do an intercommunicator reduce to rank 0 on left group,
   then an intercommunicator reduce to rank 0 on right group, followed
   by local intracommunicator scattervs in each group.
*/
    int mpi_errno = MPI_SUCCESS;
    int rank, root, local_size, total_count;
    MPI_Aint true_extent, true_lb = 0, extent;
    void *tmp_buf = NULL;
    MPID_Comm *newcomm_ptr = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    rank = comm_ptr->rank;
    local_size = comm_ptr->local_size;

    total_count = recvcount * local_size;

    if (rank == 0) {
        /* In each group, rank 0 allocates a temp. buffer for the
           reduce */
        MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
        MPID_Datatype_get_extent_macro(datatype, extent);

        MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, total_count*(MPIR_MAX(extent,true_extent)), mpi_errno, "tmp_buf");

        /* adjust for potential negative lower bound in datatype */
        tmp_buf = (void *)((char*)tmp_buf - true_lb);
    }

    /* first do a reduce from right group to rank 0 in left group,
       then from left group to rank 0 in right group*/
    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce);
    if (comm_ptr->is_low_group) {
        /* reduce from right group to rank 0*/
        root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
                                                datatype, op, root, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* sched barrier intentionally omitted here to allow both reductions to
         * proceed in parallel */

        /* reduce to rank 0 of right group */
        root = 0;
        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
                                                datatype, op, root, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        /* reduce to rank 0 of right group */
        root = 0;
        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
                                                datatype, op, root, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* sched barrier intentionally omitted here to allow both reductions to
         * proceed in parallel */

        /* reduce from right group to rank 0*/
        root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
                                                datatype, op, root, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    MPID_SCHED_BARRIER(s);

    /* Get the local intracommunicator */
    if (!comm_ptr->local_comm) {
        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    newcomm_ptr = comm_ptr->local_comm;

    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatter);
    mpi_errno = newcomm_ptr->coll_fns->Iscatter(tmp_buf, recvcount, datatype,
                                                recvbuf, recvcount, datatype, 0,
                                                newcomm_ptr, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIR_SCHED_CHKPMEM_COMMIT(s);
fn_exit:
    return mpi_errno;
fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
906
907 #undef FUNCNAME
908 #define FUNCNAME MPIR_Ireduce_scatter_block_impl
909 #undef FCNAME
910 #define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    int tag = -1;
    MPID_Request *req_ptr = NULL;
    MPID_Sched_t sched = MPID_SCHED_NULL;

    /* default to the null request in case we fail before starting */
    *request = MPI_REQUEST_NULL;

    /* grab a communication tag and an empty schedule for this operation */
    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_create(&sched);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* the device layer must have installed a reduce_scatter_block algorithm */
    MPIU_Assert(comm_ptr->coll_fns != NULL);
    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_block != NULL);

    /* let the selected algorithm populate the schedule */
    mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, sched);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* launch the schedule; publish the request handle before the error check,
     * preserving the original control-flow ordering */
    mpi_errno = MPID_Sched_start(&sched, comm_ptr, tag, &req_ptr);
    if (req_ptr != NULL) {
        *request = req_ptr->handle;
    }
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
940
941 #endif /* MPICH_MPI_FROM_PMPI */
942
943 #undef FUNCNAME
944 #define FUNCNAME MPIX_Ireduce_scatter_block
945 #undef FCNAME
946 #define FCNAME MPIU_QUOTE(FUNCNAME)
947 /*@
MPIX_Ireduce_scatter_block - Combines values and scatters the results in a nonblocking way, with each process receiving an equal-sized block of the result
949
950 Input Parameters:
951 + sendbuf - starting address of the send buffer (choice)
952 . recvcount - element count per block (non-negative integer)
953 . datatype - data type of elements of input buffer (handle)
954 . op - operation (handle)
955 - comm - communicator (handle)
956
957 Output Parameters:
958 + recvbuf - starting address of the receive buffer (choice)
959 - request - communication request (handle)
960
961 .N ThreadSafe
962
963 .N Fortran
964
965 .N Errors
966 @*/
int MPIX_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_IREDUCE_SCATTER_BLOCK);

    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_IREDUCE_SCATTER_BLOCK);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
            MPIR_ERRTEST_OP(op, mpi_errno);
            MPIR_ERRTEST_COMM(comm, mpi_errno);

            /* TODO more checks may be appropriate */
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr(comm, comm_ptr);

    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            MPID_Comm_valid_ptr(comm_ptr, mpi_errno);
            if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
                MPID_Datatype *datatype_ptr = NULL;
                MPID_Datatype_get_ptr(datatype, datatype_ptr);
                MPID_Datatype_valid_ptr(datatype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                MPID_Datatype_committed_ptr(datatype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
            }

            if (HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN) {
                MPID_Op *op_ptr = NULL;
                MPID_Op_get_ptr(op, op_ptr);
                MPID_Op_valid_ptr(op_ptr, mpi_errno);
            }
            else {
                /* builtin op: verify that op is applicable to datatype
                 * (was a redundant "else if builtin" test -- the condition is
                 * always true in this branch) */
                mpi_errno = ( * MPIR_OP_HDL_TO_DTYPE_FN(op) )(datatype);
            }
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;

            MPIR_ERRTEST_ARGNULL(request,"request", mpi_errno);
            /* TODO more checks may be appropriate (counts, in_place, buffer aliasing, etc) */
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, request);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* ... end of body of routine ... */

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_IREDUCE_SCATTER_BLOCK);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpix_ireduce_scatter_block", "**mpix_ireduce_scatter_block %p %p %d %D %O %C %p", sendbuf, recvbuf, recvcount, datatype, op, comm, request);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
    /* note: removed an unreachable duplicate "goto fn_exit;" that followed
     * the one above */
}
1052