1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpiimpl.h"
7
8 /*
9 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
10
11 cvars:
12 - name : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_RECEXCH_KVAL
13 category : COLLECTIVE
14 type : int
15 default : 2
16 class : none
17 verbosity : MPI_T_VERBOSITY_USER_BASIC
18 scope : MPI_T_SCOPE_ALL_EQ
19 description : >-
20 k value for recursive exchange based ireduce_scatter_block
21
22 - name : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM
23 category : COLLECTIVE
24 type : enum
25 default : auto
26 class : none
27 verbosity : MPI_T_VERBOSITY_USER_BASIC
28 scope : MPI_T_SCOPE_ALL_EQ
29 description : |-
30 Variable to select ireduce_scatter_block algorithm
31 auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
32 sched_auto - Internal algorithm selection for sched-based algorithms
33 sched_noncommutative - Force noncommutative algorithm
34 sched_recursive_doubling - Force recursive doubling algorithm
35 sched_pairwise - Force pairwise algorithm
36 sched_recursive_halving - Force recursive halving algorithm
37 gentran_recexch - Force generic transport recursive exchange algorithm
38
39 - name : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM
40 category : COLLECTIVE
41 type : enum
42 default : auto
43 class : none
44 verbosity : MPI_T_VERBOSITY_USER_BASIC
45 scope : MPI_T_SCOPE_ALL_EQ
46 description : |-
47 Variable to select ireduce_scatter_block algorithm
48 auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
49 sched_auto - Internal algorithm selection for sched-based algorithms
50 sched_remote_reduce_local_scatterv - Force remote-reduce-local-scatterv algorithm
51
52 - name : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE
53 category : COLLECTIVE
54 type : boolean
55 default : true
56 class : none
57 verbosity : MPI_T_VERBOSITY_USER_BASIC
58 scope : MPI_T_SCOPE_ALL_EQ
59 description : >-
60 This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES
61 is set to "percoll". If set to true, MPI_Ireduce_scatter_block will
62 allow the device to override the MPIR-level collective
63 algorithms. The device might still call the MPIR-level
64 algorithms manually. If set to false, the device-override
65 will be disabled.
66
67 === END_MPI_T_CVAR_INFO_BLOCK ===
68 */
69
70 /* -- Begin Profiling Symbol Block for routine MPI_Ireduce_scatter_block */
71 #if defined(HAVE_PRAGMA_WEAK)
72 #pragma weak MPI_Ireduce_scatter_block = PMPI_Ireduce_scatter_block
73 #elif defined(HAVE_PRAGMA_HP_SEC_DEF)
74 #pragma _HP_SECONDARY_DEF PMPI_Ireduce_scatter_block MPI_Ireduce_scatter_block
75 #elif defined(HAVE_PRAGMA_CRI_DUP)
76 #pragma _CRI duplicate MPI_Ireduce_scatter_block as PMPI_Ireduce_scatter_block
77 #elif defined(HAVE_WEAK_ATTRIBUTE)
78 int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
79 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
80 MPI_Request * request)
81 __attribute__ ((weak, alias("PMPI_Ireduce_scatter_block")));
82 #endif
83 /* -- End Profiling Symbol Block */
84
85 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
86 the MPI routines */
87 #ifndef MPICH_MPI_FROM_PMPI
88 #undef MPI_Ireduce_scatter_block
89 #define MPI_Ireduce_scatter_block PMPI_Ireduce_scatter_block
90
91
/* Csel-driven algorithm selection for ireduce_scatter_block.  Works for both
 * intra- and inter-communicators: a collective signature is built from the
 * arguments, the Csel (collective selection) framework is consulted, and the
 * container it returns identifies the concrete algorithm to invoke.
 *
 * NOTE(review): MPII_SCHED_WRAPPER is assumed to create/enqueue a schedule and
 * to jump to the fn_fail label on error — confirm against its definition. */
int MPIR_Ireduce_scatter_block_allcomm_auto(const void *sendbuf, void *recvbuf, int recvcount,
                                            MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr,
                                            MPIR_Request ** request)
{
    int mpi_errno = MPI_SUCCESS;

    /* Signature used to look up the tuned algorithm choice for this call. */
    MPIR_Csel_coll_sig_s coll_sig = {
        .coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK,
        .comm_ptr = comm_ptr,

        .u.ireduce_scatter_block.sendbuf = sendbuf,
        .u.ireduce_scatter_block.recvbuf = recvbuf,
        .u.ireduce_scatter_block.recvcount = recvcount,
        .u.ireduce_scatter_block.datatype = datatype,
        .u.ireduce_scatter_block.op = op,
    };

    /* The search must always yield a container (at minimum a default). */
    MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig);
    MPIR_Assert(cnt);

    /* Dispatch on the algorithm id stored in the selected container. */
    switch (cnt->id) {
        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_gentran_recexch:
            /* gentran algorithms take extra tuning parameters (the recexch
             * radix k) carried inside the container itself. */
            mpi_errno =
                MPIR_Ireduce_scatter_block_intra_gentran_recexch(sendbuf, recvbuf, recvcount,
                                                                 datatype, op, comm_ptr,
                                                                 cnt->u.
                                                                 ireduce_scatter_block.intra_gentran_recexch.
                                                                 k, request);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_auto:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
                               sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_noncommutative, comm_ptr,
                               request, sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_pairwise, comm_ptr, request,
                               sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, comm_ptr,
                               request, sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, comm_ptr,
                               request, sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_auto:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
                               sendbuf, recvbuf, recvcount, datatype, op);
            break;

        case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv:
            MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv,
                               comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
            break;

        default:
            /* An unknown container id indicates an internal selection bug. */
            MPIR_Assert(0);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
166
/* Schedule-based "auto" algorithm selection for intra-communicator
 * ireduce_scatter_block.  Chooses among the sched-based algorithms using
 * operator commutativity, total message size, and communicator size:
 *   - commutative, short messages   -> recursive halving
 *   - commutative, long messages    -> pairwise exchange
 *   - noncommutative, pof2 ranks    -> noncommutative algorithm
 *   - noncommutative, non-pof2      -> recursive doubling
 * Returns MPI_SUCCESS or an MPI error code from the selected algorithm. */
int MPIR_Ireduce_scatter_block_intra_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
                                                MPI_Datatype datatype, MPI_Op op,
                                                MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int is_commutative;
    MPI_Aint total_count, type_size, nbytes;
    int comm_size;

    is_commutative = MPIR_Op_is_commutative(op);

    comm_size = comm_ptr->local_size;
    /* Compute counts in MPI_Aint: recvcount * comm_size (and the byte count
     * below) can overflow a plain int for large counts/communicators. */
    total_count = (MPI_Aint) recvcount * comm_size;
    if (total_count == 0) {
        /* Nothing to reduce or scatter; the schedule stays empty. */
        goto fn_exit;
    }
    MPIR_Datatype_get_size_macro(datatype, type_size);
    nbytes = total_count * type_size;

    /* select an appropriate algorithm based on commutivity and message size */
    if (is_commutative && (nbytes < MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_block_intra_sched_recursive_halving(sendbuf, recvbuf, recvcount,
                                                                     datatype, op, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
    } else if (is_commutative && (nbytes >= MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_block_intra_sched_pairwise(sendbuf, recvbuf, recvcount, datatype,
                                                            op, comm_ptr, s);
        MPIR_ERR_CHECK(mpi_errno);
    } else {    /* (!is_commutative) */

        if (MPL_is_pof2(comm_size, NULL)) {
            /* noncommutative, pof2 size */
            mpi_errno =
                MPIR_Ireduce_scatter_block_intra_sched_noncommutative(sendbuf, recvbuf, recvcount,
                                                                      datatype, op, comm_ptr, s);
            MPIR_ERR_CHECK(mpi_errno);
        } else {
            /* noncommutative and non-pof2, use recursive doubling. */
            mpi_errno =
                MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling(sendbuf, recvbuf,
                                                                          recvcount, datatype, op,
                                                                          comm_ptr, s);
            MPIR_ERR_CHECK(mpi_errno);
        }
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
220
221
/* Schedule-based "auto" selection for inter-communicator
 * ireduce_scatter_block.  Exactly one sched-based inter-communicator
 * algorithm exists (remote-reduce-local-scatterv), so delegate directly
 * and propagate its return code. */
int MPIR_Ireduce_scatter_block_inter_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
                                                MPI_Datatype datatype, MPI_Op op,
                                                MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    return MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv(sendbuf, recvbuf,
                                                                               recvcount, datatype,
                                                                               op, comm_ptr, s);
}
235
/* Top-level sched-based "auto" entry point: routes to the intra- or
 * inter-communicator selection routine based on the communicator kind
 * and returns the selected algorithm's result. */
int MPIR_Ireduce_scatter_block_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
                                          MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr,
                                          MPIR_Sched_t s)
{
    if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
        return MPIR_Ireduce_scatter_block_intra_sched_auto(sendbuf, recvbuf, recvcount,
                                                           datatype, op, comm_ptr, s);
    }

    return MPIR_Ireduce_scatter_block_inter_sched_auto(sendbuf, recvbuf, recvcount,
                                                       datatype, op, comm_ptr, s);
}
254
/* MPIR-level implementation dispatcher for ireduce_scatter_block.  Honors the
 * per-collective CVAR algorithm overrides
 * (MPIR_CVAR_IREDUCE_SCATTER_BLOCK_{INTRA,INTER}_ALGORITHM); the "auto"
 * setting defers to the Csel-based allcomm_auto routine.
 *
 * NOTE(review): MPII_COLLECTIVE_FALLBACK_CHECK is assumed to jump to the
 * `fallback` label when its condition fails (here: a noncommutative op cannot
 * use gentran_recexch), and MPII_SCHED_WRAPPER to jump to `fn_fail` on
 * error — confirm against the macro definitions. */
int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf,
                                    int recvcount, MPI_Datatype datatype,
                                    MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Request ** request)
{
    int mpi_errno = MPI_SUCCESS;
    int is_commutative = MPIR_Op_is_commutative(op);

    /* A NULL request on return means "completed immediately"; the caller is
     * responsible for substituting a completed request object. */
    *request = NULL;

    /* If the user picks one of the transport-enabled algorithms, branch there
     * before going down to the MPIR_Sched-based algorithms. */
    /* TODO - Eventually the intention is to replace all of the
     * MPIR_Sched-based algorithms with transport-enabled algorithms, but that
     * will require sufficient performance testing and replacement algorithms. */
    if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
        switch (MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM) {
            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_gentran_recexch:
                /* recexch requires a commutative op; otherwise fall back. */
                MPII_COLLECTIVE_FALLBACK_CHECK(comm_ptr->rank, is_commutative, mpi_errno,
                                               "Ireduce_scatter_block gentran_recexch cannot be applied.\n");
                mpi_errno =
                    MPIR_Ireduce_scatter_block_intra_gentran_recexch(sendbuf, recvbuf,
                                                                     recvcount, datatype, op,
                                                                     comm_ptr,
                                                                     MPIR_CVAR_IREDUCE_SCATTER_BLOCK_RECEXCH_KVAL,
                                                                     request);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_noncommutative:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_noncommutative, comm_ptr,
                                   request, sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_pairwise:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_pairwise, comm_ptr,
                                   request, sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_recursive_halving:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_halving,
                                   comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_recursive_doubling:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling,
                                   comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_auto:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
                                   sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_auto:
                mpi_errno =
                    MPIR_Ireduce_scatter_block_allcomm_auto(sendbuf, recvbuf, recvcount, datatype,
                                                            op, comm_ptr, request);
                break;

            default:
                /* The CVAR enum and this switch must stay in sync. */
                MPIR_Assert(0);
        }
    } else {
        switch (MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM) {
            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_sched_remote_reduce_local_scatterv:
                MPII_SCHED_WRAPPER
                    (MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, comm_ptr,
                     request, sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_sched_auto:
                MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
                                   sendbuf, recvbuf, recvcount, datatype, op);
                break;

            case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_auto:
                mpi_errno =
                    MPIR_Ireduce_scatter_block_allcomm_auto(sendbuf, recvbuf, recvcount, datatype,
                                                            op, comm_ptr, request);
                break;

            default:
                MPIR_Assert(0);
        }
    }

    MPIR_ERR_CHECK(mpi_errno);
    /* Success: skip the fallback path. */
    goto fn_exit;

  fallback:
    /* Reached when a forced algorithm is inapplicable; fall back to the
     * sched-based auto selection for the communicator kind. */
    if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
        MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
                           sendbuf, recvbuf, recvcount, datatype, op);
    } else {
        MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
                           sendbuf, recvbuf, recvcount, datatype, op);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
357
/* MPIR-level entry point for ireduce_scatter_block.  Stages device buffers
 * through host memory when needed, then dispatches either to the device
 * (MPID) implementation or to the MPIR-level implementation depending on
 * the device-collectives CVAR settings. */
int MPIR_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
                               int recvcount, MPI_Datatype datatype,
                               MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Request ** request)
{
    int mpi_errno = MPI_SUCCESS;
    void *user_recvbuf = recvbuf;
    void *staged_sendbuf = NULL;
    void *staged_recvbuf = NULL;

    /* Allocate host staging buffers if the user buffers require them; when a
     * staging buffer is returned, operate on it instead. */
    MPIR_Coll_host_buffer_alloc(sendbuf, recvbuf, MPIR_Comm_size(comm_ptr) * recvcount, datatype,
                                &staged_sendbuf, &staged_recvbuf);
    if (staged_sendbuf != NULL)
        sendbuf = staged_sendbuf;
    if (staged_recvbuf != NULL)
        recvbuf = staged_recvbuf;

    /* The device may override the MPIR-level algorithms either globally
     * ("all") or per-collective ("percoll" + this collective's CVAR). */
    int device_override =
        (MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_all) ||
        ((MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_percoll) &&
         MPIR_CVAR_IREDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE);

    if (device_override) {
        mpi_errno =
            MPID_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr,
                                       request);
    } else {
        mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op,
                                                    comm_ptr, request);
    }

    /* Arrange for results to be copied back into the user's buffer and the
     * staging buffers to be released when the request completes. */
    MPIR_Coll_host_buffer_swap_back(staged_sendbuf, staged_recvbuf, user_recvbuf, recvcount,
                                    datatype, *request);

    return mpi_errno;
}
390
391 #endif /* MPICH_MPI_FROM_PMPI */
392
393 /*@
394 MPI_Ireduce_scatter_block - Combines values and scatters the results in
395 a nonblocking way
396
397 Input Parameters:
398 + sendbuf - starting address of the send buffer (choice)
399 . recvcount - element count per block (non-negative integer)
400 . datatype - data type of elements of input buffer (handle)
401 . op - operation (handle)
402 - comm - communicator (handle)
403
404 Output Parameters:
405 + recvbuf - starting address of the receive buffer (choice)
406 - request - communication request (handle)
407
408 .N ThreadSafe
409
410 .N Fortran
411
412 .N Errors
413 @*/
/* Public MPI binding: validates arguments, converts handles, invokes the
 * MPIR-level routine, and returns the request handle to the user.  Error
 * paths route through fn_fail, which builds a descriptive error code. */
int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
                              int recvcount,
                              MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
                              MPI_Request * request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm *comm_ptr = NULL;
    MPIR_Request *request_ptr = NULL;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);

    /* Validate parameters, especially handles needing to be converted */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Handle-level checks that must pass before pointer conversion. */
            MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
            MPIR_ERRTEST_OP(op, mpi_errno);
            MPIR_ERRTEST_COMM(comm, mpi_errno);

            /* TODO more checks may be appropriate */
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPIR_Comm_get_ptr(comm, comm_ptr);
    MPIR_Assert(comm_ptr != NULL);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, FALSE);
            /* User-defined datatypes must be valid and committed. */
            if (!HANDLE_IS_BUILTIN(datatype)) {
                MPIR_Datatype *datatype_ptr = NULL;
                MPIR_Datatype_get_ptr(datatype, datatype_ptr);
                MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS)
                    goto fn_fail;
                MPIR_Datatype_committed_ptr(datatype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS)
                    goto fn_fail;
            }

            /* User-defined ops: validate the object; built-in ops: verify the
             * op/datatype combination is permitted. */
            if (!HANDLE_IS_BUILTIN(op)) {
                MPIR_Op *op_ptr = NULL;
                MPIR_Op_get_ptr(op, op_ptr);
                MPIR_Op_valid_ptr(op_ptr, mpi_errno);
            } else {
                mpi_errno = (*MPIR_OP_HDL_TO_DTYPE_FN(op)) (datatype);
            }
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;

            MPIR_ERRTEST_ARGNULL(request, "request", mpi_errno);

            /* sendbuf must not alias recvbuf unless MPI_IN_PLACE is used
             * (aliasing check only meaningful for a nonzero transfer). */
            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM && sendbuf != MPI_IN_PLACE &&
                recvcount != 0) {
                MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno)
            }
            /* TODO more checks may be appropriate (counts, in_place, etc) */
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr,
                                           &request_ptr);
    MPIR_ERR_CHECK(mpi_errno);

    /* create a complete request, if needed */
    if (!request_ptr)
        request_ptr = MPIR_Request_create_complete(MPIR_REQUEST_KIND__COLL);
    /* return the handle of the request to the user */
    *request = request_ptr->handle;

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
                                 "**mpi_ireduce_scatter_block",
                                 "**mpi_ireduce_scatter_block %p %p %d %D %O %C %p", sendbuf,
                                 recvbuf, recvcount, datatype, op, comm, request);
    }
#endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, __func__, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
519