1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 
8 /*
9 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
10 
11 cvars:
12     - name        : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_RECEXCH_KVAL
13       category    : COLLECTIVE
14       type        : int
15       default     : 2
16       class       : none
17       verbosity   : MPI_T_VERBOSITY_USER_BASIC
18       scope       : MPI_T_SCOPE_ALL_EQ
19       description : >-
20         k value for recursive exchange based ireduce_scatter_block
21 
22     - name        : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM
23       category    : COLLECTIVE
24       type        : enum
25       default     : auto
26       class       : none
27       verbosity   : MPI_T_VERBOSITY_USER_BASIC
28       scope       : MPI_T_SCOPE_ALL_EQ
29       description : |-
30         Variable to select ireduce_scatter_block algorithm
31         auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
32         sched_auto - Internal algorithm selection for sched-based algorithms
33         sched_noncommutative     - Force noncommutative algorithm
34         sched_recursive_doubling - Force recursive doubling algorithm
35         sched_pairwise           - Force pairwise algorithm
36         sched_recursive_halving  - Force recursive halving algorithm
37         gentran_recexch          - Force generic transport recursive exchange algorithm
38 
39     - name        : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM
40       category    : COLLECTIVE
41       type        : enum
42       default     : auto
43       class       : none
44       verbosity   : MPI_T_VERBOSITY_USER_BASIC
45       scope       : MPI_T_SCOPE_ALL_EQ
46       description : |-
47         Variable to select ireduce_scatter_block algorithm
48         auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
49         sched_auto - Internal algorithm selection for sched-based algorithms
50         sched_remote_reduce_local_scatterv - Force remote-reduce-local-scatterv algorithm
51 
52     - name        : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE
53       category    : COLLECTIVE
54       type        : boolean
55       default     : true
56       class       : none
57       verbosity   : MPI_T_VERBOSITY_USER_BASIC
58       scope       : MPI_T_SCOPE_ALL_EQ
59       description : >-
60         This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES
61         is set to "percoll".  If set to true, MPI_Ireduce_scatter_block will
62         allow the device to override the MPIR-level collective
63         algorithms.  The device might still call the MPIR-level
64         algorithms manually.  If set to false, the device-override
65         will be disabled.
66 
67 === END_MPI_T_CVAR_INFO_BLOCK ===
68 */
69 
70 /* -- Begin Profiling Symbol Block for routine MPI_Ireduce_scatter_block */
71 #if defined(HAVE_PRAGMA_WEAK)
72 #pragma weak MPI_Ireduce_scatter_block = PMPI_Ireduce_scatter_block
73 #elif defined(HAVE_PRAGMA_HP_SEC_DEF)
74 #pragma _HP_SECONDARY_DEF PMPI_Ireduce_scatter_block  MPI_Ireduce_scatter_block
75 #elif defined(HAVE_PRAGMA_CRI_DUP)
76 #pragma _CRI duplicate MPI_Ireduce_scatter_block as PMPI_Ireduce_scatter_block
77 #elif defined(HAVE_WEAK_ATTRIBUTE)
78 int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
79                               MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
80                               MPI_Request * request)
81     __attribute__ ((weak, alias("PMPI_Ireduce_scatter_block")));
82 #endif
83 /* -- End Profiling Symbol Block */
84 
85 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
86    the MPI routines */
87 #ifndef MPICH_MPI_FROM_PMPI
88 #undef MPI_Ireduce_scatter_block
89 #define MPI_Ireduce_scatter_block PMPI_Ireduce_scatter_block
90 
91 
MPIR_Ireduce_scatter_block_allcomm_auto(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Request ** request)92 int MPIR_Ireduce_scatter_block_allcomm_auto(const void *sendbuf, void *recvbuf, int recvcount,
93                                             MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr,
94                                             MPIR_Request ** request)
95 {
96     int mpi_errno = MPI_SUCCESS;
97 
98     MPIR_Csel_coll_sig_s coll_sig = {
99         .coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK,
100         .comm_ptr = comm_ptr,
101 
102         .u.ireduce_scatter_block.sendbuf = sendbuf,
103         .u.ireduce_scatter_block.recvbuf = recvbuf,
104         .u.ireduce_scatter_block.recvcount = recvcount,
105         .u.ireduce_scatter_block.datatype = datatype,
106         .u.ireduce_scatter_block.op = op,
107     };
108 
109     MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig);
110     MPIR_Assert(cnt);
111 
112     switch (cnt->id) {
113         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_gentran_recexch:
114             mpi_errno =
115                 MPIR_Ireduce_scatter_block_intra_gentran_recexch(sendbuf, recvbuf, recvcount,
116                                                                  datatype, op, comm_ptr,
117                                                                  cnt->u.
118                                                                  ireduce_scatter_block.intra_gentran_recexch.
119                                                                  k, request);
120             break;
121 
122         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_auto:
123             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
124                                sendbuf, recvbuf, recvcount, datatype, op);
125             break;
126 
127         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative:
128             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_noncommutative, comm_ptr,
129                                request, sendbuf, recvbuf, recvcount, datatype, op);
130             break;
131 
132         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise:
133             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_pairwise, comm_ptr, request,
134                                sendbuf, recvbuf, recvcount, datatype, op);
135             break;
136 
137         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling:
138             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, comm_ptr,
139                                request, sendbuf, recvbuf, recvcount, datatype, op);
140             break;
141 
142         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving:
143             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, comm_ptr,
144                                request, sendbuf, recvbuf, recvcount, datatype, op);
145             break;
146 
147         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_auto:
148             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
149                                sendbuf, recvbuf, recvcount, datatype, op);
150             break;
151 
152         case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv:
153             MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv,
154                                comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
155             break;
156 
157         default:
158             MPIR_Assert(0);
159     }
160 
161   fn_exit:
162     return mpi_errno;
163   fn_fail:
164     goto fn_exit;
165 }
166 
MPIR_Ireduce_scatter_block_intra_sched_auto(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Sched_t s)167 int MPIR_Ireduce_scatter_block_intra_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
168                                                 MPI_Datatype datatype, MPI_Op op,
169                                                 MPIR_Comm * comm_ptr, MPIR_Sched_t s)
170 {
171     int mpi_errno = MPI_SUCCESS;
172     int is_commutative;
173     int total_count, type_size, nbytes;
174     int comm_size;
175 
176     is_commutative = MPIR_Op_is_commutative(op);
177 
178     comm_size = comm_ptr->local_size;
179     total_count = recvcount * comm_size;
180     if (total_count == 0) {
181         goto fn_exit;
182     }
183     MPIR_Datatype_get_size_macro(datatype, type_size);
184     nbytes = total_count * type_size;
185 
186     /* select an appropriate algorithm based on commutivity and message size */
187     if (is_commutative && (nbytes < MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
188         mpi_errno =
189             MPIR_Ireduce_scatter_block_intra_sched_recursive_halving(sendbuf, recvbuf, recvcount,
190                                                                      datatype, op, comm_ptr, s);
191         MPIR_ERR_CHECK(mpi_errno);
192     } else if (is_commutative && (nbytes >= MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
193         mpi_errno =
194             MPIR_Ireduce_scatter_block_intra_sched_pairwise(sendbuf, recvbuf, recvcount, datatype,
195                                                             op, comm_ptr, s);
196         MPIR_ERR_CHECK(mpi_errno);
197     } else {    /* (!is_commutative) */
198 
199         if (MPL_is_pof2(comm_size, NULL)) {
200             /* noncommutative, pof2 size */
201             mpi_errno =
202                 MPIR_Ireduce_scatter_block_intra_sched_noncommutative(sendbuf, recvbuf, recvcount,
203                                                                       datatype, op, comm_ptr, s);
204             MPIR_ERR_CHECK(mpi_errno);
205         } else {
206             /* noncommutative and non-pof2, use recursive doubling. */
207             mpi_errno =
208                 MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling(sendbuf, recvbuf,
209                                                                           recvcount, datatype, op,
210                                                                           comm_ptr, s);
211             MPIR_ERR_CHECK(mpi_errno);
212         }
213     }
214 
215   fn_exit:
216     return mpi_errno;
217   fn_fail:
218     goto fn_exit;
219 }
220 
221 
MPIR_Ireduce_scatter_block_inter_sched_auto(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Sched_t s)222 int MPIR_Ireduce_scatter_block_inter_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
223                                                 MPI_Datatype datatype, MPI_Op op,
224                                                 MPIR_Comm * comm_ptr, MPIR_Sched_t s)
225 {
226     int mpi_errno = MPI_SUCCESS;
227 
228     mpi_errno =
229         MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv(sendbuf, recvbuf,
230                                                                             recvcount, datatype, op,
231                                                                             comm_ptr, s);
232 
233     return mpi_errno;
234 }
235 
MPIR_Ireduce_scatter_block_sched_auto(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Sched_t s)236 int MPIR_Ireduce_scatter_block_sched_auto(const void *sendbuf, void *recvbuf, int recvcount,
237                                           MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr,
238                                           MPIR_Sched_t s)
239 {
240     int mpi_errno = MPI_SUCCESS;
241 
242     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
243         mpi_errno = MPIR_Ireduce_scatter_block_intra_sched_auto(sendbuf, recvbuf,
244                                                                 recvcount, datatype, op, comm_ptr,
245                                                                 s);
246     } else {
247         mpi_errno = MPIR_Ireduce_scatter_block_inter_sched_auto(sendbuf, recvbuf,
248                                                                 recvcount, datatype, op, comm_ptr,
249                                                                 s);
250     }
251 
252     return mpi_errno;
253 }
254 
MPIR_Ireduce_scatter_block_impl(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Request ** request)255 int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf,
256                                     int recvcount, MPI_Datatype datatype,
257                                     MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Request ** request)
258 {
259     int mpi_errno = MPI_SUCCESS;
260     int is_commutative = MPIR_Op_is_commutative(op);
261 
262     *request = NULL;
263 
264     /* If the user picks one of the transport-enabled algorithms, branch there
265      * before going down to the MPIR_Sched-based algorithms. */
266     /* TODO - Eventually the intention is to replace all of the
267      * MPIR_Sched-based algorithms with transport-enabled algorithms, but that
268      * will require sufficient performance testing and replacement algorithms. */
269     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
270         switch (MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM) {
271             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_gentran_recexch:
272                 MPII_COLLECTIVE_FALLBACK_CHECK(comm_ptr->rank, is_commutative, mpi_errno,
273                                                "Ireduce_scatter_block gentran_recexch cannot be applied.\n");
274                 mpi_errno =
275                     MPIR_Ireduce_scatter_block_intra_gentran_recexch(sendbuf, recvbuf,
276                                                                      recvcount, datatype, op,
277                                                                      comm_ptr,
278                                                                      MPIR_CVAR_IREDUCE_SCATTER_BLOCK_RECEXCH_KVAL,
279                                                                      request);
280                 break;
281 
282             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_noncommutative:
283                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_noncommutative, comm_ptr,
284                                    request, sendbuf, recvbuf, recvcount, datatype, op);
285                 break;
286 
287             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_pairwise:
288                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_pairwise, comm_ptr,
289                                    request, sendbuf, recvbuf, recvcount, datatype, op);
290                 break;
291 
292             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_recursive_halving:
293                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_halving,
294                                    comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
295                 break;
296 
297             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_recursive_doubling:
298                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling,
299                                    comm_ptr, request, sendbuf, recvbuf, recvcount, datatype, op);
300                 break;
301 
302             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_sched_auto:
303                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
304                                    sendbuf, recvbuf, recvcount, datatype, op);
305                 break;
306 
307             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTRA_ALGORITHM_auto:
308                 mpi_errno =
309                     MPIR_Ireduce_scatter_block_allcomm_auto(sendbuf, recvbuf, recvcount, datatype,
310                                                             op, comm_ptr, request);
311                 break;
312 
313             default:
314                 MPIR_Assert(0);
315         }
316     } else {
317         switch (MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM) {
318             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_sched_remote_reduce_local_scatterv:
319                 MPII_SCHED_WRAPPER
320                     (MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, comm_ptr,
321                      request, sendbuf, recvbuf, recvcount, datatype, op);
322                 break;
323 
324             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_sched_auto:
325                 MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
326                                    sendbuf, recvbuf, recvcount, datatype, op);
327                 break;
328 
329             case MPIR_CVAR_IREDUCE_SCATTER_BLOCK_INTER_ALGORITHM_auto:
330                 mpi_errno =
331                     MPIR_Ireduce_scatter_block_allcomm_auto(sendbuf, recvbuf, recvcount, datatype,
332                                                             op, comm_ptr, request);
333                 break;
334 
335             default:
336                 MPIR_Assert(0);
337         }
338     }
339 
340     MPIR_ERR_CHECK(mpi_errno);
341     goto fn_exit;
342 
343   fallback:
344     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
345         MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_intra_sched_auto, comm_ptr, request,
346                            sendbuf, recvbuf, recvcount, datatype, op);
347     } else {
348         MPII_SCHED_WRAPPER(MPIR_Ireduce_scatter_block_inter_sched_auto, comm_ptr, request,
349                            sendbuf, recvbuf, recvcount, datatype, op);
350     }
351 
352   fn_exit:
353     return mpi_errno;
354   fn_fail:
355     goto fn_exit;
356 }
357 
MPIR_Ireduce_scatter_block(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPIR_Comm * comm_ptr,MPIR_Request ** request)358 int MPIR_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
359                                int recvcount, MPI_Datatype datatype,
360                                MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Request ** request)
361 {
362     int mpi_errno = MPI_SUCCESS;
363     void *in_recvbuf = recvbuf;
364     void *host_sendbuf;
365     void *host_recvbuf;
366 
367     MPIR_Coll_host_buffer_alloc(sendbuf, recvbuf, MPIR_Comm_size(comm_ptr) * recvcount, datatype,
368                                 &host_sendbuf, &host_recvbuf);
369     if (host_sendbuf)
370         sendbuf = host_sendbuf;
371     if (host_recvbuf)
372         recvbuf = host_recvbuf;
373 
374     if ((MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_all) ||
375         ((MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_percoll) &&
376          MPIR_CVAR_IREDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE)) {
377         mpi_errno =
378             MPID_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr,
379                                        request);
380     } else {
381         mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op,
382                                                     comm_ptr, request);
383     }
384 
385     MPIR_Coll_host_buffer_swap_back(host_sendbuf, host_recvbuf, in_recvbuf, recvcount, datatype,
386                                     *request);
387 
388     return mpi_errno;
389 }
390 
391 #endif /* MPICH_MPI_FROM_PMPI */
392 
393 /*@
394 MPI_Ireduce_scatter_block - Combines values and scatters the results in
395                             a nonblocking way
396 
397 Input Parameters:
398 + sendbuf - starting address of the send buffer (choice)
399 . recvcount - element count per block (non-negative integer)
400 . datatype - data type of elements of input buffer (handle)
401 . op - operation (handle)
402 - comm - communicator (handle)
403 
404 Output Parameters:
405 + recvbuf - starting address of the receive buffer (choice)
406 - request - communication request (handle)
407 
408 .N ThreadSafe
409 
410 .N Fortran
411 
412 .N Errors
413 @*/
MPI_Ireduce_scatter_block(const void * sendbuf,void * recvbuf,int recvcount,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request * request)414 int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
415                               int recvcount,
416                               MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
417                               MPI_Request * request)
418 {
419     int mpi_errno = MPI_SUCCESS;
420     MPIR_Comm *comm_ptr = NULL;
421     MPIR_Request *request_ptr = NULL;
422     MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
423 
424     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
425     MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
426 
427     /* Validate parameters, especially handles needing to be converted */
428 #ifdef HAVE_ERROR_CHECKING
429     {
430         MPID_BEGIN_ERROR_CHECKS;
431         {
432             MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
433             MPIR_ERRTEST_OP(op, mpi_errno);
434             MPIR_ERRTEST_COMM(comm, mpi_errno);
435 
436             /* TODO more checks may be appropriate */
437         }
438         MPID_END_ERROR_CHECKS;
439     }
440 #endif /* HAVE_ERROR_CHECKING */
441 
442     /* Convert MPI object handles to object pointers */
443     MPIR_Comm_get_ptr(comm, comm_ptr);
444     MPIR_Assert(comm_ptr != NULL);
445 
446     /* Validate parameters and objects (post conversion) */
447 #ifdef HAVE_ERROR_CHECKING
448     {
449         MPID_BEGIN_ERROR_CHECKS;
450         {
451             MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, FALSE);
452             if (!HANDLE_IS_BUILTIN(datatype)) {
453                 MPIR_Datatype *datatype_ptr = NULL;
454                 MPIR_Datatype_get_ptr(datatype, datatype_ptr);
455                 MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
456                 if (mpi_errno != MPI_SUCCESS)
457                     goto fn_fail;
458                 MPIR_Datatype_committed_ptr(datatype_ptr, mpi_errno);
459                 if (mpi_errno != MPI_SUCCESS)
460                     goto fn_fail;
461             }
462 
463             if (!HANDLE_IS_BUILTIN(op)) {
464                 MPIR_Op *op_ptr = NULL;
465                 MPIR_Op_get_ptr(op, op_ptr);
466                 MPIR_Op_valid_ptr(op_ptr, mpi_errno);
467             } else {
468                 mpi_errno = (*MPIR_OP_HDL_TO_DTYPE_FN(op)) (datatype);
469             }
470             if (mpi_errno != MPI_SUCCESS)
471                 goto fn_fail;
472 
473             MPIR_ERRTEST_ARGNULL(request, "request", mpi_errno);
474 
475             if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM && sendbuf != MPI_IN_PLACE &&
476                 recvcount != 0) {
477                 MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno)
478             }
479             /* TODO more checks may be appropriate (counts, in_place, etc) */
480         }
481         MPID_END_ERROR_CHECKS;
482     }
483 #endif /* HAVE_ERROR_CHECKING */
484 
485     /* ... body of routine ...  */
486 
487     mpi_errno = MPIR_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr,
488                                            &request_ptr);
489     MPIR_ERR_CHECK(mpi_errno);
490 
491     /* create a complete request, if needed */
492     if (!request_ptr)
493         request_ptr = MPIR_Request_create_complete(MPIR_REQUEST_KIND__COLL);
494     /* return the handle of the request to the user */
495     *request = request_ptr->handle;
496 
497     /* ... end of body of routine ... */
498 
499   fn_exit:
500     MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
501     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
502     return mpi_errno;
503 
504   fn_fail:
505     /* --BEGIN ERROR HANDLING-- */
506 #ifdef HAVE_ERROR_CHECKING
507     {
508         mpi_errno =
509             MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
510                                  "**mpi_ireduce_scatter_block",
511                                  "**mpi_ireduce_scatter_block %p %p %d %D %O %C %p", sendbuf,
512                                  recvbuf, recvcount, datatype, op, comm, request);
513     }
514 #endif
515     mpi_errno = MPIR_Err_return_comm(comm_ptr, __func__, mpi_errno);
516     goto fn_exit;
517     /* --END ERROR HANDLING-- */
518 }
519