1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 
MPIR_Ireduce_intra_sched_smp(const void * sendbuf,void * recvbuf,int count,MPI_Datatype datatype,MPI_Op op,int root,MPIR_Comm * comm_ptr,MPIR_Sched_t s)8 int MPIR_Ireduce_intra_sched_smp(const void *sendbuf, void *recvbuf, int count,
9                                  MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm * comm_ptr,
10                                  MPIR_Sched_t s)
11 {
12     int mpi_errno = MPI_SUCCESS;
13     int is_commutative;
14     MPI_Aint true_lb, true_extent, extent;
15     void *tmp_buf = NULL;
16     MPIR_Comm *nc;
17     MPIR_Comm *nrc;
18     MPIR_SCHED_CHKPMEM_DECL(1);
19 
20     MPIR_Assert(MPIR_Comm_is_parent_comm(comm_ptr));
21     MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
22 
23     nc = comm_ptr->node_comm;
24     nrc = comm_ptr->node_roots_comm;
25 
26     /* is the op commutative? We do SMP optimizations only if it is. */
27     is_commutative = MPIR_Op_is_commutative(op);
28     if (!is_commutative) {
29         mpi_errno =
30             MPIR_Ireduce_intra_sched_auto(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, s);
31         MPIR_ERR_CHECK(mpi_errno);
32         goto fn_exit;
33     }
34 
35     /* Create a temporary buffer on local roots of all nodes */
36     if (nrc != NULL) {
37         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
38         MPIR_Datatype_get_extent_macro(datatype, extent);
39 
40         MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count * (MPL_MAX(extent, true_extent)),
41                                   mpi_errno, "temporary buffer", MPL_MEM_BUFFER);
42         /* adjust for potential negative lower bound in datatype */
43         tmp_buf = (void *) ((char *) tmp_buf - true_lb);
44     }
45 
46     /* do the intranode reduce on all nodes other than the root's node */
47     if (nc != NULL && MPIR_Get_intranode_rank(comm_ptr, root) == -1) {
48         mpi_errno = MPIR_Ireduce_sched_auto(sendbuf, tmp_buf, count, datatype, op, 0, nc, s);
49         MPIR_ERR_CHECK(mpi_errno);
50         MPIR_SCHED_BARRIER(s);
51     }
52 
53     /* do the internode reduce to the root's node */
54     if (nrc != NULL) {
55         if (nrc->rank != MPIR_Get_internode_rank(comm_ptr, root)) {
56             /* I am not on root's node.  Use tmp_buf if we
57              * participated in the first reduce, otherwise use sendbuf */
58             const void *buf = (nc == NULL ? sendbuf : tmp_buf);
59             mpi_errno = MPIR_Ireduce_sched_auto(buf, NULL, count, datatype,
60                                                 op, MPIR_Get_internode_rank(comm_ptr, root), nrc,
61                                                 s);
62             MPIR_ERR_CHECK(mpi_errno);
63             MPIR_SCHED_BARRIER(s);
64         } else {        /* I am on root's node. I have not participated in the earlier reduce. */
65             if (comm_ptr->rank != root) {
66                 /* I am not the root though. I don't have a valid recvbuf.
67                  * Use tmp_buf as recvbuf. */
68 
69                 mpi_errno = MPIR_Ireduce_sched_auto(sendbuf, tmp_buf, count, datatype,
70                                                     op, MPIR_Get_internode_rank(comm_ptr, root),
71                                                     nrc, s);
72                 MPIR_ERR_CHECK(mpi_errno);
73                 MPIR_SCHED_BARRIER(s);
74 
75                 /* point sendbuf at tmp_buf to make final intranode reduce easy */
76                 sendbuf = tmp_buf;
77             } else {
78                 /* I am the root. in_place is automatically handled. */
79 
80                 mpi_errno = MPIR_Ireduce_sched_auto(sendbuf, recvbuf, count, datatype,
81                                                     op, MPIR_Get_internode_rank(comm_ptr, root),
82                                                     nrc, s);
83                 MPIR_ERR_CHECK(mpi_errno);
84                 MPIR_SCHED_BARRIER(s);
85 
86                 /* set sendbuf to MPI_IN_PLACE to make final intranode reduce easy. */
87                 sendbuf = MPI_IN_PLACE;
88             }
89         }
90     }
91 
92     /* do the intranode reduce on the root's node */
93     if (nc != NULL && MPIR_Get_intranode_rank(comm_ptr, root) != -1) {
94         mpi_errno = MPIR_Ireduce_sched_auto(sendbuf, recvbuf, count, datatype,
95                                             op, MPIR_Get_intranode_rank(comm_ptr, root), nc, s);
96         MPIR_ERR_CHECK(mpi_errno);
97         MPIR_SCHED_BARRIER(s);
98     }
99 
100 
101     MPIR_SCHED_CHKPMEM_COMMIT(s);
102   fn_exit:
103     return mpi_errno;
104   fn_fail:
105     MPIR_SCHED_CHKPMEM_REAP(s);
106     goto fn_exit;
107 }
108