1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpiimpl.h"
7
8 /* This function implements a binomial tree reduce.
9
10 Cost = lgp.alpha + n.lgp.beta + n.lgp.gamma
11 */
MPIR_Reduce_intra_binomial(const void * sendbuf,void * recvbuf,int count,MPI_Datatype datatype,MPI_Op op,int root,MPIR_Comm * comm_ptr,MPIR_Errflag_t * errflag)12 int MPIR_Reduce_intra_binomial(const void *sendbuf,
13 void *recvbuf,
14 int count,
15 MPI_Datatype datatype,
16 MPI_Op op, int root, MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
17 {
18 int mpi_errno = MPI_SUCCESS;
19 int mpi_errno_ret = MPI_SUCCESS;
20 MPI_Status status;
21 int comm_size, rank, is_commutative, type_size ATTRIBUTE((unused));
22 int mask, relrank, source, lroot;
23 MPI_Aint true_lb, true_extent, extent;
24 void *tmp_buf;
25 MPIR_CHKLMEM_DECL(2);
26
27 if (count == 0)
28 return MPI_SUCCESS;
29
30 comm_size = comm_ptr->local_size;
31 rank = comm_ptr->rank;
32
33 /* Create a temporary buffer */
34
35 MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
36 MPIR_Datatype_get_extent_macro(datatype, extent);
37
38 is_commutative = MPIR_Op_is_commutative(op);
39
40 MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count * (MPL_MAX(extent, true_extent)),
41 mpi_errno, "temporary buffer", MPL_MEM_BUFFER);
42 /* adjust for potential negative lower bound in datatype */
43 tmp_buf = (void *) ((char *) tmp_buf - true_lb);
44
45 /* If I'm not the root, then my recvbuf may not be valid, therefore
46 * I have to allocate a temporary one */
47 if (rank != root) {
48 MPIR_CHKLMEM_MALLOC(recvbuf, void *,
49 count * (MPL_MAX(extent, true_extent)),
50 mpi_errno, "receive buffer", MPL_MEM_BUFFER);
51 recvbuf = (void *) ((char *) recvbuf - true_lb);
52 }
53
54 if ((rank != root) || (sendbuf != MPI_IN_PLACE)) {
55 mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, recvbuf, count, datatype);
56 MPIR_ERR_CHECK(mpi_errno);
57 }
58
59 MPIR_Datatype_get_size_macro(datatype, type_size);
60
61 /* This code is from MPICH-1. */
62
63 /* Here's the algorithm. Relative to the root, look at the bit pattern in
64 * my rank. Starting from the right (lsb), if the bit is 1, send to
65 * the node with that bit zero and exit; if the bit is 0, receive from the
66 * node with that bit set and combine (as long as that node is within the
67 * group)
68 *
69 * Note that by receiving with source selection, we guarantee that we get
70 * the same bits with the same input. If we allowed the parent to receive
71 * the children in any order, then timing differences could cause different
72 * results (roundoff error, over/underflows in some cases, etc).
73 *
74 * Because of the way these are ordered, if root is 0, then this is correct
75 * for both commutative and non-commutitive operations. If root is not
76 * 0, then for non-commutitive, we use a root of zero and then send
77 * the result to the root. To see this, note that the ordering is
78 * mask = 1: (ab)(cd)(ef)(gh) (odds send to evens)
79 * mask = 2: ((ab)(cd))((ef)(gh)) (3,6 send to 0,4)
80 * mask = 4: (((ab)(cd))((ef)(gh))) (4 sends to 0)
81 *
82 * Comments on buffering.
83 * If the datatype is not contiguous, we still need to pass contiguous
84 * data to the user routine.
85 * In this case, we should make a copy of the data in some format,
86 * and send/operate on that.
87 *
88 * In general, we can't use MPI_PACK, because the alignment of that
89 * is rather vague, and the data may not be re-usable. What we actually
90 * need is a "squeeze" operation that removes the skips.
91 */
92 mask = 0x1;
93 if (is_commutative)
94 lroot = root;
95 else
96 lroot = 0;
97 relrank = (rank - lroot + comm_size) % comm_size;
98
99 while (/*(mask & relrank) == 0 && */ mask < comm_size) {
100 /* Receive */
101 if ((mask & relrank) == 0) {
102 source = (relrank | mask);
103 if (source < comm_size) {
104 source = (source + lroot) % comm_size;
105 mpi_errno = MPIC_Recv(tmp_buf, count, datatype, source,
106 MPIR_REDUCE_TAG, comm_ptr, &status, errflag);
107 if (mpi_errno) {
108 /* for communication errors, just record the error but continue */
109 *errflag =
110 MPIX_ERR_PROC_FAILED ==
111 MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
112 MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
113 MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
114 }
115
116 /* The sender is above us, so the received buffer must be
117 * the second argument (in the noncommutative case). */
118 if (is_commutative) {
119 mpi_errno = MPIR_Reduce_local(tmp_buf, recvbuf, count, datatype, op);
120 MPIR_ERR_CHECK(mpi_errno);
121 } else {
122 mpi_errno = MPIR_Reduce_local(recvbuf, tmp_buf, count, datatype, op);
123 MPIR_ERR_CHECK(mpi_errno);
124
125 mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, recvbuf, count, datatype);
126 MPIR_ERR_CHECK(mpi_errno);
127 }
128 }
129 } else {
130 /* I've received all that I'm going to. Send my result to
131 * my parent */
132 source = ((relrank & (~mask)) + lroot) % comm_size;
133 mpi_errno = MPIC_Send(recvbuf, count, datatype,
134 source, MPIR_REDUCE_TAG, comm_ptr, errflag);
135 if (mpi_errno) {
136 /* for communication errors, just record the error but continue */
137 *errflag =
138 MPIX_ERR_PROC_FAILED ==
139 MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
140 MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
141 MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
142 }
143 break;
144 }
145 mask <<= 1;
146 }
147
148 if (!is_commutative && (root != 0)) {
149 if (rank == 0) {
150 mpi_errno = MPIC_Send(recvbuf, count, datatype, root,
151 MPIR_REDUCE_TAG, comm_ptr, errflag);
152 } else if (rank == root) {
153 mpi_errno = MPIC_Recv(recvbuf, count, datatype, 0,
154 MPIR_REDUCE_TAG, comm_ptr, &status, errflag);
155 }
156 if (mpi_errno) {
157 /* for communication errors, just record the error but continue */
158 *errflag =
159 MPIX_ERR_PROC_FAILED ==
160 MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
161 MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
162 MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
163 }
164 }
165
166 fn_exit:
167 MPIR_CHKLMEM_FREEALL();
168 if (mpi_errno_ret)
169 mpi_errno = mpi_errno_ret;
170 else if (*errflag != MPIR_ERR_NONE)
171 MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
172 return mpi_errno;
173 fn_fail:
174 goto fn_exit;
175 }
176