1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
2 // Copyright (C) 2004 The Trustees of Indiana University
3 
4 // Use, modification and distribution is subject to the Boost Software
5 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 
8 //   Authors: Douglas Gregor
9 //            Andrew Lumsdaine
10 
11 // Message Passing Interface 1.1 -- Section 4.9.1. Reduce
12 #ifndef BOOST_MPI_REDUCE_HPP
13 #define BOOST_MPI_REDUCE_HPP
14 
15 #include <boost/mpi/exception.hpp>
16 #include <boost/mpi/datatype.hpp>
17 
18 // For (de-)serializing sends and receives
19 #include <boost/mpi/packed_oarchive.hpp>
20 #include <boost/mpi/packed_iarchive.hpp>
21 
22 // For packed_[io]archive sends and receives
23 #include <boost/mpi/detail/point_to_point.hpp>
24 
25 #include <boost/mpi/communicator.hpp>
26 #include <boost/mpi/environment.hpp>
27 #include <boost/mpi/detail/computation_tree.hpp>
28 #include <boost/mpi/operations.hpp>
29 #include <algorithm>
30 #include <exception>
31 #include <boost/assert.hpp>
32 #include <boost/scoped_array.hpp>
33 
34 namespace boost { namespace mpi {
35 
36 
37 /************************************************************************
38  * Implementation details                                               *
39  ************************************************************************/
40 namespace detail {
41   /**********************************************************************
42    * Simple reduction with MPI_Reduce                                   *
43    **********************************************************************/
44   // We are reducing at the root for a type that has an associated MPI
45   // datatype and operation, so we'll use MPI_Reduce directly.
46   template<typename T, typename Op>
47   void
reduce_impl(const communicator & comm,const T * in_values,int n,T * out_values,Op,int root,mpl::true_,mpl::true_)48   reduce_impl(const communicator& comm, const T* in_values, int n,
49               T* out_values, Op /*op*/, int root, mpl::true_ /*is_mpi_op*/,
50               mpl::true_/*is_mpi_datatype*/)
51   {
52     BOOST_MPI_CHECK_RESULT(MPI_Reduce,
53                            (const_cast<T*>(in_values), out_values, n,
54                             boost::mpi::get_mpi_datatype<T>(*in_values),
55                             (is_mpi_op<Op, T>::op()), root, comm));
56   }
57 
58   // We are reducing to the root for a type that has an associated MPI
59   // datatype and operation, so we'll use MPI_Reduce directly.
60   template<typename T, typename Op>
61   void
reduce_impl(const communicator & comm,const T * in_values,int n,Op,int root,mpl::true_,mpl::true_)62   reduce_impl(const communicator& comm, const T* in_values, int n, Op /*op*/,
63               int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
64   {
65     BOOST_MPI_CHECK_RESULT(MPI_Reduce,
66                            (const_cast<T*>(in_values), 0, n,
67                             boost::mpi::get_mpi_datatype<T>(*in_values),
68                             (is_mpi_op<Op, T>::op()), root, comm));
69   }
70 
71   /**********************************************************************
72    * User-defined reduction with MPI_Reduce                             *
73    **********************************************************************/
74 
75   // We are reducing at the root for a type that has an associated MPI
76   // datatype but with a custom operation. We'll use MPI_Reduce
77   // directly, but we'll need to create an MPI_Op manually.
78   template<typename T, typename Op>
79   void
reduce_impl(const communicator & comm,const T * in_values,int n,T * out_values,Op op,int root,mpl::false_,mpl::true_)80   reduce_impl(const communicator& comm, const T* in_values, int n,
81               T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
82               mpl::true_/*is_mpi_datatype*/)
83   {
84     user_op<Op, T> mpi_op;
85     BOOST_MPI_CHECK_RESULT(MPI_Reduce,
86                            (const_cast<T*>(in_values), out_values, n,
87                             boost::mpi::get_mpi_datatype<T>(*in_values),
88                             mpi_op.get_mpi_op(), root, comm));
89   }
90 
91   // We are reducing to the root for a type that has an associated MPI
92   // datatype but with a custom operation. We'll use MPI_Reduce
93   // directly, but we'll need to create an MPI_Op manually.
94   template<typename T, typename Op>
95   void
reduce_impl(const communicator & comm,const T * in_values,int n,Op op,int root,mpl::false_,mpl::true_)96   reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
97               int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
98   {
99     user_op<Op, T> mpi_op;
100     BOOST_MPI_CHECK_RESULT(MPI_Reduce,
101                            (const_cast<T*>(in_values), 0, n,
102                             boost::mpi::get_mpi_datatype<T>(*in_values),
103                             mpi_op.get_mpi_op(), root, comm));
104   }
105 
106   /**********************************************************************
107    * User-defined, tree-based reduction for non-MPI data types          *
108    **********************************************************************/
109 
110   // Commutative reduction
111   template<typename T, typename Op>
112   void
tree_reduce_impl(const communicator & comm,const T * in_values,int n,T * out_values,Op op,int root,mpl::true_)113   tree_reduce_impl(const communicator& comm, const T* in_values, int n,
114                    T* out_values, Op op, int root,
115                    mpl::true_ /*is_commutative*/)
116   {
117     std::copy(in_values, in_values + n, out_values);
118 
119     int size = comm.size();
120     int rank = comm.rank();
121 
122     // The computation tree we will use.
123     detail::computation_tree tree(rank, size, root);
124 
125     int tag = environment::collectives_tag();
126 
127     MPI_Status status;
128     int children = 0;
129     for (int child = tree.child_begin();
130          children < tree.branching_factor() && child != root;
131          ++children, child = (child + 1) % size) {
132       // Receive archive
133       packed_iarchive ia(comm);
134       detail::packed_archive_recv(comm, child, tag, ia, status);
135 
136       T incoming;
137       for (int i = 0; i < n; ++i) {
138         ia >> incoming;
139         out_values[i] = op(out_values[i], incoming);
140       }
141     }
142 
143     // For non-roots, send the result to the parent.
144     if (tree.parent() != rank) {
145       packed_oarchive oa(comm);
146       for (int i = 0; i < n; ++i)
147         oa << out_values[i];
148       detail::packed_archive_send(comm, tree.parent(), tag, oa);
149     }
150   }
151 
152   // Commutative reduction from a non-root.
153   template<typename T, typename Op>
154   void
tree_reduce_impl(const communicator & comm,const T * in_values,int n,Op op,int root,mpl::true_)155   tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
156                    int root, mpl::true_ /*is_commutative*/)
157   {
158     scoped_array<T> results(new T[n]);
159     detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root,
160                              mpl::true_());
161   }
162 
163   // Non-commutative reduction
164   template<typename T, typename Op>
165   void
tree_reduce_impl(const communicator & comm,const T * in_values,int n,T * out_values,Op op,int root,mpl::false_)166   tree_reduce_impl(const communicator& comm, const T* in_values, int n,
167                    T* out_values, Op op, int root,
168                    mpl::false_ /*is_commutative*/)
169   {
170     int tag = environment::collectives_tag();
171 
172     int left_child = root / 2;
173     int right_child = (root + comm.size()) / 2;
174 
175     MPI_Status status;
176     if (left_child != root) {
177       // Receive value from the left child and merge it with the value
178       // we had incoming.
179       packed_iarchive ia(comm);
180       detail::packed_archive_recv(comm, left_child, tag, ia, status);
181       T incoming;
182       for (int i = 0; i < n; ++i) {
183         ia >> incoming;
184         out_values[i] = op(incoming, in_values[i]);
185       }
186     } else {
187       // There was no left value, so copy our incoming value.
188       std::copy(in_values, in_values + n, out_values);
189     }
190 
191     if (right_child != root) {
192       // Receive value from the right child and merge it with the
193       // value we had incoming.
194       packed_iarchive ia(comm);
195       detail::packed_archive_recv(comm, right_child, tag, ia, status);
196       T incoming;
197       for (int i = 0; i < n; ++i) {
198         ia >> incoming;
199         out_values[i] = op(out_values[i], incoming);
200       }
201     }
202   }
203 
204   // Non-commutative reduction from a non-root.
205   template<typename T, typename Op>
206   void
tree_reduce_impl(const communicator & comm,const T * in_values,int n,Op op,int root,mpl::false_)207   tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
208                    int root, mpl::false_ /*is_commutative*/)
209   {
210     int size = comm.size();
211     int rank = comm.rank();
212 
213     int tag = environment::collectives_tag();
214 
215     // Determine our parents and children in the commutative binary
216     // computation tree.
217     int grandparent = root;
218     int parent = root;
219     int left_bound = 0;
220     int right_bound = size;
221     int left_child, right_child;
222     do {
223       left_child = (left_bound + parent) / 2;
224       right_child = (parent + right_bound) / 2;
225 
226       if (rank < parent) {
227         // Go left.
228         grandparent = parent;
229         right_bound = parent;
230         parent = left_child;
231       } else if (rank > parent) {
232         // Go right.
233         grandparent = parent;
234         left_bound = parent + 1;
235         parent = right_child;
236       } else {
237         // We've found the parent
238         break;
239       }
240     } while (true);
241 
242     // Our parent is the grandparent of our children. This is a slight
243     // abuse of notation, but it makes the send-to-parent below make
244     // more sense.
245     parent = grandparent;
246 
247     MPI_Status status;
248     scoped_array<T> out_values(new T[n]);
249     if (left_child != rank) {
250       // Receive value from the left child and merge it with the value
251       // we had incoming.
252       packed_iarchive ia(comm);
253       detail::packed_archive_recv(comm, left_child, tag, ia, status);
254       T incoming;
255       for (int i = 0; i < n; ++i) {
256         ia >> incoming;
257         out_values[i] = op(incoming, in_values[i]);
258       }
259     } else {
260       // There was no left value, so copy our incoming value.
261       std::copy(in_values, in_values + n, out_values.get());
262     }
263 
264     if (right_child != rank) {
265       // Receive value from the right child and merge it with the
266       // value we had incoming.
267       packed_iarchive ia(comm);
268       detail::packed_archive_recv(comm, right_child, tag, ia, status);
269       T incoming;
270       for (int i = 0; i < n; ++i) {
271         ia >> incoming;
272         out_values[i] = op(out_values[i], incoming);
273       }
274     }
275 
276     // Send the combined value to our parent.
277     packed_oarchive oa(comm);
278     for (int i = 0; i < n; ++i)
279       oa << out_values[i];
280     detail::packed_archive_send(comm, parent, tag, oa);
281   }
282 
283   // We are reducing at the root for a type that has no associated MPI
284   // datatype and operation, so we'll use a simple tree-based
285   // algorithm.
286   template<typename T, typename Op>
287   void
reduce_impl(const communicator & comm,const T * in_values,int n,T * out_values,Op op,int root,mpl::false_,mpl::false_)288   reduce_impl(const communicator& comm, const T* in_values, int n,
289               T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
290               mpl::false_ /*is_mpi_datatype*/)
291   {
292     detail::tree_reduce_impl(comm, in_values, n, out_values, op, root,
293                              is_commutative<Op, T>());
294   }
295 
296   // We are reducing to the root for a type that has no associated MPI
297   // datatype and operation, so we'll use a simple tree-based
298   // algorithm.
299   template<typename T, typename Op>
300   void
reduce_impl(const communicator & comm,const T * in_values,int n,Op op,int root,mpl::false_,mpl::false_)301   reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
302               int root, mpl::false_ /*is_mpi_op*/,
303               mpl::false_ /*is_mpi_datatype*/)
304   {
305     detail::tree_reduce_impl(comm, in_values, n, op, root,
306                              is_commutative<Op, T>());
307   }
308 } // end namespace detail
309 
310 template<typename T, typename Op>
311 void
reduce(const communicator & comm,const T * in_values,int n,T * out_values,Op op,int root)312 reduce(const communicator& comm, const T* in_values, int n, T* out_values,
313        Op op, int root)
314 {
315   if (comm.rank() == root)
316     detail::reduce_impl(comm, in_values, n, out_values, op, root,
317                         is_mpi_op<Op, T>(), is_mpi_datatype<T>());
318   else
319     detail::reduce_impl(comm, in_values, n, op, root,
320                         is_mpi_op<Op, T>(), is_mpi_datatype<T>());
321 }
322 
323 template<typename T, typename Op>
324 void
reduce(const communicator & comm,const T * in_values,int n,Op op,int root)325 reduce(const communicator& comm, const T* in_values, int n, Op op, int root)
326 {
327   BOOST_ASSERT(comm.rank() != root);
328 
329   detail::reduce_impl(comm, in_values, n, op, root,
330                       is_mpi_op<Op, T>(), is_mpi_datatype<T>());
331 }
332 
333 template<typename T, typename Op>
334 void
reduce(const communicator & comm,std::vector<T> const & in_values,Op op,int root)335 reduce(const communicator & comm, std::vector<T> const & in_values, Op op,
336        int root)
337 {
338   reduce(comm, detail::c_data(in_values), in_values.size(), op, root);
339 }
340 
341 template<typename T, typename Op>
342 void
reduce(const communicator & comm,std::vector<T> const & in_values,std::vector<T> & out_values,Op op,int root)343 reduce(const communicator & comm, std::vector<T> const & in_values,
344        std::vector<T> & out_values, Op op, int root)
345 {
346   if (root == comm.rank()) out_values.resize(in_values.size());
347   reduce(comm, detail::c_data(in_values), in_values.size(), detail::c_data(out_values), op,
348          root);
349 }
350 
351 
352 template<typename T, typename Op>
353 void
reduce(const communicator & comm,const T & in_value,T & out_value,Op op,int root)354 reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
355        int root)
356 {
357   if (comm.rank() == root)
358     detail::reduce_impl(comm, &in_value, 1, &out_value, op, root,
359                         is_mpi_op<Op, T>(), is_mpi_datatype<T>());
360   else
361     detail::reduce_impl(comm, &in_value, 1, op, root,
362                         is_mpi_op<Op, T>(), is_mpi_datatype<T>());
363 }
364 
365 template<typename T, typename Op>
reduce(const communicator & comm,const T & in_value,Op op,int root)366 void reduce(const communicator& comm, const T& in_value, Op op, int root)
367 {
368   BOOST_ASSERT(comm.rank() != root);
369 
370   detail::reduce_impl(comm, &in_value, 1, op, root,
371                       is_mpi_op<Op, T>(), is_mpi_datatype<T>());
372 }
373 
374 } } // end namespace boost::mpi
375 
376 #endif // BOOST_MPI_REDUCE_HPP
377