1 // @HEADER
2 // ************************************************************************
3 //
4 //               Rapid Optimization Library (ROL) Package
5 //                 Copyright (2014) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact lead developers:
38 //              Drew Kouri   (dpkouri@sandia.gov) and
39 //              Denis Ridzal (dridzal@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
44 
45 #ifndef ROL_PINTCOMMUNICATIONUTILITIES_HPP
46 #define ROL_PINTCOMMUNICATIONUTILITIES_HPP
47 
48 #include <vector>
49 #include <mpi.h>
50 
51 #include "Teuchos_GlobalMPISession.hpp"
52 
53 #include "ROL_TimeStamp.hpp"
54 #include "ROL_PinTCommunicators.hpp"
55 #include "ROL_PinTVectorCommunication.hpp"
56 #include "ROL_PinTVector.hpp"
57 
58 namespace ROL {
59 namespace PinT { // a utilities namespace for PinT
60 
61 //! Used for serialization of a vector of time stamps so it can be moved using MPI
62 template <typename RealT>
serializeTimeStamps(const std::vector<ROL::TimeStamp<RealT>> & stamps,std::vector<double> & serialized,int startIndex)63 void serializeTimeStamps(const std::vector<ROL::TimeStamp<RealT>> & stamps,std::vector<double> & serialized,int startIndex)
64 {
65   serialized.clear();
66 
67   // note that initial step ignores the false step
68   for(size_t i=startIndex;i<stamps.size();i++) {
69     const ROL::TimeStamp<RealT> & ts = stamps[i];
70 
71     // only thing that works now
72     assert(ts.t.size()==2);
73 
74     serialized.push_back(ts.t.at(0));
75     serialized.push_back(ts.t.at(1));
76   }
77 
78   // sanity check of post condition
79   assert(serialized.size()==(stamps.size()-startIndex)*2);
80 }
81 
82 //! Used for serialization of a vector of time stamps so it can be moved using MPI
83 template <typename RealT>
deserializeTimeStamps(const std::vector<double> & serialized,std::vector<ROL::TimeStamp<RealT>> & stamps,int startIndex)84 void deserializeTimeStamps(const std::vector<double> & serialized,std::vector<ROL::TimeStamp<RealT>> & stamps,int startIndex)
85 {
86   stamps.clear();
87 
88   // sanity check
89   assert(serialized.size() % 2 == 0);
90 
91   // allocate space
92   stamps.resize(serialized.size() / 2 - startIndex);
93 
94   // note that initial step ignores the false step
95   for(size_t i=startIndex,ind=2*startIndex;i<stamps.size();i++,ind+=2) {
96     ROL::TimeStamp<RealT> & ts = stamps[i];
97 
98     ts.t.resize(2);
99     ts.t[0] = serialized[ind];
100     ts.t[1] = serialized[ind+1];
101   }
102 
103   // sanity check of post condition
104   assert(serialized.size()==(stamps.size()-startIndex)*2);
105 }
106 
107 /**
108  * \brief This method distributes time stamps towards a coarse subset of processors.
109  *
110  * Currently the implementation sends all the time stamps (equally distributed in sequence) to
111  * half of the coarse processors.
112  *
113  * \param[in] fineDistrStamps The time stamps distributed on the fine level processors
114  * \param[out] coarseDistrStamps The time stamps distributed on the coarse level processors
115  * \param[in] communicators PinT communicators used on the fine level
116  * \param[in] startIndex Index to start the fine level distributed time stamps.
117  *
118  * \note I use the phrase export because its going from fine to coarse and is called from the
119  *       fine level. That is the communicators that are being used are the fine PinTCommunicators.
120  */
121 template <typename RealT>
exportToCoarseDistribution_TimeStamps(const std::vector<ROL::TimeStamp<RealT>> & fineDistrStamps,std::vector<ROL::TimeStamp<RealT>> & coarseDistrStamps,const ROL::PinTCommunicators & communicators,int startIndex)122 bool exportToCoarseDistribution_TimeStamps(const std::vector<ROL::TimeStamp<RealT>> & fineDistrStamps,
123                                            std::vector<ROL::TimeStamp<RealT>> & coarseDistrStamps,
124                                            const ROL::PinTCommunicators & communicators,
125                                            int startIndex)
126 {
127   MPI_Comm comm = communicators.getTimeCommunicator();
128   int myRank    = communicators.getTimeRank();
129   int procSize  = communicators.getTimeSize();
130 
131   std::vector<RealT> serialized;
132   serializeTimeStamps(fineDistrStamps,serialized,startIndex);
133 
134   // send to your lower processor
135   if(myRank % 2 ==0) {
136     MPI_Send(const_cast<RealT*>(&serialized[0]),int(serialized.size()),MPI_DOUBLE,myRank/2,0,comm);
137   }
138   else {
139     MPI_Send(const_cast<RealT*>(&serialized[0]),int(serialized.size()),MPI_DOUBLE,(myRank-1)/2,0,comm);
140   }
141 
142   if(myRank < procSize / 2) {
143     std::vector<double> newSerialized;
144 
145     int count = 0;
146     MPI_Status status;
147     std::vector<double> buffer(2*serialized.size());
148 
149     // recieve first block
150     MPI_Recv(&buffer[0],int(buffer.size()),MPI_DOUBLE,2*myRank,0,comm,&status);
151     MPI_Get_count(&status,MPI_DOUBLE,&count);
152     newSerialized.insert(newSerialized.end(),buffer.begin(),buffer.begin()+count);
153 
154     // recieve second block
155     MPI_Recv(&buffer[0],int(buffer.size()),MPI_DOUBLE,2*myRank+1,0,comm,&status);
156     MPI_Get_count(&status,MPI_DOUBLE,&count);
157     newSerialized.insert(newSerialized.end(),buffer.begin(),buffer.begin()+count);
158 
159     deserializeTimeStamps(newSerialized,coarseDistrStamps,0);
160 
161     return true;
162   }
163 
164   return false;
165 }
166 
167 // next two functions communicate from the fine processor mesh to the coarse processor mesh
168 
169 /**
170  * \brief Send a block of vectors to the corresponding processor in the coarse processor grid.
171  *
172  * Currently the implmentation will send two processors of vectors in the fine grid to one processor
173  * of vectors in the coarse grid
174  *
175  * \param[in] fineVectors Fine vectors to send to the coarse grid.
176  * \param[in] vectorComm Object that encapsulates vector communication
177  * \param[in] communicators Object that seperates spatial parallelism from temporal parallelism
178  */
179 template <typename RealT>
sendToCoarseDistribution_Vector(const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & fineVectors,const ROL::PinTVectorCommunication<RealT> & vectorComm,const ROL::PinTCommunicators & communicators)180 void sendToCoarseDistribution_Vector(const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & fineVectors,
181                                      const ROL::PinTVectorCommunication<RealT> & vectorComm,
182                                      const ROL::PinTCommunicators & communicators)
183 {
184   MPI_Comm comm = communicators.getTimeCommunicator();
185   int myRank    = communicators.getTimeRank();
186 
187   int targetRank = -1;
188   if(myRank % 2 == 0) {
189     targetRank = myRank/2;
190   } else {
191     targetRank = (myRank-1)/2;
192   }
193 
194   // send to your lower processor
195   for(size_t i=0;i<fineVectors.size();i++) {
196     auto & vec = *fineVectors[i];
197     // std::cout << "    sending " << myRank << "/" << communicators.getTimeSize() << " " << fineVectors[i] << std::endl;
198     vectorComm.send(comm,targetRank,vec,i);
199   }
200 }
201 
202 /**
203  * \brief Recieve a block of vectors from the corresponding processor in the fine processor grid.
204  *
205  * Currently the implmentation will recieve from two processors vectors in the fine grid to one processor
206  * of vectors in the coarse grid
207  *
208  * \param[in] coarseVectors Destination for any coarse vectors
209  * \param[in] vectorComm Object that encapsulates vector communication
210  * \param[in] communicators Object that seperates spatial parallelism from temporal parallelism
211  * \param[in] size_a Number of vectors to recieve from the first processor in the fine grid.
212  * \param[in] size_b Number of vectors to recieve from the second processor in the fine grid.
213  *
214  * \note This asserts the coarseVectors size is "size_a + size_b"
215  */
216 template <typename RealT>
recvFromFineDistribution_Vector(std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors,const ROL::PinTVectorCommunication<RealT> & vectorComm,const ROL::PinTCommunicators & communicators,int size_a,int size_b)217 void recvFromFineDistribution_Vector(std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors,
218                                      const ROL::PinTVectorCommunication<RealT> & vectorComm,
219                                      const ROL::PinTCommunicators & communicators,
220                                      int size_a,
221                                      int size_b)
222 {
223   MPI_Comm comm = communicators.getTimeCommunicator();
224   int myRank    = communicators.getTimeRank();
225   int procSize  = communicators.getTimeSize();
226 
227   assert(int(coarseVectors.size())==size_a+size_b); // this is the expected size
228 
229   // only if this is in the right set
230   if(myRank < procSize / 2) {
231     // recieve from previously sent processors
232     for(int i=0;i<std::max(size_a,size_b);i++) {
233       if(i<size_a)
234         vectorComm.recv(comm,2*myRank,*coarseVectors[i],i);
235       if(i<size_b)
236         vectorComm.recv(comm,2*myRank+1,*coarseVectors[size_a+i],i);
237     }
238   }
239   else {
240     assert(false);
241 
242     return;
243   }
244 }
245 
246 // next two functions communicate from the coarse processor mesh to the fine processor mesh
247 
248 /**
249  * \brief Send a block of vectors to the corresponding processor in the coarse processor grid.
250  *
251  * Currently the implmentation will send two processors of vectors in the fine grid to one processor
252  * of vectors in the coarse grid
253  *
254  * \param[in] coarseVectors_a Coarse vectors to send to the fine grid processors a.
255  * \param[in] coarseVectors_a Coarse vectors to send to the fine grid processors b.
256  * \param[in] vectorComm Object that encapsulates vector communication
257  * \param[in] communicators Object that seperates spatial parallelism from temporal parallelism
258  *
259  * \note This asserts the coarseVectors size is "size_a + size_b"
260  */
261 template <typename RealT>
sendToFineDistribution_Vector(const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors_a,const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors_b,const ROL::PinTVectorCommunication<RealT> & vectorComm,const ROL::PinTCommunicators & communicators)262 void sendToFineDistribution_Vector(const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors_a,
263                                    const std::vector<ROL::Ptr<ROL::Vector<RealT>>> & coarseVectors_b,
264                                    const ROL::PinTVectorCommunication<RealT> & vectorComm,
265                                    const ROL::PinTCommunicators & communicators)
266 {
267   MPI_Comm comm = communicators.getTimeCommunicator();
268   int myRank    = communicators.getTimeRank();
269 
270   // send to your lower processor
271   for(size_t i=0;i<coarseVectors_a.size();i++) {
272     vectorComm.send(comm,2*myRank,*coarseVectors_a[i],i);
273   }
274 
275   for(size_t i=0;i<coarseVectors_b.size();i++) {
276     vectorComm.send(comm,2*myRank+1,*coarseVectors_b[i],i);
277   }
278 }
279 
280 /**
281  * \brief Recieve a block of vectors from the corresponding processor in the fine processor grid.
282  *
283  * Currently the implmentation will recieve from two processors vectors in the fine grid to one processor
284  * of vectors in the coarse grid
285  *
286  * \param[in] fineVectors Destination for any fine vectors
287  * \param[in] vectorComm Object that encapsulates vector communication
288  * \param[in] communicators Object that seperates spatial parallelism from temporal parallelism
289  */
290 template <typename RealT>
recvFromCoarseDistribution_Vector(std::vector<ROL::Ptr<ROL::Vector<RealT>>> & fineVectors,const ROL::PinTVectorCommunication<RealT> & vectorComm,const ROL::PinTCommunicators & communicators)291 void recvFromCoarseDistribution_Vector(std::vector<ROL::Ptr<ROL::Vector<RealT>>> & fineVectors,
292                                      const ROL::PinTVectorCommunication<RealT> & vectorComm,
293                                      const ROL::PinTCommunicators & communicators)
294 {
295   MPI_Comm comm = communicators.getTimeCommunicator();
296   int myRank    = communicators.getTimeRank();
297 
298   int targetRank = -1;
299   if(myRank % 2 == 0) {
300     targetRank = myRank/2;
301   } else {
302     targetRank = (myRank-1)/2;
303   }
304 
305   // send to your lower processor
306   for(int i=0;i<int(fineVectors.size());i++) {
307     vectorComm.recv(comm,targetRank,*fineVectors[i],i);
308   }
309 }
310 
311 } // namespace PinT
312 } // namespace ROL
313 
314 #endif
315