1 // -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 // vi: set et ts=4 sw=2 sts=2:
3 
4 /**
5  * @file
6  * @brief Implements a MPIGuard which detects an error on a remote process
7  * @author Christian Engwer
8  * @ingroup ParallelCommunication
9  */
10 
11 #ifndef DUNE_COMMON_MPIGUARD_HH
12 #define DUNE_COMMON_MPIGUARD_HH
13 
14 #include "mpihelper.hh"
15 #include "communication.hh"
16 #include "mpicommunication.hh"
17 #include <dune/common/exceptions.hh>
18 
19 namespace Dune
20 {
21 
22 #ifndef DOXYGEN
23 
24   /*
25      Interface class for the communication needed by MPIGuard
26    */
27   struct GuardCommunicator
28   {
29     // cleanup
~GuardCommunicatorDune::GuardCommunicator30     virtual ~GuardCommunicator() {};
31     // all the communication methods we need
32     virtual int rank() = 0;
33     virtual int size() = 0;
34     virtual int sum(int i) = 0;
35     // create a new GuardCommunicator pointer
36     template <class C>
37     static GuardCommunicator * create(const CollectiveCommunication<C> & c);
38 #if HAVE_MPI
39     inline
40     static GuardCommunicator * create(const MPI_Comm & c);
41 #endif
42   };
43 
44   namespace {
45     /*
46        templated implementation of different communication classes
47      */
48     // the default class will always fail, due to the missing implementation of "sum"
49     template <class Imp>
50     struct GenericGuardCommunicator
51       : public GuardCommunicator
52     {};
53     // specialization for Communication
54     template <class T>
55     struct GenericGuardCommunicator< Communication<T> >
56       : public GuardCommunicator
57     {
58       const Communication<T> comm;
GenericGuardCommunicatorDune::__anon01a332210111::GenericGuardCommunicator59       GenericGuardCommunicator(const Communication<T> & c) :
60         comm(c) {}
rankDune::__anon01a332210111::GenericGuardCommunicator61       int rank() override { return comm.rank(); };
sizeDune::__anon01a332210111::GenericGuardCommunicator62       int size() override { return comm.size(); };
sumDune::__anon01a332210111::GenericGuardCommunicator63       int sum(int i) override { return comm.sum(i); }
64     };
65 
66 #if HAVE_MPI
67     // specialization for MPI_Comm
68     template <>
69     struct GenericGuardCommunicator<MPI_Comm>
70       : public GenericGuardCommunicator< Communication<MPI_Comm> >
71     {
GenericGuardCommunicatorDune::__anon01a332210111::GenericGuardCommunicator72       GenericGuardCommunicator(const MPI_Comm & c) :
73         GenericGuardCommunicator< Communication<MPI_Comm> >(
74           Communication<MPI_Comm>(c)) {}
75     };
76 #endif
77   }   // anonymous namespace
78 
79   template<class C>
create(const CollectiveCommunication<C> & comm)80   GuardCommunicator * GuardCommunicator::create(const CollectiveCommunication<C> & comm)
81   {
82     return new GenericGuardCommunicator< CollectiveCommunication<C> >(comm);
83   }
84 
85 #if HAVE_MPI
create(const MPI_Comm & comm)86   GuardCommunicator * GuardCommunicator::create(const MPI_Comm & comm)
87   {
88     return new GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >(comm);
89   }
90 #endif
91 
92 #endif
93 
94   /*! @brief This exception is thrown if the MPIGuard detects an error on a remote process
95       @ingroup ParallelCommunication
96    */
97   class MPIGuardError : public ParallelError {};
98 
99   /*! @brief detects a thrown exception and communicates to all other processes
100       @ingroup ParallelCommunication
101 
102      @code
103      {
104        MPIGuard guard(...);
105 
106        do_something();
107 
108        // tell the guard that you successfully passed a critical operation
109        guard.finalize();
110        // reactivate the guard for the next critical operation
111        guard.reactivate();
112 
113        int result = do_something_else();
114 
115        // tell the guard the result of your operation
116        guard.finalize(result == success);
117      }
118      @endcode
119 
120      You create a MPIGuard object. If an exception is risen on a
121      process the MPIGuard detects the exception, because the finalize
122      method was not called.  When reaching the finalize call all
123      other processes are informed that an error occurred and the
124      MPIGuard throws an exception of type MPIGuardError.
125 
126      @note You can initialize the MPIGuard from different types of communication objects:
127      - MPIHelper
128      - Communication
129      - MPI_Comm
130    */
131   class MPIGuard
132   {
133     GuardCommunicator * comm_;
134     bool active_;
135 
136     // we don't want to copy this class
137     MPIGuard (const MPIGuard &);
138 
139   public:
140     /*! @brief create an MPIGuard operating on the Communicator of the global Dune::MPIHelper
141 
142        @param active should the MPIGuard be active upon creation?
143      */
MPIGuard(bool active=true)144     MPIGuard (bool active=true) :
145       comm_(GuardCommunicator::create(
146               MPIHelper::getCommunication())),
147       active_(active)
148     {}
149 
150     /*! @brief create an MPIGuard operating on the Communicator of a special Dune::MPIHelper m
151 
152        @param m a reference to an MPIHelper
153        @param active should the MPIGuard be active upon creation?
154      */
MPIGuard(MPIHelper & m,bool active=true)155     MPIGuard (MPIHelper & m, bool active=true) :
156       comm_(GuardCommunicator::create(
157               m.getCommunication())),
158       active_(active)
159     {}
160 
161     /*! @brief create an MPIGuard operating on an arbitrary communicator.
162 
163        Supported types for the communication object are:
164        - MPIHelper
165        - Communication
166        - MPI_Comm
167 
168        @param comm reference to a communication object
169        @param active should the MPIGuard be active upon creation?
170      */
171     template <class C>
MPIGuard(const C & comm,bool active=true)172     MPIGuard (const C & comm, bool active=true) :
173       comm_(GuardCommunicator::create(comm)),
174       active_(active)
175     {}
176 
177 #if HAVE_MPI
MPIGuard(const MPI_Comm & comm,bool active=true)178      MPIGuard (const MPI_Comm & comm, bool active=true) :
179       comm_(GuardCommunicator::create(comm)),
180       active_(active)
181     {}
182 #endif
183 
184     /*! @brief destroy the guard and check for undetected exceptions
185      */
~MPIGuard()186     ~MPIGuard()
187     {
188       if (active_)
189       {
190         active_ = false;
191         finalize(false);
192       }
193       delete comm_;
194     }
195 
196     /*! @brief reactivate the guard.
197 
198        If the guard is still active finalize(true) is called first.
199      */
reactivate()200     void reactivate() {
201       if (active_ == true)
202         finalize();
203       active_ = true;
204     }
205 
206     /*! @brief stop the guard.
207 
208        If no success parameter is passed, the guard assumes that
209        everything worked as planned.  All errors are communicated
210        and an exception of type MPIGuardError is thrown if an error
211        (or exception) occurred on any of the processors in the
212        communicator.
213 
214        @param success inform the guard about possible errors
215      */
finalize(bool success=true)216     void finalize(bool success = true)
217     {
218       int result = success ? 0 : 1;
219       bool was_active = active_;
220       active_ = false;
221       result = comm_->sum(result);
222       if (result>0 && was_active)
223       {
224         DUNE_THROW(MPIGuardError, "Terminating process "
225                    << comm_->rank() << " due to "
226                    << result << " remote error(s)");
227       }
228     }
229   };
230 
231 }
232 
233 #endif // DUNE_COMMON_MPIGUARD_HH
234