1 // -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 // vi: set et ts=4 sw=2 sts=2: 3 4 /** 5 * @file 6 * @brief Implements a MPIGuard which detects an error on a remote process 7 * @author Christian Engwer 8 * @ingroup ParallelCommunication 9 */ 10 11 #ifndef DUNE_COMMON_MPIGUARD_HH 12 #define DUNE_COMMON_MPIGUARD_HH 13 14 #include "mpihelper.hh" 15 #include "communication.hh" 16 #include "mpicommunication.hh" 17 #include <dune/common/exceptions.hh> 18 19 namespace Dune 20 { 21 22 #ifndef DOXYGEN 23 24 /* 25 Interface class for the communication needed by MPIGuard 26 */ 27 struct GuardCommunicator 28 { 29 // cleanup ~GuardCommunicatorDune::GuardCommunicator30 virtual ~GuardCommunicator() {}; 31 // all the communication methods we need 32 virtual int rank() = 0; 33 virtual int size() = 0; 34 virtual int sum(int i) = 0; 35 // create a new GuardCommunicator pointer 36 template <class C> 37 static GuardCommunicator * create(const CollectiveCommunication<C> & c); 38 #if HAVE_MPI 39 inline 40 static GuardCommunicator * create(const MPI_Comm & c); 41 #endif 42 }; 43 44 namespace { 45 /* 46 templated implementation of different communication classes 47 */ 48 // the default class will always fail, due to the missing implementation of "sum" 49 template <class Imp> 50 struct GenericGuardCommunicator 51 : public GuardCommunicator 52 {}; 53 // specialization for Communication 54 template <class T> 55 struct GenericGuardCommunicator< Communication<T> > 56 : public GuardCommunicator 57 { 58 const Communication<T> comm; GenericGuardCommunicatorDune::__anon01a332210111::GenericGuardCommunicator59 GenericGuardCommunicator(const Communication<T> & c) : 60 comm(c) {} rankDune::__anon01a332210111::GenericGuardCommunicator61 int rank() override { return comm.rank(); }; sizeDune::__anon01a332210111::GenericGuardCommunicator62 int size() override { return comm.size(); }; sumDune::__anon01a332210111::GenericGuardCommunicator63 int sum(int i) override { return comm.sum(i); } 64 }; 65 66 #if HAVE_MPI 67 // specialization for MPI_Comm 68 template <> 69 struct GenericGuardCommunicator<MPI_Comm> 70 : public GenericGuardCommunicator< Communication<MPI_Comm> > 71 { GenericGuardCommunicatorDune::__anon01a332210111::GenericGuardCommunicator72 GenericGuardCommunicator(const MPI_Comm & c) : 73 GenericGuardCommunicator< Communication<MPI_Comm> >( 74 Communication<MPI_Comm>(c)) {} 75 }; 76 #endif 77 } // anonymous namespace 78 79 template<class C> create(const CollectiveCommunication<C> & comm)80 GuardCommunicator * GuardCommunicator::create(const CollectiveCommunication<C> & comm) 81 { 82 return new GenericGuardCommunicator< CollectiveCommunication<C> >(comm); 83 } 84 85 #if HAVE_MPI create(const MPI_Comm & comm)86 GuardCommunicator * GuardCommunicator::create(const MPI_Comm & comm) 87 { 88 return new GenericGuardCommunicator< CollectiveCommunication<MPI_Comm> >(comm); 89 } 90 #endif 91 92 #endif 93 94 /*! @brief This exception is thrown if the MPIGuard detects an error on a remote process 95 @ingroup ParallelCommunication 96 */ 97 class MPIGuardError : public ParallelError {}; 98 99 /*! @brief detects a thrown exception and communicates to all other processes 100 @ingroup ParallelCommunication 101 102 @code 103 { 104 MPIGuard guard(...); 105 106 do_something(); 107 108 // tell the guard that you successfully passed a critical operation 109 guard.finalize(); 110 // reactivate the guard for the next critical operation 111 guard.reactivate(); 112 113 int result = do_something_else(); 114 115 // tell the guard the result of your operation 116 guard.finalize(result == success); 117 } 118 @endcode 119 120 You create a MPIGuard object. If an exception is risen on a 121 process the MPIGuard detects the exception, because the finalize 122 method was not called. When reaching the finalize call all 123 other processes are informed that an error occurred and the 124 MPIGuard throws an exception of type MPIGuardError. 125 126 @note You can initialize the MPIGuard from different types of communication objects: 127 - MPIHelper 128 - Communication 129 - MPI_Comm 130 */ 131 class MPIGuard 132 { 133 GuardCommunicator * comm_; 134 bool active_; 135 136 // we don't want to copy this class 137 MPIGuard (const MPIGuard &); 138 139 public: 140 /*! @brief create an MPIGuard operating on the Communicator of the global Dune::MPIHelper 141 142 @param active should the MPIGuard be active upon creation? 143 */ MPIGuard(bool active=true)144 MPIGuard (bool active=true) : 145 comm_(GuardCommunicator::create( 146 MPIHelper::getCommunication())), 147 active_(active) 148 {} 149 150 /*! @brief create an MPIGuard operating on the Communicator of a special Dune::MPIHelper m 151 152 @param m a reference to an MPIHelper 153 @param active should the MPIGuard be active upon creation? 154 */ MPIGuard(MPIHelper & m,bool active=true)155 MPIGuard (MPIHelper & m, bool active=true) : 156 comm_(GuardCommunicator::create( 157 m.getCommunication())), 158 active_(active) 159 {} 160 161 /*! @brief create an MPIGuard operating on an arbitrary communicator. 162 163 Supported types for the communication object are: 164 - MPIHelper 165 - Communication 166 - MPI_Comm 167 168 @param comm reference to a communication object 169 @param active should the MPIGuard be active upon creation? 170 */ 171 template <class C> MPIGuard(const C & comm,bool active=true)172 MPIGuard (const C & comm, bool active=true) : 173 comm_(GuardCommunicator::create(comm)), 174 active_(active) 175 {} 176 177 #if HAVE_MPI MPIGuard(const MPI_Comm & comm,bool active=true)178 MPIGuard (const MPI_Comm & comm, bool active=true) : 179 comm_(GuardCommunicator::create(comm)), 180 active_(active) 181 {} 182 #endif 183 184 /*! @brief destroy the guard and check for undetected exceptions 185 */ ~MPIGuard()186 ~MPIGuard() 187 { 188 if (active_) 189 { 190 active_ = false; 191 finalize(false); 192 } 193 delete comm_; 194 } 195 196 /*! @brief reactivate the guard. 197 198 If the guard is still active finalize(true) is called first. 199 */ reactivate()200 void reactivate() { 201 if (active_ == true) 202 finalize(); 203 active_ = true; 204 } 205 206 /*! @brief stop the guard. 207 208 If no success parameter is passed, the guard assumes that 209 everything worked as planned. All errors are communicated 210 and an exception of type MPIGuardError is thrown if an error 211 (or exception) occurred on any of the processors in the 212 communicator. 213 214 @param success inform the guard about possible errors 215 */ finalize(bool success=true)216 void finalize(bool success = true) 217 { 218 int result = success ? 0 : 1; 219 bool was_active = active_; 220 active_ = false; 221 result = comm_->sum(result); 222 if (result>0 && was_active) 223 { 224 DUNE_THROW(MPIGuardError, "Terminating process " 225 << comm_->rank() << " due to " 226 << result << " remote error(s)"); 227 } 228 } 229 }; 230 231 } 232 233 #endif // DUNE_COMMON_MPIGUARD_HH 234