1 // @HEADER
2 // ***********************************************************************
3 //
4 //                    Teuchos: Common Tools Package
5 //                 Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef TEUCHOS_MPI_COMM_HPP
43 #define TEUCHOS_MPI_COMM_HPP
44 
45 /// \file Teuchos_DefaultMpiComm.hpp
46 /// \brief Implementation of Teuchos wrappers for MPI.
47 ///
48 /// \warning It only makes sense to include this file if MPI is enabled.
49 
50 #include <Teuchos_ConfigDefs.hpp>
51 
52 // If MPI is not enabled, disable the contents of this file.
53 #ifdef HAVE_TEUCHOS_MPI
54 
55 #include "Teuchos_Comm.hpp"
56 #include "Teuchos_CommUtilities.hpp"
57 #include "Teuchos_OrdinalTraits.hpp"
58 #include "Teuchos_OpaqueWrapper.hpp"
59 #include "Teuchos_MpiReductionOpSetter.hpp"
60 #include "Teuchos_SerializationTraitsHelpers.hpp"
61 #include "Teuchos_Workspace.hpp"
62 #include "Teuchos_TypeNameTraits.hpp"
63 #include "Teuchos_as.hpp"
64 #include "Teuchos_Assert.hpp"
65 #include <mpi.h>
66 #include <iterator>
67 
68 // This must be defined globally for the whole program!
69 //#define TEUCHOS_MPI_COMM_DUMP
70 
71 #ifdef TEUCHOS_MPI_COMM_DUMP
72 #  include "Teuchos_VerboseObject.hpp"
73 #endif
74 
75 namespace Teuchos {
76 
77 //! Human-readable string version of the given MPI error code.
78 std::string
79 mpiErrorCodeToString (const int err);
80 
81 namespace details {
82   /// \brief Give \c comm to MPI_Comm_free, if MPI is not yet finalized.
83   ///
84   /// This function "frees" the given communicator by giving it to
85   /// MPI_Comm_free.  It only does so if MPI_Finalize has not yet been
86   /// called.  If MPI_Finalize has been called, this function does
87   /// nothing, since it is not legal to call most MPI functions after
88   /// MPI_Finalize has been called.  This function also ignores any
89   /// errors returned by MPI_Finalize, which makes it suitable for use
90   /// in a destructor.
91   ///
92   /// \note This function may allow a memory leak in your program, if
93   ///   you have allowed the MPI_Comm to persist after MPI_Finalize
94   ///   has been called.
95   void safeCommFree (MPI_Comm* comm);
96 
97   /// Set the given communicator's error handler to \c handler.
98   ///
99   /// If the MPI version is >= 2, this calls MPI_Comm_set_handler().
100   /// If the MPI version is 1, this calls MPI_Errhandler_set().
101   int setCommErrhandler (MPI_Comm comm, MPI_Errhandler handler);
102 
103 } // namespace details
104 
105 #ifdef TEUCHOS_MPI_COMM_DUMP
106 template<typename Ordinal, typename T>
dumpBuffer(const std::string & funcName,const std::string & buffName,const Ordinal bytes,const T buff[])107 void dumpBuffer(
108   const std::string &funcName, const std::string &buffName
109   ,const Ordinal bytes, const T buff[]
110   )
111 {
112   Teuchos::RCP<Teuchos::FancyOStream>
113     out = Teuchos::VerboseObjectBase::getDefaultOStream();
114   Teuchos::OSTab tab(out);
115   *out
116     << "\n" << funcName << "::" << buffName << ":\n";
117   tab.incrTab();
118   for( Ordinal i = 0; i < bytes; ++i ) {
119     *out << buffName << "[" << i << "] = '" << buff[i] << "'\n";
120   }
121   *out << "\n";
122 }
123 #endif // TEUCHOS_MPI_COMM_DUMP
124 
125 /// \class MpiCommStatus
126 /// \brief MPI-specific implementation of CommStatus.
127 ///
128 /// Users would not normally create an instance of this class.  The
129 /// only time they might wish to do so is to encapsulate an MPI_Status
130 /// returned by an external library or by their own code, and pass it
131 /// into one of our functions like wait() or waitAll().
132 ///
133 /// \tparam OrdinalType The same template parameter as \c Comm.  Only
134 ///   use \c int here.  We only make this a template class for
135 ///   compatibility with \c Comm.
136 template<class OrdinalType>
137 class MpiCommStatus : public CommStatus<OrdinalType> {
138 public:
MpiCommStatus(MPI_Status status)139   MpiCommStatus (MPI_Status status) : status_ (status) {}
140 
141   //! Destructor (declared virtual for memory safety)
~MpiCommStatus()142   virtual ~MpiCommStatus() {}
143 
144   //! The source rank that sent the message.
getSourceRank()145   OrdinalType getSourceRank () { return status_.MPI_SOURCE; }
146 
147   //! The tag of the received message.
getTag()148   OrdinalType getTag () { return status_.MPI_TAG; }
149 
150   //! The error code of the received message.
getError()151   OrdinalType getError () { return status_.MPI_ERROR; }
152 
153 private:
154   //! We forbid default construction syntactically.
155   MpiCommStatus ();
156 
157   //! The raw MPI_Status struct that this class encapsulates.
158   MPI_Status status_;
159 };
160 
161 /// \fn mpiCommStatus
162 /// \brief Nonmember constructor for MpiCommStatus.
163 /// \relates MpiCommStatus
164 template<class OrdinalType>
165 inline RCP<MpiCommStatus<OrdinalType> >
mpiCommStatus(MPI_Status rawMpiStatus)166 mpiCommStatus (MPI_Status rawMpiStatus)
167 {
168   return rcp (new MpiCommStatus<OrdinalType> (rawMpiStatus));
169 }
170 
171 /// \class MpiCommRequestBase
172 /// \brief Base class MPI implementation of CommRequest.
173 /// \tparam OrdinalType Same as the template parameter of Comm.
174 ///
175 /// This class wraps MPI_Request, which is MPI's reification of a
176 /// nonblocking communication operation.
177 ///
178 /// Users would not normally create an instance of this class.  Calls
179 /// to nonblocking communication operations (such as ireceive() or
180 /// isend()) return a pointer to a CommRequest.  If the Comm is an
181 /// MpiComm, then the returned CommRequest is an MpiCommRequest.
182 ///
183 /// Users might wish to create an MpiCommRequest directly if they want
184 /// to encapsulate an MPI_Request returned by an external library or
185 /// by their own code.
186 template<class OrdinalType>
187 class MpiCommRequestBase : public CommRequest<OrdinalType> {
188 public:
189   //! Default constructor.
MpiCommRequestBase()190   MpiCommRequestBase () :
191     rawMpiRequest_ (MPI_REQUEST_NULL)
192   {}
193 
194   //! Constructor (from a raw MPI_Request).
MpiCommRequestBase(MPI_Request rawMpiRequest)195   MpiCommRequestBase (MPI_Request rawMpiRequest) :
196     rawMpiRequest_ (rawMpiRequest)
197   {}
198 
199   /// \brief Return and relinquish ownership of the raw MPI_Request.
200   ///
201   /// "Relinquish ownership" means that this object sets its raw
202   /// MPI_Request to <tt>MPI_REQUEST_NULL</tt>, but returns the
203   /// original MPI_Request.  This effectively gives the caller
204   /// ownership of the raw MPI_Request.  This prevents hanging
205   /// requests.
releaseRawMpiRequest()206   MPI_Request releaseRawMpiRequest()
207   {
208     MPI_Request tmp_rawMpiRequest = rawMpiRequest_;
209     rawMpiRequest_ = MPI_REQUEST_NULL;
210     return tmp_rawMpiRequest;
211   }
212 
213   //! Whether the raw MPI_Request is <tt>MPI_REQUEST_NULL</tt>.
isNull() const214   bool isNull() const {
215     return rawMpiRequest_ == MPI_REQUEST_NULL;
216   }
217 
218   /// \brief Wait on this communication request to complete.
219   ///
220   /// This is a blocking operation.  The user is responsible for
221   /// avoiding deadlock.  (For example, a receive must have a matching
222   /// send, otherwise a wait on the receive will wait forever.)
wait()223   RCP<CommStatus<OrdinalType> > wait () {
224     MPI_Status rawMpiStatus;
225     // Whether this function satisfies the strong exception guarantee
226     // depends on whether MPI_Wait modifies its input request on error.
227     const int err = MPI_Wait (&rawMpiRequest_, &rawMpiStatus);
228     TEUCHOS_TEST_FOR_EXCEPTION(
229       err != MPI_SUCCESS, std::runtime_error,
230       "Teuchos: MPI_Wait() failed with error \""
231       << mpiErrorCodeToString (err));
232     // MPI_Wait sets the MPI_Request to MPI_REQUEST_NULL on success.
233     return mpiCommStatus<OrdinalType> (rawMpiStatus);
234   }
235 
236   /// \brief Cancel the communication request, and return its status.
237   ///
238   /// If this request is invalid or has already been invalidated, this
239   /// method returns null.
cancel()240   RCP<CommStatus<OrdinalType> > cancel () {
241     if (rawMpiRequest_ == MPI_REQUEST_NULL) {
242       return null;
243     }
244     else {
245       int err = MPI_Cancel (&rawMpiRequest_);
246       TEUCHOS_TEST_FOR_EXCEPTION(
247         err != MPI_SUCCESS, std::runtime_error,
248         "Teuchos: MPI_Cancel failed with the following error: "
249         << mpiErrorCodeToString (err));
250 
251       // Wait on the request.  If successful, MPI_Wait will set the
252       // MPI_Request to MPI_REQUEST_NULL.  The returned status may
253       // still be useful; for example, one may call MPI_Test_cancelled
254       // to test an MPI_Status from a nonblocking send.
255       MPI_Status status;
256       err = MPI_Wait (&rawMpiRequest_, &status);
257       TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
258         "Teuchos::MpiCommStatus::cancel: MPI_Wait failed with the following "
259         "error: " << mpiErrorCodeToString (err));
260       return mpiCommStatus<OrdinalType> (status);
261     }
262   }
263 
264   //! Destructor; cancels the request if it is still pending.
~MpiCommRequestBase()265   virtual ~MpiCommRequestBase () {
266     if (rawMpiRequest_ != MPI_REQUEST_NULL) {
267       // We're in a destructor, so don't throw errors.  However, if
268       // MPI_Cancel fails, it's probably a bad idea to call MPI_Wait.
269       const int err = MPI_Cancel (&rawMpiRequest_);
270       if (err == MPI_SUCCESS) {
271         // The MPI_Cancel succeeded.  Now wait on the request.  Ignore
272         // any reported error, since we can't do anything about those
273         // in the destructor (other than kill the program).  If
274         // successful, MPI_Wait will set the MPI_Request to
275         // MPI_REQUEST_NULL.  We ignore the returned MPI_Status, since
276         // if the user let the request fall out of scope, she must not
277         // care about the status.
278         //
279         // mfh 21 Oct 2012: The MPI standard requires completing a
280         // canceled request by calling a function like MPI_Wait,
281         // MPI_Test, or MPI_Request_free.  MPI_Wait on a canceled
282         // request behaves like a local operation (it does not
283         // communicate or block waiting for communication).  One could
284         // also call MPI_Request_free instead of MPI_Wait, but
285         // MPI_Request_free is intended more for persistent requests
286         // (created with functions like MPI_Recv_init).
287         (void) MPI_Wait (&rawMpiRequest_, MPI_STATUS_IGNORE);
288       }
289     }
290   }
291 
292 private:
293   //! The raw MPI request (an opaque object).
294   MPI_Request rawMpiRequest_;
295 };
296 
297 /// \class MpiCommRequest
298 /// \brief MPI implementation of CommRequest.
299 /// \tparam OrdinalType Same as the template parameter of Comm.
300 ///
301 /// This class wraps MPI_Request, which is MPI's reification of a
302 /// nonblocking communication operation.
303 ///
304 /// Users would not normally create an instance of this class.  Calls
305 /// to nonblocking communication operations (such as \c ireceive() or
306 /// \c isend()) return a pointer to a CommRequest.  If the Comm is an
307 /// MpiComm, then the returned CommRequest is an MpiCommRequest.
308 ///
309 /// Users might wish to create an MpiCommRequest directly if they want
310 /// to encapsulate an MPI_Request returned by an external library or
311 /// by their own code.
312 template<class OrdinalType>
313 class MpiCommRequest : public MpiCommRequestBase<OrdinalType> {
314 public:
315   //! Default constructor.
MpiCommRequest()316   MpiCommRequest () :
317     MpiCommRequestBase<OrdinalType> (MPI_REQUEST_NULL),
318     numBytes_ (0)
319   {}
320 
321   //! Constructor (from a raw MPI_Request).
MpiCommRequest(MPI_Request rawMpiRequest,const ArrayView<char>::size_type numBytesInMessage)322   MpiCommRequest (MPI_Request rawMpiRequest,
323                   const ArrayView<char>::size_type numBytesInMessage) :
324     MpiCommRequestBase<OrdinalType> (rawMpiRequest),
325     numBytes_ (numBytesInMessage)
326   {}
327 
328   /// \brief Number of bytes in the nonblocking send or receive request.
329   ///
330   /// Remembering this is inexpensive, and is also useful for
331   /// debugging (e.g., for detecting whether the send and receive have
332   /// matching message lengths).
numBytes() const333   ArrayView<char>::size_type numBytes () const {
334     return numBytes_;
335   }
336 
337   //! Destructor; cancels the request if it is still pending.
~MpiCommRequest()338   virtual ~MpiCommRequest () {}
339 
340 private:
341   //! Number of bytes in the nonblocking send or receive request.
342   ArrayView<char>::size_type numBytes_;
343 };
344 
345 /// \fn mpiCommRequest
346 /// \brief Nonmember constructor for MpiCommRequest.
347 /// \tparam OrdinalType Same as the template parameter of MpiCommRequest.
348 /// \relates MpiCommRequest
349 ///
350 /// \param rawMpiRequest [in] The raw MPI_Request opaque object.
351 /// \param numBytes [in] The number of bytes in the nonblocking
352 ///   send or receive request.
353 template<class OrdinalType>
354 inline RCP<MpiCommRequest<OrdinalType> >
mpiCommRequest(MPI_Request rawMpiRequest,const ArrayView<char>::size_type numBytes)355 mpiCommRequest (MPI_Request rawMpiRequest,
356                 const ArrayView<char>::size_type numBytes)
357 {
358   return rcp (new MpiCommRequest<OrdinalType> (rawMpiRequest, numBytes));
359 }
360 
361 /// \class MpiComm
362 /// \brief Implementation of Comm that uses MPI for communication.
363 /// \tparam Ordinal The index type for communication; same as the
364 ///   template parameter of Comm.
365 ///
366 /// This class uses MPI (the Message Passing Interface) to implement
367 /// the Comm interface.  It includes constructors that take an
368 /// MPI_Comm from the application.
369 ///
370 /// Assertions:
371 /// - <tt>getRawMpiComm().get() != NULL<tt>
372 /// - <tt>*getRawMpiComm() != MPI_COMM_NULL</tt>
373 /// - <tt>getSize() > 0</tt>
374 /// - <tt>0 <= getRank() && getRank() < getSize()</tt>
375 ///
376 template<typename Ordinal>
377 class MpiComm : public Comm<Ordinal> {
378 public:
379   //! @name Constructors
380   //@{
381 
382   /// \brief Construct an MpiComm with an MPI_Comm.
383   ///
384   /// This constructs an MpiComm that uses the given "raw" MPI
385   /// communicator underneath.  The MPI_Comm must be valid for the
386   /// lifetime of this MpiComm.  You are responsible for freeing the
387   /// MPI_Comm (using MPI_Comm_free) if necessary.
388   ///
389   /// This constructor should be used only in one of two cases:
390   /// 1. When the given MPI_Comm is one of the predefined
391   ///    communicators that need not and must not be freed after use,
392   ///    like MPI_COMM_WORLD or MPI_COMM_SELF.
393   /// 2. When you know that the given MPI_Comm will not be freed until
394   ///    after the lifetime of this MpiComm.
395   ///
396   /// If you cannot be sure of either of these two conditions, you
397   /// should use the version of the constructor that takes an
398   /// <tt>RCP<const OpaqueWrapper<MPI_Comm> ></tt>.
399   ///
400   /// Precondition:
401   /// - <tt>rawMpiComm != MPI_COMM_NULL</tt>
402   explicit MpiComm (MPI_Comm rawMpiComm);
403 
404   /// \brief Construct an MpiComm with the given wrapped MPI_Comm.
405   ///
406   /// This constructs an MpiComm that uses the given "raw" MPI
407   /// communicator underneath.  This version of the constructor
408   /// accepts the MPI_Comm wrapped in an OpaqueWrapper, which along
409   /// with the RCP has the option to free the MPI_Comm (via
410   /// MPI_Comm_free) after use if necessary.  You are responsible when
411   /// creating the OpaqueWrapper for supplying a "free" function if
412   /// needed.  We recommend using details::safeCommFree for the "free"
413   /// function, if one is needed.
414   ///
415   /// Preconditions:
416   /// - <tt>rawMpiComm.get() != NULL</tt>
417   /// - <tt>*rawMpiComm != MPI_COMM_NULL</tt>
418   MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm);
419 
420   /// \brief Construct an MpiComm with a wrapped MPI_Comm and a default tag.
421   ///
422   /// This constructor has the same meaning as the one-argument
423   /// constructor that takes RCP<const OpaqueWrapper<MPI_Comm> >,
424   /// except that it sets the default message tag on all processes to
425   /// \c defaultTag.  This avoids the MPI_Bcast that the other two
426   /// constructors do.
427   ///
428   /// This constructor is declared private for now, because it is an
429   /// implementation detail of duplicate().  We may choose to expose
430   /// it in the future.
431   ///
432   /// Preconditions:
433   ///   - <tt>rawMpiComm.get() != NULL</tt>
434   ///   - <tt>*rawMpiComm != MPI_COMM_NULL</tt>
435   ///   - \c defaultTag is the same on all processes in the given
436   ///     communicator
437   MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
438            const int defaultTag);
439 
440   /**
441    * \brief Construct a communicator with a new context with the same
442    *   properties as the original.
443    *
444    * The newly constructed communicator will have a duplicate communication
445    * space that has the same properties (e.g. processes, attributes,
446    * topologies) as the input communicator.
447    *
448    * \param other The communicator to copy from.
449    *
450    * <b>Preconditions:</b><ul>
451    * <li><tt>
452    * other.getRawMpiComm().get() != NULL && *other.getRawMpiComm() != NULL
453    * </tt></li>
454    * </ul>
455    */
456   MpiComm (const MpiComm<Ordinal>& other);
457 
458   /** \brief Return the embedded wrapped opaque <tt>MPI_Comm</tt> object. */
getRawMpiComm() const459   RCP<const OpaqueWrapper<MPI_Comm> > getRawMpiComm () const {
460     return rawMpiComm_;
461   }
462 
463   /// \brief Set the MPI error handler for this communicator.
464   ///
465   /// \param errHandler [in] The error handler to set.  If null, do
466   ///   nothing.
467   ///
468   /// MPI lets you set an error handler function specific to each
469   /// communicator.  (See Section 8.3 of the MPI 3.0 Standard.)
470   /// MpiComm wraps this functionality using this method.  You must
471   /// first either create an error handler using
472   /// MPI_Comm_create_errhandler() (or MPI_Errhandler_create() if you
473   /// are stuck with an MPI 1 implementation), or use one of the
474   /// default error handlers that the MPI standard or your MPI
475   /// implementation provides.  You will need to wrap the MPI error
476   /// handler in an OpaqueWrapper.  (See the documentation of
477   /// OpaqueWrapper for the rationale behind not using MPI's opaque
478   /// objects directly.)
479   ///
480   /// MpiComm will not attempt to call MPI_Errhandler_free() on the
481   /// error handler you provide.  You are responsible for arranging
482   /// that this be done.  Note that MPI_Comm_create_errhandler()
483   /// (which creates an error handler, given a function pointer) does
484   /// not attach the error handler to an MPI_Comm, so the lifetime of
485   /// the error handler is not tied to the MPI_Comm to which it is
486   /// assigned.  An error handler can be assigned to more than one
487   /// MPI_Comm, in fact.  You just need to guarantee that if you
488   /// create a custom error handler, then that handler gets freed at
489   /// some point.  "The call to <tt>MPI_FINALIZE</tt> does not free
490   /// objects created by MPI calls; these objects are freed using
491   /// <tt>MPI_xxx_FREE</tt> calls" (Section 8.7, MPI 3.0 Standard).
492   /// Note that it is legitimate to call MPI_Errhandler_free() right
493   /// after setting the MPI_Comm's error handler; see Section 8.3.4 of
494   /// the MPI 3.0 Standard ("The error handler [given to
495   /// MPI_Errhandler_free] will be deallocated after all the objects
496   /// associated with it (communicator, window, or file) have been
497   /// deallocated").  You might instead attach your error handler as a
498   /// attribute to <tt>MPI_COMM_SELF</tt>, in such a way that
499   /// MPI_Errhandler_free() will be called when <tt>MPI_COMM_SELF</tt>
500   /// is freed (which MPI_Finalize() does automatically).  We do not
501   /// take responsibility for doing any of these things; you are
502   /// responsible for freeing the error handler.
503   ///
504   /// Here is an example showing how to change an MpiComm's error
505   /// handler.  The default error handler for any <tt>MPI_Comm</tt> is
506   /// <tt>MPI_ERRORS_ARE_FATAL</tt>.  This handler immediately aborts
507   /// if MPI encounters an error, without returning an error code from
508   /// the MPI function.  Suppose that instead you would like MPI
509   /// functions to return an error code if MPI should encounter an
510   /// error.  (In that case, Teuchos' MPI wrappers will detect the
511   /// returned error code and throw an exception with an appropriate
512   /// error message.  If MPI aborts immediately on error, Teuchos
513   /// won't have the chance to detect and report the error.)  If so,
514   /// you may set the error handler to MPI_ERRORS_RETURN, one of MPI's
515   /// built-in error handlers.  Here is how you may do this for an
516   /// MpiComm:
517   /// \code
518   /// // Suppose that you've already created this MpiComm.
519   /// RCP<const MpiComm<int> > comm = ...;
520   ///
521   /// // Wrap the error handler.
522   /// RCP<const OpaqueWrapper<MPI_Errhandler> > errHandler =
523   ///   rcp (new OpaqueWrapper<MPI_Errhandler> (MPI_ERRORS_RETURN));
524   /// // Set the MpiComm's error handler.
525   /// comm->setErrorHandler (errHandler);
526   /// \endcode
527   void setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler);
528 
529   //@}
530   //! @name Implementation of Comm interface
531   //@{
532 
533   //! The calling process' rank.
534   virtual int getRank() const;
535 
536   //! The number of processes in the communicator.
537   virtual int getSize() const;
538 
539   //! Execute a barrier; must be called collectively.
540   virtual void barrier() const;
541 
542   /** \brief . */
543   virtual void broadcast(
544     const int rootRank, const Ordinal bytes, char buffer[]
545     ) const;
546 
547   //! Gather values from all processes to the root process.
548   virtual void
549   gather (const Ordinal sendBytes, const char sendBuffer[],
550           const Ordinal recvBytes, char recvBuffer[],
551           const int root) const;
552   /** \brief . */
553   virtual void gatherAll(
554     const Ordinal sendBytes, const char sendBuffer[]
555     ,const Ordinal recvBytes, char recvBuffer[]
556     ) const;
557   /** \brief . */
558   virtual void reduceAll(
559     const ValueTypeReductionOp<Ordinal,char> &reductOp
560     ,const Ordinal bytes, const char sendBuffer[], char globalReducts[]
561     ) const;
562   /** \brief . */
563   virtual void scan(
564     const ValueTypeReductionOp<Ordinal,char> &reductOp
565     ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
566     ) const;
567   /** \brief . */
568   virtual void send(
569     const Ordinal bytes, const char sendBuffer[], const int destRank
570     ) const;
571   /** \brief . */
572   virtual void
573   send (const Ordinal bytes,
574         const char sendBuffer[],
575         const int destRank,
576         const int tag) const;
577   /** \brief . */
578   virtual void ssend(
579     const Ordinal bytes, const char sendBuffer[], const int destRank
580     ) const;
581   //! Variant of ssend() that takes a message tag.
582   virtual void
583   ssend (const Ordinal bytes,
584          const char sendBuffer[],
585          const int destRank,
586          const int tag) const;
587   /** \brief . */
588   virtual int receive(
589     const int sourceRank, const Ordinal bytes, char recvBuffer[]
590     ) const;
591   /** \brief . */
592   virtual void readySend(
593     const ArrayView<const char> &sendBuffer,
594     const int destRank
595     ) const;
596   //! Variant of readySend() that accepts a message tag.
597   virtual void
598   readySend (const Ordinal bytes,
599              const char sendBuffer[],
600              const int destRank,
601              const int tag) const;
602   /** \brief . */
603   virtual RCP<CommRequest<Ordinal> > isend(
604     const ArrayView<const char> &sendBuffer,
605     const int destRank
606     ) const;
607   //! Variant of isend() that takes a tag.
608   virtual RCP<CommRequest<Ordinal> >
609   isend (const ArrayView<const char> &sendBuffer,
610          const int destRank,
611          const int tag) const;
612   /** \brief . */
613   virtual RCP<CommRequest<Ordinal> > ireceive(
614     const ArrayView<char> &Buffer,
615     const int sourceRank
616     ) const;
617   /** \brief . */
618   virtual RCP<CommRequest<Ordinal> >
619   ireceive (const ArrayView<char> &Buffer,
620             const int sourceRank,
621             const int tag) const;
622   /** \brief . */
623   virtual void waitAll(
624     const ArrayView<RCP<CommRequest<Ordinal> > > &requests
625     ) const;
626   /** \brief . */
627   virtual void
628   waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
629            const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const;
630   /** \brief . */
631   virtual RCP<CommStatus<Ordinal> >
632   wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const;
633   /** \brief . */
634   virtual RCP< Comm<Ordinal> > duplicate() const;
635   /** \brief . */
636   virtual RCP< Comm<Ordinal> > split(const int color, const int key) const;
637   /** \brief . */
638   virtual RCP< Comm<Ordinal> > createSubcommunicator(
639     const ArrayView<const int>& ranks) const;
640 
641   //@}
642   //! @name Implementation of Describable interface
643   //@{
644 
645   /** \brief . */
646   std::string description() const;
647 
648   //@}
649 
650   // These should be private but the PGI compiler requires them be public
651 
652   static int const minTag_ = 26000; // These came from Teuchos::MpiComm???
653   static int const maxTag_ = 26099; // ""
654 
655   /// \brief The current tag.
656   ///
657   /// \warning This method is ONLY for use by Teuchos developers.
658   ///   Users should not depend on the interface of this method.
659   ///   It may change or disappear at any time without warning.
getTag() const660   int getTag () const { return tag_; }
661 
662 private:
663 
664   /// \brief Set internal data members once the rawMpiComm_ data member is valid.
665   ///
666   /// This method should only be called from MpiComm's constructor.
667   void setupMembersFromComm();
668   static int tagCounter_;
669 
670   /// \brief The "raw" MPI_Comm (communicator).
671   ///
672   /// We wrap the MPI_Comm so that it is freed automatically when its
673   /// reference count goes to zero, if it does need to be freed after
674   /// use by calling MPI_Comm_free.  (Predefined MPI_Comm, which
675   /// include but are not limited to MPI_COMM_WORLD and MPI_COMM_SELF,
676   /// need not and must not be freed after use.)
677   RCP<const OpaqueWrapper<MPI_Comm> > rawMpiComm_;
678 
679   //! The rank of the calling process.
680   int rank_;
681 
682   //! The number of processes in the communicator.
683   int size_;
684 
685   /// \brief The current tag, to use for all MPI functions that need it.
686   ///
687   /// Each MpiComm instance always uses the same tag.  Different
688   /// MpiComm instances use different tags.  The tag is set in
689   /// MpiComm's constructor.  Please refer to
690   /// <a href="https://software.sandia.gov/bugzilla/show_bug.cgi?id=5740">Bug 5740</a>
691   /// for further discussion.
692   int tag_;
693 
694   //! MPI error handler.  If null, MPI uses the default error handler.
695   RCP<const OpaqueWrapper<MPI_Errhandler> > customErrorHandler_;
696 
697   void assertRank(const int rank, const std::string &rankName) const;
698 
699   // Not defined and not to be called!
700   MpiComm();
701 
702 #ifdef TEUCHOS_MPI_COMM_DUMP
703 public:
704   static bool show_dump;
705 #endif // TEUCHOS_MPI_COMM_DUMP
706 
707 };
708 
709 
710 /** \brief Helper function that creates a dynamically allocated
711  * <tt>MpiComm</tt> object or returns <tt>Teuchos::null</tt> to correctly
712  * represent a null communicator.
713  *
714  * <b>Postconditions:</b></ul>
715  * <li>[<tt>rawMpiComm.get()!=NULL && *rawMpiComm!=MPI_COMM_NULL</tt>]
716  *     <tt>return.get()!=NULL</tt>
717  * <li>[<tt>rawMpiComm.get()==NULL || *rawMpiComm==MPI_COMM_NULL</tt>]
718  *     <tt>return.get()==NULL</tt>
719  * </ul>
720  *
721  * \relates MpiComm
722  */
723 template<typename Ordinal>
724 RCP<MpiComm<Ordinal> >
725 createMpiComm(
726   const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
727   );
728 
729 
730 /** \brief Helper function that extracts a raw <tt>MPI_Comm</tt> object out of
731  * a <tt>Teuchos::MpiComm</tt> wrapper object.
732  *
733  * <b>Preconditions:</b></ul>
734  * <li><tt>dynamic_cast<const MpiComm<Ordinal>*>(&comm) != 0</tt>
735  * </ul>
736  *
737  * If the underlying type is not an <tt>MpiComm<Ordinal></tt> object, then the
738  * function with throw an exception which contains the type information as for
739  * why it failed.
740  *
741  * <b>WARNING:</b> The lifetime of the returned <tt>MPI_Comm</tt> object is
742  * controlled by the owning <tt>RCP<OpaqueWrapper<MPI_Comm> ></tt> object and
743  * is not guaranteed to live the entire life of the program.  Therefore, only
744  * use this function to grab and use the underlying <tt>MPI_Comm</tt> object
745  * in a vary narrow scope and then forget it.  If you need it again, get it
746  * off of the <tt>comm</tt> object each time.
747  *
748  * If you want a memory safe <tt><tt>RCP<OpaqueWrapper<MPI_Comm> ></tt> to the
749  * raw <tt>MPI_Comm</tt> object, then call:
750  *
751  * \verbatim
752  * dyn_cast<const MpiComm<Ordinal> >(comm).getRawMpiComm()
753  * \endverbatim
754  *
755  * \relates MpiComm
756  */
757 template<typename Ordinal>
758 MPI_Comm
759 getRawMpiComm(const Comm<Ordinal> &comm);
760 
761 
762 // ////////////////////////
763 // Implementations
764 
765 
766 // Static members
767 
768 
769 template<typename Ordinal>
770 int MpiComm<Ordinal>::tagCounter_ = MpiComm<Ordinal>::minTag_;
771 
772 
773 // Constructors
774 
775 
776 template<typename Ordinal>
777 MpiComm<Ordinal>::
MpiComm(const RCP<const OpaqueWrapper<MPI_Comm>> & rawMpiComm)778 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm)
779 {
780   TEUCHOS_TEST_FOR_EXCEPTION(
781     rawMpiComm.get () == NULL, std::invalid_argument,
782     "Teuchos::MpiComm constructor: The input RCP is null.");
783   TEUCHOS_TEST_FOR_EXCEPTION(
784     *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
785     "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
786 
787   rawMpiComm_ = rawMpiComm;
788 
789   // mfh 09 Jul 2013: Please resist the temptation to modify the given
790   // MPI communicator's error handler here.  See Bug 5943.  Note that
791   // an MPI communicator's default error handler is
792   // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
793   // returning an error code from the MPI function).  Users who want
794   // MPI functions instead to return an error code if they encounter
795   // an error, should set the error handler to MPI_ERRORS_RETURN.  DO
796   // NOT SET THE ERROR HANDLER HERE!!!  Teuchos' MPI wrappers should
797   // always check the error code returned by an MPI function,
798   // regardless of the error handler.  Users who want to set the error
799   // handler on an MpiComm may call its setErrorHandler method.
800 
801   setupMembersFromComm ();
802 }
803 
804 
805 template<typename Ordinal>
806 MpiComm<Ordinal>::
MpiComm(const RCP<const OpaqueWrapper<MPI_Comm>> & rawMpiComm,const int defaultTag)807 MpiComm (const RCP<const OpaqueWrapper<MPI_Comm> >& rawMpiComm,
808          const int defaultTag)
809 {
810   TEUCHOS_TEST_FOR_EXCEPTION(
811     rawMpiComm.get () == NULL, std::invalid_argument,
812     "Teuchos::MpiComm constructor: The input RCP is null.");
813   TEUCHOS_TEST_FOR_EXCEPTION(
814     *rawMpiComm == MPI_COMM_NULL, std::invalid_argument,
815     "Teuchos::MpiComm constructor: The given MPI_Comm is MPI_COMM_NULL.");
816 
817   rawMpiComm_ = rawMpiComm;
818   // Set size_ (the number of processes in the communicator).
819   int err = MPI_Comm_size (*rawMpiComm_, &size_);
820   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
821     "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
822     "error \"" << mpiErrorCodeToString (err) << "\".");
823   // Set rank_ (the calling process' rank).
824   err = MPI_Comm_rank (*rawMpiComm_, &rank_);
825   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
826     "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
827     "error \"" << mpiErrorCodeToString (err) << "\".");
828   tag_ = defaultTag; // set the default message tag
829 }
830 
831 
832 template<typename Ordinal>
MpiComm(MPI_Comm rawMpiComm)833 MpiComm<Ordinal>::MpiComm (MPI_Comm rawMpiComm)
834 {
835   TEUCHOS_TEST_FOR_EXCEPTION(rawMpiComm == MPI_COMM_NULL,
836     std::invalid_argument, "Teuchos::MpiComm constructor: The given MPI_Comm "
837     "is MPI_COMM_NULL.");
838   // We don't supply a "free" function here, since this version of the
839   // constructor makes the caller responsible for freeing rawMpiComm
840   // after use if necessary.
841   rawMpiComm_ = opaqueWrapper<MPI_Comm> (rawMpiComm);
842 
843   // mfh 09 Jul 2013: Please resist the temptation to modify the given
844   // MPI communicator's error handler here.  See Bug 5943.  Note that
845   // an MPI communicator's default error handler is
846   // MPI_ERRORS_ARE_FATAL, which immediately aborts on error (without
847   // returning an error code from the MPI function).  Users who want
848   // MPI functions instead to return an error code if they encounter
849   // an error, should set the error handler to MPI_ERRORS_RETURN.  DO
850   // NOT SET THE ERROR HANDLER HERE!!!  Teuchos' MPI wrappers should
851   // always check the error code returned by an MPI function,
852   // regardless of the error handler.  Users who want to set the error
853   // handler on an MpiComm may call its setErrorHandler method.
854 
855   setupMembersFromComm ();
856 }
857 
858 
859 template<typename Ordinal>
MpiComm(const MpiComm<Ordinal> & other)860 MpiComm<Ordinal>::MpiComm (const MpiComm<Ordinal>& other) :
861   rawMpiComm_ (opaqueWrapper<MPI_Comm> (MPI_COMM_NULL)) // <- This will be set below
862 {
863   // These are logic errors, since they violate MpiComm's invariants.
864   RCP<const OpaqueWrapper<MPI_Comm> > origCommPtr = other.getRawMpiComm ();
865   TEUCHOS_TEST_FOR_EXCEPTION(origCommPtr == null, std::logic_error,
866     "Teuchos::MpiComm copy constructor: "
867     "The input's getRawMpiComm() method returns null.");
868   MPI_Comm origComm = *origCommPtr;
869   TEUCHOS_TEST_FOR_EXCEPTION(origComm == MPI_COMM_NULL, std::logic_error,
870     "Teuchos::MpiComm copy constructor: "
871     "The input's raw MPI_Comm is MPI_COMM_NULL.");
872 
873   // mfh 19 Oct 2012: Don't change the behavior of MpiComm's copy
874   // constructor for now.  Later, we'll switch to the version that
875   // calls MPI_Comm_dup.  For now, we just copy other's handle over.
876   // Note that the new MpiComm's tag is still different than the input
877   // MpiComm's tag.  See Bug 5740.
878   if (true) {
879     rawMpiComm_ = origCommPtr;
880   }
881   else { // false (not run)
882     MPI_Comm newComm;
883     const int err = MPI_Comm_dup (origComm, &newComm);
884     TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
885       "Teuchos::MpiComm copy constructor: MPI_Comm_dup failed with "
886       "the following error: " << mpiErrorCodeToString (err));
887     // No side effects until after everything has succeeded.
888     rawMpiComm_ = opaqueWrapper (newComm, details::safeCommFree);
889   }
890 
891   setupMembersFromComm ();
892 }
893 
894 
895 template<typename Ordinal>
setupMembersFromComm()896 void MpiComm<Ordinal>::setupMembersFromComm ()
897 {
898   int err = MPI_Comm_size (*rawMpiComm_, &size_);
899   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
900     "Teuchos::MpiComm constructor: MPI_Comm_size failed with "
901     "error \"" << mpiErrorCodeToString (err) << "\".");
902   err = MPI_Comm_rank (*rawMpiComm_, &rank_);
903   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
904     "Teuchos::MpiComm constructor: MPI_Comm_rank failed with "
905     "error \"" << mpiErrorCodeToString (err) << "\".");
906 
907   // Set the default tag to make unique across all communicators
908   if (tagCounter_ > maxTag_) {
909     tagCounter_ = minTag_;
910   }
911   tag_ = tagCounter_++;
912   // Ensure that the same tag is used on all processes.
913   //
914   // FIXME (mfh 09 Jul 2013) This would not be necessary if MpiComm
915   // were just to call MPI_Comm_dup (as every library should) when
916   // given its communicator.  Of course, MPI_Comm_dup may also be
917   // implemented as a collective, and may even be more expensive than
918   // a broadcast.  If we do decide to use MPI_Comm_dup, we can get rid
919   // of the broadcast below, and also get rid of tag_, tagCounter_,
920   // minTag_, and maxTag_.
921   MPI_Bcast (&tag_, 1, MPI_INT, 0, *rawMpiComm_);
922 }
923 
924 
925 template<typename Ordinal>
926 void
927 MpiComm<Ordinal>::
setErrorHandler(const RCP<const OpaqueWrapper<MPI_Errhandler>> & errHandler)928 setErrorHandler (const RCP<const OpaqueWrapper<MPI_Errhandler> >& errHandler)
929 {
930   if (! is_null (errHandler)) {
931     const int err = details::setCommErrhandler (*getRawMpiComm (), *errHandler);
932     TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
933       "Teuchos::MpiComm: Setting the MPI_Comm's error handler failed with "
934       "error \"" << mpiErrorCodeToString (err) << "\".");
935   }
936   // Wait to set this until the end, in case setting the error handler
937   // doesn't succeed.
938   customErrorHandler_ = errHandler;
939 }
940 
941 //
942 // Overridden from Comm
943 //
944 
945 template<typename Ordinal>
getRank() const946 int MpiComm<Ordinal>::getRank() const
947 {
948   return rank_;
949 }
950 
951 
952 template<typename Ordinal>
getSize() const953 int MpiComm<Ordinal>::getSize() const
954 {
955   return size_;
956 }
957 
958 
959 template<typename Ordinal>
barrier() const960 void MpiComm<Ordinal>::barrier() const
961 {
962   TEUCHOS_COMM_TIME_MONITOR(
963     "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::barrier()"
964     );
965   const int err = MPI_Barrier (*rawMpiComm_);
966   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
967     "Teuchos::MpiComm::barrier: MPI_Barrier failed with error \""
968     << mpiErrorCodeToString (err) << "\".");
969 }
970 
971 
972 template<typename Ordinal>
broadcast(const int rootRank,const Ordinal bytes,char buffer[]) const973 void MpiComm<Ordinal>::broadcast(
974   const int rootRank, const Ordinal bytes, char buffer[]
975   ) const
976 {
977   TEUCHOS_COMM_TIME_MONITOR(
978     "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::broadcast(...)"
979     );
980   const int err = MPI_Bcast (buffer, bytes, MPI_CHAR, rootRank, *rawMpiComm_);
981   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
982     "Teuchos::MpiComm::broadcast: MPI_Bcast failed with error \""
983     << mpiErrorCodeToString (err) << "\".");
984 }
985 
986 
987 template<typename Ordinal>
gatherAll(const Ordinal sendBytes,const char sendBuffer[],const Ordinal recvBytes,char recvBuffer[]) const988 void MpiComm<Ordinal>::gatherAll(
989   const Ordinal sendBytes, const char sendBuffer[],
990   const Ordinal recvBytes, char recvBuffer[]
991   ) const
992 {
993   TEUCHOS_COMM_TIME_MONITOR(
994     "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gatherAll(...)"
995     );
996   TEUCHOS_ASSERT_EQUALITY((sendBytes*size_), recvBytes );
997   const int err =
998     MPI_Allgather (const_cast<char *>(sendBuffer), sendBytes, MPI_CHAR,
999                    recvBuffer, sendBytes, MPI_CHAR, *rawMpiComm_);
1000   // NOTE: 'sendBytes' is being sent above for the MPI arg recvcount (which is
1001   // very confusing in the MPI documentation) for MPI_Allgether(...).
1002 
1003   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1004     "Teuchos::MpiComm::gatherAll: MPI_Allgather failed with error \""
1005     << mpiErrorCodeToString (err) << "\".");
1006 }
1007 
1008 
1009 template<typename Ordinal>
1010 void
gather(const Ordinal sendBytes,const char sendBuffer[],const Ordinal recvBytes,char recvBuffer[],const int root) const1011 MpiComm<Ordinal>::gather (const Ordinal sendBytes,
1012                           const char sendBuffer[],
1013                           const Ordinal recvBytes,
1014                           char recvBuffer[],
1015                           const int root) const
1016 {
1017   (void) recvBytes; // silence compile warning for "unused parameter"
1018 
1019   TEUCHOS_COMM_TIME_MONITOR(
1020     "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::gather(...)"
1021     );
1022   const int err =
1023     MPI_Gather (const_cast<char *> (sendBuffer), sendBytes, MPI_CHAR,
1024                 recvBuffer, sendBytes, MPI_CHAR, root, *rawMpiComm_);
1025   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1026     "Teuchos::MpiComm::gather: MPI_Gather failed with error \""
1027     << mpiErrorCodeToString (err) << "\".");
1028 }
1029 
1030 
1031 template<typename Ordinal>
1032 void
1033 MpiComm<Ordinal>::
reduceAll(const ValueTypeReductionOp<Ordinal,char> & reductOp,const Ordinal bytes,const char sendBuffer[],char globalReducts[]) const1034 reduceAll (const ValueTypeReductionOp<Ordinal,char> &reductOp,
1035            const Ordinal bytes,
1036            const char sendBuffer[],
1037            char globalReducts[]) const
1038 {
1039   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::reduceAll(...)" );
1040   int err = MPI_SUCCESS;
1041 
1042   Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1043   MPI_Op op = Details::setMpiReductionOp (opWrap);
1044 
1045   // FIXME (mfh 23 Nov 2014) Ross decided to mash every type into
1046   // char.  This can cause correctness issues if we're actually doing
1047   // a reduction over, say, double.  Thus, he creates a custom
1048   // MPI_Datatype here that represents a contiguous block of char, so
1049   // that MPI doesn't split up the reduction type and thus do the sum
1050   // wrong.  It's a hack but it works.
1051 
1052   MPI_Datatype char_block;
1053   err = MPI_Type_contiguous (bytes, MPI_CHAR, &char_block);
1054   TEUCHOS_TEST_FOR_EXCEPTION(
1055     err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1056     "MPI_Type_contiguous failed with error \"" << mpiErrorCodeToString (err)
1057     << "\".");
1058   err = MPI_Type_commit (&char_block);
1059   TEUCHOS_TEST_FOR_EXCEPTION(
1060     err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1061     "MPI_Type_commit failed with error \"" << mpiErrorCodeToString (err)
1062     << "\".");
1063 
1064   if (sendBuffer == globalReducts) {
1065     // NOTE (mfh 31 May 2017) This is only safe if the communicator is
1066     // NOT an intercomm.  The usual case is that communicators are
1067     // intracomms.
1068     err = MPI_Allreduce (MPI_IN_PLACE, globalReducts, 1,
1069                          char_block, op, *rawMpiComm_);
1070   }
1071   else {
1072     err = MPI_Allreduce (const_cast<char*> (sendBuffer), globalReducts, 1,
1073                          char_block, op, *rawMpiComm_);
1074   }
1075   if (err != MPI_SUCCESS) {
1076     // Don't throw until we release the type resources we allocated
1077     // above.  If freeing fails for some reason, let the memory leak
1078     // go; we already have more serious problems if MPI_Allreduce
1079     // doesn't work.
1080     (void) MPI_Type_free (&char_block);
1081     TEUCHOS_TEST_FOR_EXCEPTION(
1082       true, std::runtime_error, "Teuchos::reduceAll (MPI, custom op): "
1083       "MPI_Allreduce failed with error \"" << mpiErrorCodeToString (err)
1084       << "\".");
1085   }
1086   err = MPI_Type_free (&char_block);
1087   TEUCHOS_TEST_FOR_EXCEPTION(
1088     err != MPI_SUCCESS, std::runtime_error, "Teuchos::reduceAll: "
1089     "MPI_Type_free failed with error \"" << mpiErrorCodeToString (err)
1090     << "\".");
1091 }
1092 
1093 
1094 template<typename Ordinal>
scan(const ValueTypeReductionOp<Ordinal,char> & reductOp,const Ordinal bytes,const char sendBuffer[],char scanReducts[]) const1095 void MpiComm<Ordinal>::scan(
1096   const ValueTypeReductionOp<Ordinal,char> &reductOp
1097   ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
1098   ) const
1099 {
1100   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::scan(...)" );
1101 
1102   Details::MpiReductionOp<Ordinal> opWrap (reductOp);
1103   MPI_Op op = Details::setMpiReductionOp (opWrap);
1104   const int err =
1105     MPI_Scan (const_cast<char*> (sendBuffer), scanReducts, bytes, MPI_CHAR,
1106               op, *rawMpiComm_);
1107   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1108     "Teuchos::MpiComm::scan: MPI_Scan() failed with error \""
1109     << mpiErrorCodeToString (err) << "\".");
1110 }
1111 
1112 
1113 template<typename Ordinal>
1114 void
send(const Ordinal bytes,const char sendBuffer[],const int destRank) const1115 MpiComm<Ordinal>::send (const Ordinal bytes,
1116                         const char sendBuffer[],
1117                         const int destRank) const
1118 {
1119   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1120 
1121 #ifdef TEUCHOS_MPI_COMM_DUMP
1122   if(show_dump) {
1123     dumpBuffer<Ordinal,char>(
1124       "Teuchos::MpiComm<Ordinal>::send(...)"
1125       ,"sendBuffer", bytes, sendBuffer
1126       );
1127   }
1128 #endif // TEUCHOS_MPI_COMM_DUMP
1129 
1130   const int err = MPI_Send (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1131                             destRank, tag_, *rawMpiComm_);
1132   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1133     "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1134     << mpiErrorCodeToString (err) << "\".");
1135 }
1136 
1137 
1138 template<typename Ordinal>
1139 void
send(const Ordinal bytes,const char sendBuffer[],const int destRank,const int tag) const1140 MpiComm<Ordinal>::send (const Ordinal bytes,
1141                         const char sendBuffer[],
1142                         const int destRank,
1143                         const int tag) const
1144 {
1145   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::send(...)" );
1146   const int err = MPI_Send (const_cast<char*> (sendBuffer), bytes, MPI_CHAR,
1147                             destRank, tag, *rawMpiComm_);
1148   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1149     "Teuchos::MpiComm::send: MPI_Send() failed with error \""
1150     << mpiErrorCodeToString (err) << "\".");
1151 }
1152 
1153 
1154 template<typename Ordinal>
1155 void
ssend(const Ordinal bytes,const char sendBuffer[],const int destRank) const1156 MpiComm<Ordinal>::ssend (const Ordinal bytes,
1157                          const char sendBuffer[],
1158                          const int destRank) const
1159 {
1160   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1161 
1162 #ifdef TEUCHOS_MPI_COMM_DUMP
1163   if(show_dump) {
1164     dumpBuffer<Ordinal,char>(
1165       "Teuchos::MpiComm<Ordinal>::send(...)"
1166       ,"sendBuffer", bytes, sendBuffer
1167       );
1168   }
1169 #endif // TEUCHOS_MPI_COMM_DUMP
1170 
1171   const int err = MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1172                              destRank, tag_, *rawMpiComm_);
1173   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1174     "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1175     << mpiErrorCodeToString (err) << "\".");
1176 }
1177 
1178 template<typename Ordinal>
1179 void
ssend(const Ordinal bytes,const char sendBuffer[],const int destRank,const int tag) const1180 MpiComm<Ordinal>::ssend (const Ordinal bytes,
1181                          const char sendBuffer[],
1182                          const int destRank,
1183                          const int tag) const
1184 {
1185   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ssend(...)" );
1186   const int err =
1187     MPI_Ssend (const_cast<char*>(sendBuffer), bytes, MPI_CHAR,
1188                destRank, tag, *rawMpiComm_);
1189   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1190     "Teuchos::MpiComm::send: MPI_Ssend() failed with error \""
1191     << mpiErrorCodeToString (err) << "\".");
1192 }
1193 
1194 template<typename Ordinal>
readySend(const ArrayView<const char> & sendBuffer,const int destRank) const1195 void MpiComm<Ordinal>::readySend(
1196   const ArrayView<const char> &sendBuffer,
1197   const int destRank
1198   ) const
1199 {
1200   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1201 
1202 #ifdef TEUCHOS_MPI_COMM_DUMP
1203   if(show_dump) {
1204     dumpBuffer<Ordinal,char>(
1205       "Teuchos::MpiComm<Ordinal>::readySend(...)"
1206       ,"sendBuffer", bytes, sendBuffer
1207       );
1208   }
1209 #endif // TEUCHOS_MPI_COMM_DUMP
1210 
1211   const int err =
1212     MPI_Rsend (const_cast<char*>(sendBuffer.getRawPtr()), static_cast<int>(sendBuffer.size()),
1213                MPI_CHAR, destRank, tag_, *rawMpiComm_);
1214   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1215     "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1216     << mpiErrorCodeToString (err) << "\".");
1217 }
1218 
1219 
1220 template<typename Ordinal>
1221 void MpiComm<Ordinal>::
readySend(const Ordinal bytes,const char sendBuffer[],const int destRank,const int tag) const1222 readySend (const Ordinal bytes,
1223            const char sendBuffer[],
1224            const int destRank,
1225            const int tag) const
1226 {
1227   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::readySend" );
1228   const int err =
1229     MPI_Rsend (const_cast<char*> (sendBuffer), bytes,
1230                MPI_CHAR, destRank, tag, *rawMpiComm_);
1231   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1232     "Teuchos::MpiComm::readySend: MPI_Rsend() failed with error \""
1233     << mpiErrorCodeToString (err) << "\".");
1234 }
1235 
1236 
1237 template<typename Ordinal>
1238 int
receive(const int sourceRank,const Ordinal bytes,char recvBuffer[]) const1239 MpiComm<Ordinal>::receive (const int sourceRank,
1240                            const Ordinal bytes,
1241                            char recvBuffer[]) const
1242 {
1243   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::receive(...)" );
1244 
1245   // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1246   // will take an incoming message from any process, as long as the
1247   // tag matches.
1248   const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1249 
1250   MPI_Status status;
1251   const int err = MPI_Recv (recvBuffer, bytes, MPI_CHAR, theSrcRank, tag_,
1252                             *rawMpiComm_, &status);
1253   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1254     "Teuchos::MpiComm::receive: MPI_Recv() failed with error \""
1255     << mpiErrorCodeToString (err) << "\".");
1256 
1257 #ifdef TEUCHOS_MPI_COMM_DUMP
1258   if (show_dump) {
1259     dumpBuffer<Ordinal,char> ("Teuchos::MpiComm<Ordinal>::receive(...)",
1260                               "recvBuffer", bytes, recvBuffer);
1261   }
1262 #endif // TEUCHOS_MPI_COMM_DUMP
1263 
1264   // Returning the source rank is useful in the MPI_ANY_SOURCE case.
1265   return status.MPI_SOURCE;
1266 }
1267 
1268 
1269 template<typename Ordinal>
1270 RCP<CommRequest<Ordinal> >
isend(const ArrayView<const char> & sendBuffer,const int destRank) const1271 MpiComm<Ordinal>::isend (const ArrayView<const char> &sendBuffer,
1272                          const int destRank) const
1273 {
1274   using Teuchos::as;
1275   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1276 
1277   MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1278   const int err =
1279     MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1280                as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1281                destRank, tag_, *rawMpiComm_, &rawMpiRequest);
1282   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1283     "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1284     << mpiErrorCodeToString (err) << "\".");
1285 
1286   return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1287 }
1288 
1289 
1290 template<typename Ordinal>
1291 RCP<CommRequest<Ordinal> >
1292 MpiComm<Ordinal>::
isend(const ArrayView<const char> & sendBuffer,const int destRank,const int tag) const1293 isend (const ArrayView<const char> &sendBuffer,
1294        const int destRank,
1295        const int tag) const
1296 {
1297   using Teuchos::as;
1298   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::isend(...)" );
1299 
1300   MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1301   const int err =
1302     MPI_Isend (const_cast<char*> (sendBuffer.getRawPtr ()),
1303                as<Ordinal> (sendBuffer.size ()), MPI_CHAR,
1304                destRank, tag, *rawMpiComm_, &rawMpiRequest);
1305   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1306     "Teuchos::MpiComm::isend: MPI_Isend() failed with error \""
1307     << mpiErrorCodeToString (err) << "\".");
1308 
1309   return mpiCommRequest<Ordinal> (rawMpiRequest, sendBuffer.size ());
1310 }
1311 
1312 
1313 template<typename Ordinal>
1314 RCP<CommRequest<Ordinal> >
ireceive(const ArrayView<char> & recvBuffer,const int sourceRank) const1315 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1316                             const int sourceRank) const
1317 {
1318   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1319 
1320   // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1321   // will take an incoming message from any process, as long as the
1322   // tag matches.
1323   const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1324 
1325   MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1326   const int err =
1327     MPI_Irecv (const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(),
1328                MPI_CHAR, theSrcRank, tag_, *rawMpiComm_, &rawMpiRequest);
1329   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1330     "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1331     << mpiErrorCodeToString (err) << "\".");
1332 
1333   return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size());
1334 }
1335 
1336 template<typename Ordinal>
1337 RCP<CommRequest<Ordinal> >
ireceive(const ArrayView<char> & recvBuffer,const int sourceRank,const int tag) const1338 MpiComm<Ordinal>::ireceive (const ArrayView<char> &recvBuffer,
1339                             const int sourceRank,
1340                             const int tag) const
1341 {
1342   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::ireceive(...)" );
1343 
1344   // A negative source rank indicates MPI_ANY_SOURCE, namely that we
1345   // will take an incoming message from any process, as long as the
1346   // tag matches.
1347   const int theSrcRank = (sourceRank < 0) ? MPI_ANY_SOURCE : sourceRank;
1348 
1349   MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
1350   const int err =
1351     MPI_Irecv (const_cast<char*> (recvBuffer.getRawPtr ()), recvBuffer.size (),
1352                MPI_CHAR, theSrcRank, tag, *rawMpiComm_, &rawMpiRequest);
1353   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
1354     "Teuchos::MpiComm::ireceive: MPI_Irecv() failed with error \""
1355     << mpiErrorCodeToString (err) << "\".");
1356 
1357   return mpiCommRequest<Ordinal> (rawMpiRequest, recvBuffer.size ());
1358 }
1359 
1360 namespace {
1361   // Called by the two-argument MpiComm::waitAll() variant.
1362   template<typename Ordinal>
1363   void
waitAllImpl(const ArrayView<RCP<CommRequest<Ordinal>>> & requests,const ArrayView<MPI_Status> & rawMpiStatuses)1364   waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1365                const ArrayView<MPI_Status>& rawMpiStatuses)
1366   {
1367     typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1368     const size_type count = requests.size();
1369     // waitAllImpl() is not meant to be called by users, so it's a bug
1370     // for the two views to have different lengths.
1371     TEUCHOS_TEST_FOR_EXCEPTION(rawMpiStatuses.size() != count,
1372       std::logic_error, "Teuchos::MpiComm's waitAllImpl: rawMpiStatus.size() = "
1373       << rawMpiStatuses.size() << " != requests.size() = " << requests.size()
1374       << ".  Please report this bug to the Tpetra developers.");
1375     if (count == 0) {
1376       return; // No requests on which to wait
1377     }
1378 
1379     // MpiComm wraps MPI and can't expose any MPI structs or opaque
1380     // objects.  Thus, we have to unpack requests into a separate array.
1381     // If that's too slow, then your code should just call into MPI
1382     // directly.
1383     //
1384     // Pull out the raw MPI requests from the wrapped requests.
1385     // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL, but
1386     // we keep track just to inform the user.
1387     bool someNullRequests = false;
1388     Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1389     for (int i = 0; i < count; ++i) {
1390       RCP<CommRequest<Ordinal> > request = requests[i];
1391       if (! is_null (request)) {
1392         RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1393           rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1394         // releaseRawMpiRequest() sets the MpiCommRequest's raw
1395         // MPI_Request to MPI_REQUEST_NULL.  This makes waitAll() not
1396         // satisfy the strong exception guarantee.  That's OK because
1397         // MPI_Waitall() doesn't promise that it satisfies the strong
1398         // exception guarantee, and we would rather conservatively
1399         // invalidate the handles than leave dangling requests around
1400         // and risk users trying to wait on the same request twice.
1401         rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest();
1402       }
1403       else { // Null requests map to MPI_REQUEST_NULL
1404         rawMpiRequests[i] = MPI_REQUEST_NULL;
1405         someNullRequests = true;
1406       }
1407     }
1408 
1409     // This is the part where we've finally peeled off the wrapper and
1410     // we can now interact with MPI directly.
1411     //
1412     // One option in the one-argument version of waitAll() is to ignore
1413     // the statuses completely.  MPI lets you pass in the named constant
1414     // MPI_STATUSES_IGNORE for the MPI_Status array output argument in
1415     // MPI_Waitall(), which would tell MPI not to bother with the
1416     // statuses.  However, we want the statuses because we can use them
1417     // for detailed error diagnostics in case something goes wrong.
1418     const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1419                                  rawMpiStatuses.getRawPtr());
1420 
1421     // In MPI_Waitall(), an error indicates that one or more requests
1422     // failed.  In that case, there could be requests that completed
1423     // (their MPI_Status' error field is MPI_SUCCESS), and other
1424     // requests that have not completed yet but have not necessarily
1425     // failed (MPI_PENDING).  We make no attempt here to wait on the
1426     // pending requests.  It doesn't make sense for us to do so, because
1427     // in general Teuchos::Comm doesn't attempt to provide robust
1428     // recovery from failed messages.
1429     if (err != MPI_SUCCESS) {
1430       if (err == MPI_ERR_IN_STATUS) {
1431         //
1432         // When MPI_Waitall returns MPI_ERR_IN_STATUS (a standard error
1433         // class), it's telling us to check the error codes in the
1434         // returned statuses.  In that case, we do so and generate a
1435         // detailed exception message.
1436         //
1437         // Figure out which of the requests failed.
1438         Array<std::pair<size_type, int> > errorLocationsAndCodes;
1439         for (size_type k = 0; k < rawMpiStatuses.size(); ++k) {
1440           const int curErr = rawMpiStatuses[k].MPI_ERROR;
1441           if (curErr != MPI_SUCCESS) {
1442             errorLocationsAndCodes.push_back (std::make_pair (k, curErr));
1443           }
1444         }
1445         const size_type numErrs = errorLocationsAndCodes.size();
1446         if (numErrs > 0) {
1447           // There was at least one error.  Assemble a detailed
1448           // exception message reporting which requests failed,
1449           // their error codes, and their source
1450           std::ostringstream os;
1451           os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1452              << mpiErrorCodeToString (err) << "\".  Of the " << count
1453              << " total request" << (count != 1 ? "s" : "") << ", " << numErrs
1454              << " failed.  Here are the indices of the failed requests, and the "
1455             "error codes extracted from their returned MPI_Status objects:"
1456              << std::endl;
1457           for (size_type k = 0; k < numErrs; ++k) {
1458             const size_type errInd = errorLocationsAndCodes[k].first;
1459             os << "Request " << errInd << ": MPI_ERROR = "
1460                << mpiErrorCodeToString (rawMpiStatuses[errInd].MPI_ERROR)
1461                << std::endl;
1462           }
1463           if (someNullRequests) {
1464             os << "  On input to MPI_Waitall, there was at least one MPI_"
1465               "Request that was MPI_REQUEST_NULL.  MPI_Waitall should not "
1466               "normally fail in that case, but we thought we should let you know "
1467               "regardless.";
1468           }
1469           TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1470         }
1471         // If there were no actual errors in the returned statuses,
1472         // well, then I guess everything is OK.  Just keep going.
1473       }
1474       else {
1475         std::ostringstream os;
1476         os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1477            << mpiErrorCodeToString (err) << "\".";
1478         if (someNullRequests) {
1479           os << "  On input to MPI_Waitall, there was at least one MPI_Request "
1480             "that was MPI_REQUEST_NULL.  MPI_Waitall should not normally fail in "
1481             "that case, but we thought we should let you know regardless.";
1482         }
1483         TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1484       }
1485     }
1486 
1487     // Invalidate the input array of requests by setting all entries
1488     // to null.
1489     std::fill (requests.begin(), requests.end(), null);
1490   }
1491 
1492 
1493 
1494   // Called by the one-argument MpiComm::waitAll() variant.
1495   template<typename Ordinal>
1496   void
waitAllImpl(const ArrayView<RCP<CommRequest<Ordinal>>> & requests)1497   waitAllImpl (const ArrayView<RCP<CommRequest<Ordinal> > >& requests)
1498   {
1499     typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1500     const size_type count = requests.size ();
1501     if (count == 0) {
1502       return; // No requests on which to wait
1503     }
1504 
1505     // MpiComm wraps MPI and can't expose any MPI structs or opaque
1506     // objects.  Thus, we have to unpack requests into a separate
1507     // array.  If that's too slow, then your code should just call
1508     // into MPI directly.
1509     //
1510     // Pull out the raw MPI requests from the wrapped requests.
1511     // MPI_Waitall should not fail if a request is MPI_REQUEST_NULL,
1512     // but we keep track just to inform the user.
1513     bool someNullRequests = false;
1514     Array<MPI_Request> rawMpiRequests (count, MPI_REQUEST_NULL);
1515     for (int i = 0; i < count; ++i) {
1516       RCP<CommRequest<Ordinal> > request = requests[i];
1517       if (! request.is_null ()) {
1518         RCP<MpiCommRequestBase<Ordinal> > mpiRequest =
1519           rcp_dynamic_cast<MpiCommRequestBase<Ordinal> > (request);
1520         // releaseRawMpiRequest() sets the MpiCommRequest's raw
1521         // MPI_Request to MPI_REQUEST_NULL.  This makes waitAll() not
1522         // satisfy the strong exception guarantee.  That's OK because
1523         // MPI_Waitall() doesn't promise that it satisfies the strong
1524         // exception guarantee, and we would rather conservatively
1525         // invalidate the handles than leave dangling requests around
1526         // and risk users trying to wait on the same request twice.
1527         rawMpiRequests[i] = mpiRequest->releaseRawMpiRequest ();
1528       }
1529       else { // Null requests map to MPI_REQUEST_NULL
1530         rawMpiRequests[i] = MPI_REQUEST_NULL;
1531         someNullRequests = true;
1532       }
1533     }
1534 
1535     // This is the part where we've finally peeled off the wrapper and
1536     // we can now interact with MPI directly.
1537     //
1538     // MPI lets us pass in the named constant MPI_STATUSES_IGNORE for
1539     // the MPI_Status array output argument in MPI_Waitall(), which
1540     // tells MPI not to bother writing out the statuses.
1541     const int err = MPI_Waitall (count, rawMpiRequests.getRawPtr(),
1542                                  MPI_STATUSES_IGNORE);
1543 
1544     // In MPI_Waitall(), an error indicates that one or more requests
1545     // failed.  In that case, there could be requests that completed
1546     // (their MPI_Status' error field is MPI_SUCCESS), and other
1547     // requests that have not completed yet but have not necessarily
1548     // failed (MPI_PENDING).  We make no attempt here to wait on the
1549     // pending requests.  It doesn't make sense for us to do so,
1550     // because in general Teuchos::Comm doesn't attempt to provide
1551     // robust recovery from failed messages.
1552     if (err != MPI_SUCCESS) {
1553       std::ostringstream os;
1554       os << "Teuchos::MpiComm::waitAll: MPI_Waitall() failed with error \""
1555          << mpiErrorCodeToString (err) << "\".";
1556       if (someNullRequests) {
1557         os << std::endl << "On input to MPI_Waitall, there was at least one "
1558           "MPI_Request that was MPI_REQUEST_NULL.  MPI_Waitall should not "
1559           "normally fail in that case, but we thought we should let you know "
1560           "regardless.";
1561       }
1562       TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
1563     }
1564 
1565     // Invalidate the input array of requests by setting all entries
1566     // to null.  We delay this until the end, since some
1567     // implementations of CommRequest might hold the only reference to
1568     // the communication buffer, and we don't want that to go away
1569     // until we've waited on the communication operation.
1570     std::fill (requests.begin(), requests.end(), null);
1571   }
1572 
1573 } // namespace (anonymous)
1574 
1575 
1576 
1577 template<typename Ordinal>
1578 void
1579 MpiComm<Ordinal>::
waitAll(const ArrayView<RCP<CommRequest<Ordinal>>> & requests) const1580 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests) const
1581 {
1582   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests)" );
1583   // Call the one-argument version of waitAllImpl, to avoid overhead
1584   // of handling statuses (which the user didn't want anyway).
1585   waitAllImpl<Ordinal> (requests);
1586 }
1587 
1588 
1589 template<typename Ordinal>
1590 void
1591 MpiComm<Ordinal>::
waitAll(const ArrayView<RCP<CommRequest<Ordinal>>> & requests,const ArrayView<RCP<CommStatus<Ordinal>>> & statuses) const1592 waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
1593          const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const
1594 {
1595   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::waitAll(requests, statuses)" );
1596 
1597   typedef typename ArrayView<RCP<CommRequest<Ordinal> > >::size_type size_type;
1598   const size_type count = requests.size();
1599 
1600   TEUCHOS_TEST_FOR_EXCEPTION(count != statuses.size(),
1601     std::invalid_argument, "Teuchos::MpiComm::waitAll: requests.size() = "
1602     << count << " != statuses.size() = " << statuses.size() << ".");
1603 
1604   Array<MPI_Status> rawMpiStatuses (count);
1605   waitAllImpl<Ordinal> (requests, rawMpiStatuses());
1606 
1607   // Repackage the raw MPI_Status structs into the wrappers.
1608   for (size_type i = 0; i < count; ++i) {
1609     statuses[i] = mpiCommStatus<Ordinal> (rawMpiStatuses[i]);
1610   }
1611 }
1612 
1613 
1614 template<typename Ordinal>
1615 RCP<CommStatus<Ordinal> >
wait(const Ptr<RCP<CommRequest<Ordinal>>> & request) const1616 MpiComm<Ordinal>::wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const
1617 {
1618   TEUCHOS_COMM_TIME_MONITOR( "Teuchos::MpiComm::wait(...)" );
1619 
1620   if (is_null (*request)) {
1621     return null; // Nothing to wait on ...
1622   }
1623   else {
1624     RCP<CommStatus<Ordinal> > status = (*request)->wait ();
1625     // mfh 22 Oct 2012: The unit tests expect waiting on the
1626     // CommRequest to invalidate it by setting it to null.
1627     *request = null;
1628     return status;
1629   }
1630 }
1631 
1632 template<typename Ordinal>
1633 RCP< Comm<Ordinal> >
duplicate() const1634 MpiComm<Ordinal>::duplicate() const
1635 {
1636   MPI_Comm origRawComm = *rawMpiComm_;
1637   MPI_Comm newRawComm = MPI_COMM_NULL;
1638   const int err = MPI_Comm_dup (origRawComm, &newRawComm);
1639   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, "Teuchos"
1640     "::MpiComm::duplicate: MPI_Comm_dup failed with the following error: "
1641     << mpiErrorCodeToString (err));
1642 
1643   // Wrap the raw communicator, and pass the (const) wrapped
1644   // communicator to MpiComm's constructor.  We created the raw comm,
1645   // so we have to supply a function that frees it after use.
1646   RCP<OpaqueWrapper<MPI_Comm> > wrapped =
1647     opaqueWrapper<MPI_Comm> (newRawComm, details::safeCommFree);
1648   // Since newComm's raw MPI_Comm is the result of an MPI_Comm_dup,
1649   // its messages cannot collide with those of any other MpiComm.
1650   // This means we can assign its tag without an MPI_Bcast.
1651   RCP<MpiComm<Ordinal> > newComm =
1652     rcp (new MpiComm<Ordinal> (wrapped.getConst (), minTag_));
1653   return rcp_implicit_cast<Comm<Ordinal> > (newComm);
1654 }
1655 
1656 
1657 template<typename Ordinal>
1658 RCP< Comm<Ordinal> >
split(const int color,const int key) const1659 MpiComm<Ordinal>::split(const int color, const int key) const
1660 {
1661   MPI_Comm newComm;
1662   const int splitReturn =
1663     MPI_Comm_split (*rawMpiComm_,
1664                     color < 0 ? MPI_UNDEFINED : color,
1665                     key,
1666                     &newComm);
1667   TEUCHOS_TEST_FOR_EXCEPTION(
1668     splitReturn != MPI_SUCCESS,
1669     std::logic_error,
1670     "Teuchos::MpiComm::split: Failed to create communicator with color "
1671     << color << "and key " << key << ".  MPI_Comm_split failed with error \""
1672     << mpiErrorCodeToString (splitReturn) << "\".");
1673   if (newComm == MPI_COMM_NULL) {
1674     return RCP< Comm<Ordinal> >();
1675   } else {
1676     RCP<const OpaqueWrapper<MPI_Comm> > wrapped =
1677       opaqueWrapper<MPI_Comm> (newComm, details::safeCommFree);
1678     // Since newComm's raw MPI_Comm is the result of an
1679     // MPI_Comm_split, its messages cannot collide with those of any
1680     // other MpiComm.  This means we can assign its tag without an
1681     // MPI_Bcast.
1682     return rcp (new MpiComm<Ordinal> (wrapped, minTag_));
1683   }
1684 }
1685 
1686 
1687 template<typename Ordinal>
1688 RCP< Comm<Ordinal> >
createSubcommunicator(const ArrayView<const int> & ranks) const1689 MpiComm<Ordinal>::createSubcommunicator(const ArrayView<const int> &ranks) const
1690 {
1691   int err = MPI_SUCCESS; // For error codes returned by MPI functions
1692 
1693   // Get the group that this communicator is in.
1694   MPI_Group thisGroup;
1695   err = MPI_Comm_group (*rawMpiComm_, &thisGroup);
1696   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1697     "Failed to obtain the current communicator's group.  "
1698     "MPI_Comm_group failed with error \""
1699     << mpiErrorCodeToString (err) << "\".");
1700 
1701   // Create a new group with the specified members.
1702   MPI_Group newGroup;
1703   // It's rude to cast away const, but MPI functions demand it.
1704   //
1705   // NOTE (mfh 14 Aug 2012) Please don't ask for &ranks[0] unless you
1706   // know that ranks.size() > 0.  That's why I'm using getRawPtr().
1707   err = MPI_Group_incl (thisGroup, ranks.size(),
1708                         const_cast<int*> (ranks.getRawPtr ()), &newGroup);
1709   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1710     "Failed to create subgroup.  MPI_Group_incl failed with error \""
1711     << mpiErrorCodeToString (err) << "\".");
1712 
1713   // Create a new communicator from the new group.
1714   MPI_Comm newComm;
1715   try {
1716     err = MPI_Comm_create (*rawMpiComm_, newGroup, &newComm);
1717     TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1718       "Failed to create subcommunicator.  MPI_Comm_create failed with error \""
1719       << mpiErrorCodeToString (err) << "\".");
1720   } catch (...) {
1721     // Attempt to free the new group before rethrowing.  If
1722     // successful, this will prevent a memory leak due to the "lost"
1723     // group that was allocated successfully above.  Since we're
1724     // throwing std::logic_error anyway, we can only promise
1725     // best-effort recovery; thus, we don't check the error code.
1726     (void) MPI_Group_free (&newGroup);
1727     (void) MPI_Group_free (&thisGroup);
1728     throw;
1729   }
1730 
1731   // We don't need the group any more, so free it.
1732   err = MPI_Group_free (&newGroup);
1733   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1734     "Failed to free subgroup.  MPI_Group_free failed with error \""
1735     << mpiErrorCodeToString (err) << "\".");
1736   err = MPI_Group_free (&thisGroup);
1737   TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::logic_error,
1738     "Failed to free subgroup.  MPI_Group_free failed with error \""
1739     << mpiErrorCodeToString (err) << "\".");
1740 
1741   if (newComm == MPI_COMM_NULL) {
1742     return RCP<Comm<Ordinal> > ();
1743   } else {
1744     using Teuchos::details::safeCommFree;
1745     typedef OpaqueWrapper<MPI_Comm> ow_type;
1746     RCP<const ow_type> wrapper =
1747       rcp_implicit_cast<const ow_type> (opaqueWrapper (newComm, safeCommFree));
1748     // Since newComm's raw MPI_Comm is the result of an
1749     // MPI_Comm_create, its messages cannot collide with those of any
1750     // other MpiComm.  This means we can assign its tag without an
1751     // MPI_Bcast.
1752     return rcp (new MpiComm<Ordinal> (wrapper, minTag_));
1753   }
1754 }
1755 
1756 
1757 // Overridden from Describable
1758 
1759 
1760 template<typename Ordinal>
description() const1761 std::string MpiComm<Ordinal>::description() const
1762 {
1763   std::ostringstream oss;
1764   oss
1765     << typeName(*this)
1766     << "{"
1767     << "size="<<size_
1768     << ",rank="<<rank_
1769     << ",rawMpiComm="<<static_cast<MPI_Comm>(*rawMpiComm_)
1770     <<"}";
1771   return oss.str();
1772 }
1773 
1774 
1775 #ifdef TEUCHOS_MPI_COMM_DUMP
1776 template<typename Ordinal>
1777 bool MpiComm<Ordinal>::show_dump = false;
1778 #endif
1779 
1780 
1781 // private
1782 
1783 
1784 template<typename Ordinal>
assertRank(const int rank,const std::string & rankName) const1785 void MpiComm<Ordinal>::assertRank(const int rank, const std::string &rankName) const
1786 {
1787   TEUCHOS_TEST_FOR_EXCEPTION(
1788     ! ( 0 <= rank && rank < size_ ), std::logic_error
1789     ,"Error, "<<rankName<<" = " << rank << " is not < 0 or is not"
1790     " in the range [0,"<<size_-1<<"]!"
1791     );
1792 }
1793 
1794 
1795 } // namespace Teuchos
1796 
1797 
1798 template<typename Ordinal>
1799 Teuchos::RCP<Teuchos::MpiComm<Ordinal> >
createMpiComm(const RCP<const OpaqueWrapper<MPI_Comm>> & rawMpiComm)1800 Teuchos::createMpiComm(
1801   const RCP<const OpaqueWrapper<MPI_Comm> > &rawMpiComm
1802   )
1803 {
1804   if( rawMpiComm.get()!=NULL && *rawMpiComm != MPI_COMM_NULL )
1805     return rcp(new MpiComm<Ordinal>(rawMpiComm));
1806   return Teuchos::null;
1807 }
1808 
1809 
1810 template<typename Ordinal>
1811 MPI_Comm
getRawMpiComm(const Comm<Ordinal> & comm)1812 Teuchos::getRawMpiComm(const Comm<Ordinal> &comm)
1813 {
1814   return *(
1815     dyn_cast<const MpiComm<Ordinal> >(comm).getRawMpiComm()
1816     );
1817 }
1818 
1819 
1820 #endif // HAVE_TEUCHOS_MPI
1821 #endif // TEUCHOS_MPI_COMM_HPP
1822 
1823