1 // @HEADER
2 // ***********************************************************************
3 //
4 //                    Teuchos: Common Tools Package
5 //                 Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef TEUCHOS_COMM_HPP
43 #define TEUCHOS_COMM_HPP
44 
45 #include "Teuchos_ReductionOp.hpp"
46 #include "Teuchos_ArrayRCP.hpp"
47 
48 
49 namespace Teuchos {
50 
51 /// \class CommStatus
52 /// \brief Encapsulation of the result of a receive (blocking or nonblocking).
53 ///
54 /// An instance of this class encapsulates the result of a receive.
55 /// (An MPI implementation would wrap MPI_Status.)  You can query it
56 /// for information like the rank of the process that sent you the
57 /// message.  (This is useful if your receive specified a negative
58 /// source rank, indicating that you would accept a message from any
59 /// process in the communicator.)
60 ///
61 /// \tparam OrdinalType The same template parameter as Comm.  Only use
62 ///   \c int here.  We only make this a template class for
63 ///   compatibility with Comm.
64 ///
65 /// \note For now, this class only exposes the rank of the process
66 ///   that sent the message (the "source rank") and its tag.  Later,
67 ///   we might expose other fields of MPI_Status in this interface.
68 ///   For now, you can attempt a dynamic cast to MpiCommStatus to
69 ///   access all three fields (MPI_SOURCE, MPI_TAG, and MPI_ERROR).
template<class OrdinalType>
class CommStatus {
public:
  //! Destructor (declared virtual so that deleting through a base pointer is safe).
  virtual ~CommStatus() {}

  /// \brief The source rank that sent the message.
  ///
  /// \return The rank of the sending process, as an \c OrdinalType.
  ///
  /// \note This method is not const-qualified, so it can only be
  ///   called on a non-const CommStatus.
  virtual OrdinalType getSourceRank () = 0;

  /// \brief The tag of the received message.
  ///
  /// \return The message tag, as an \c OrdinalType.
  ///
  /// \note This method is not const-qualified, so it can only be
  ///   called on a non-const CommStatus.
  virtual OrdinalType getTag () = 0;
};
82 
// Forward declaration of Comm, so the name is visible before the
// full class definition that appears later in this header.
template<typename OrdinalType> class Comm;
86 
87 /// \class CommRequest
88 /// \brief Encapsulation of a pending nonblocking communication operation.
89 /// \tparam OrdinalType Same as the template parameter of Comm.
90 ///
91 /// An instance of (a subclass of) this class represents a nonblocking
92 /// communication operation, such as a nonblocking send, receive, or
93 /// collective.  To wait on the communication operation, you may give
94 /// the CommRequest to functions like wait() or waitAll() (which may
95 /// be found in Teuchos_CommHelpers.hpp).  Here is an example of how
96 /// to use wait().
97 /// \code
98 /// const int sourceRank = ...; // Rank of the sending process.
99 /// RCP<const Comm<int> > comm = ...; // The communicator.
100 /// ArrayRCP<double> buf (...); // Buffer for incoming data.
101 /// RCP<CommRequest<int> > req = ireceive (comm, buf, sourceRank);
102 ///
103 /// // ... Do some other things ...
104 ///
/// // Wait on the request.  This blocks until the receive has
/// // completed.  When it finishes, it invalidates the req
/// // reference, and returns a status (which wraps MPI_Status
/// // in an MPI implementation).
109 /// RCP<CommStatus<int> > status = wait (comm, ptr (&req));
110 /// \endcode
111 ///
112 /// This object's destructor cancels the request without
113 /// communication.  If you wish, you may rely on this behavior for
114 /// speculative communication.  For example:
115 /// \code
116 /// const int sourceRank = ...; // Rank of the sending process.
117 /// RCP<const Comm<int> > comm = ...; // The communicator.
118 /// ArrayRCP<double> buf (...); // Buffer for incoming data.
119 /// RCP<CommRequest<int> > req = ireceive (comm, buf, sourceRank);
120 ///
121 /// // ... Do some other things ...
122 /// // ... Find out we didn't need to receive data ...
123 ///
124 /// // This cancels the request.  We could also just let
125 /// // the one reference to the request fall out of scope.
126 /// req = null;
127 /// \endcode
128 ///
129 /// \note To implementers: The MPI implementation of this class
130 ///   (MpiCommRequest) wraps MPI_Request.  The MPI version of
131 ///   waitAll() will need to unpack the array of wrapped requests, and
132 ///   then pack up the resulting MPI_Request after waiting on them.
133 ///   It would be preferable to have a class \c CommRequests that
134 ///   encapsulates a set of requests, so that you can avoid this
135 ///   unpacking and packing.
136 template<class OrdinalType>
137 class CommRequest : public Teuchos::Describable {
138 public:
139   /// \brief Destructor; cancels the request if it is still pending.
140   ///
141   /// Canceling a communication request must always be a local
142   /// operation.  An MPI implementation may achieve this by first
143   /// calling MPI_Cancel to cancel the request, then calling MPI_Wait
144   /// (which behaves as a local operation for a canceled request) to
145   /// complete the canceled request (as required by the MPI standard).
~CommRequest()146   virtual ~CommRequest() {}
147 
148   /// Wait on this request (a blocking operation).
149   virtual RCP<CommStatus<OrdinalType> > wait () = 0;
150 };
151 
152 /// \class Comm
153 /// \brief Abstract interface for distributed-memory communication.
154 /// \tparam Ordinal Type of indices used for communication.
155 ///
156 /// \section Teuchos_Comm_What What is Comm?
157 ///
158 /// This class is Teuchos' interface to distributed-memory
159 /// communication between one or more parallel processes.  It presents
160 /// an interface very much like that of MPI (the Message Passing
161 /// Interface).  Teuchos provides two implementations of Comm:
162 /// - An MPI (Message Passing Interface) implementation, MpiComm
163 /// - A "serial" implementation, SerialComm, that only has one process
164 ///
165 /// Comm is an abstract interface.  You cannot create a Comm directly.
166 /// You have to create one of the subclasses.  The normal way to
167 /// handle a Comm is to pass it around using RCP (a reference-counted
168 /// "smart" pointer).  For example:
169 ///
170 /// \code
171 /// // Make a Comm.  This one happens to wrap MPI_COMM_WORLD.
172 /// RCP<const Comm<int> > comm = rcp (new MpiComm (MPI_COMM_WORLD));
173 /// // Equivalent of MPI_Comm_rank
174 /// const int myRank = comm->getRank ();
175 /// // Equivalent of MPI_Comm_size
176 /// const int numProcs = comm->getSize ();
/// // Equivalent of MPI_Barrier
178 /// comm->barrier ();
179 /// \endcode
180 ///
181 /// Comm's communication methods that actually send or receive data
182 /// accept that data as an array of \c char.  You should never call
183 /// these methods directly.  Instead, you should use the nonmember
184 /// "helper" functions in Teuchos_CommHelpers.hpp.  These methods are
185 /// templated on the \c Packet type, that is, the type of data you
186 /// want to send or receive.  See the example below.
187 ///
188 /// \section Teuchos_Comm_Handle Treat <tt>RCP<const Comm<int> ></tt> like an opaque handle
189 ///
190 /// You should consider an <tt>RCP<const Comm<int> ></tt> as
191 /// equivalent to the MPI_Comm opaque handle, except that the RCP also
192 /// does reference counting to ensure memory safety when using the
193 /// same communicator in different parts of the code.  That is,
194 /// copying the RCP does not create a new communicator; the following
195 /// two codes do about the same thing, except with a different syntax
196 /// (and reference counting in the second code).
197 ///
198 /// Raw MPI_Comm handles:
199 /// \code
200 /// MPI_Comm comm = ...;
201 /// // sameComm is THE SAME COMMUNICATOR as comm.
202 /// MPI_Comm sameComm = comm;
203 /// \endcode
204 ///
205 /// Reference-counted pointers to Comm:
206 /// \code
207 /// RCP<const Comm<int> > comm = ...;
208 /// // *sameComm is THE SAME COMMUNICATOR as *comm.
209 /// RCP<const Comm<int> > sameComm = comm;
210 /// \endcode
211 ///
212 /// If you want to make a "new communicator" rather than just "copy
213 /// the handle," you should call the duplicate() method.  This has the
214 /// same behavior as MPI_Comm_dup (which see).
215 ///
216 /// The "reference counting" feature means that the subclass of Comm
217 /// will take care of freeing the underlying MPI_Comm (and any other
218 /// data structures it may use) by calling MPI_Comm_free if necessary,
219 /// once the reference count of the RCP goes to zero.
220 ///
221 /// \warning Do <i>not</i> pass around subclasses of Comm by value!
222 ///   Comm or its subclasses by themselves do not have handle
223 ///   semantics.  Their copy constructors likely do not behave as you
224 ///   would expect if the classes had handle semantics.
225 ///
226 /// \section Teuchos_Comm_How How do I make a Comm?
227 ///
/// Comm works whether or not you have built Trilinos with MPI
/// support.  If you want to make a "default" Comm that is the
/// equivalent of MPI_COMM_WORLD, but you don't know if your Trilinos
/// build has MPI enabled, you may use GlobalMPISession to call
/// MPI_Init if necessary, and DefaultComm to "get a default
/// communicator."  For example:
234 /// \code
235 /// int main (int argc, char* argv[]) {
236 ///   using Teuchos::Comm;
237 ///   using Teuchos::DefaultComm;
238 ///   using Teuchos::RCP;
239 ///
240 ///   // This replaces the call to MPI_Init.  If you didn't
241 ///   // build with MPI, this doesn't call MPI functions.
///   Teuchos::GlobalMPISession session (&argc, &argv, NULL);
243 ///   // comm is the equivalent of MPI_COMM_WORLD.
244 ///   RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
245 ///
246 ///   // ... use comm in your code as you would use MPI_COMM_WORLD ...
247 ///
248 ///   // We don't need to call MPI_Finalize, since the
249 ///   // destructor of GlobalMPISession does that for us.
250 ///   return EXIT_SUCCESS;
251 /// }
252 /// \endcode
253 /// This code works whether or not you built Trilinos with MPI
254 /// support.  It is not necessary to use GlobalMPISession, but it's
255 /// useful so you don't have to remember to call MPI_Finalize.  If you
256 /// don't want to use GlobalMPISession, you can still call
257 /// <tt>DefaultComm<int>::getComm()</tt>, though you must have called
258 /// MPI_Init first if you build Trilinos with MPI support.
259 /// Furthermore, if you know MPI is present, you don't need to use
260 /// DefaultComm.  You may simply pass MPI_COMM_WORLD directly to
261 /// MpiComm, like this:
262 /// \code
263 /// RCP<const Comm<int> > comm = rcp (new MpiComm (MPI_COMM_WORLD));
264 /// \endcode
265 /// You may also pass an arbitrary MPI_Comm directly into MpiComm's
266 /// constructor, though you are responsible for freeing it after use
267 /// (via MPI_Comm_free) if necessary.  You may automate the freeing
268 /// of your MPI_Comm by using OpaqueWrapper (which see).
269 ///
270 /// \section Teuchos_Comm_Use How do I use Comm?
271 ///
/// As we mentioned above, for communication of data with Comm, you
/// should use the nonmember "helper" functions in
274 /// Teuchos_CommHelpers.hpp.  These methods are templated on the
275 /// <tt>Packet</tt> type, that is, the type of data you want to send
276 /// or receive.  For example, suppose you have two processes (with
277 /// ranks 0 and 1, respectively), and you want to send an array of
278 /// 10 <tt>double</tt> from Process 0 to Process 1.  Both processes have
279 /// defined <tt>RCP<const Comm<int> > comm</tt> as above.  Here is the
280 /// code on Process 0:
281 /// \code
282 /// const int count = 10; // Send 10 doubles
283 /// double values[10] = ...;
284 /// const int destinationRank = 1; // Send to Process 1
285 /// // You may be able to omit the template arguments of 'send' here.
286 /// Teuchos::send<int, double> (*comm, 10, values, destinationRank);
287 /// \endcode
288 /// Here is the code on Process 1:
289 /// \code
290 /// const int count = 10; // Receive 10 doubles
291 /// double values[10]; // Will be overwritten by receive
292 /// const int sourceRank = 0; // Receive from Process 0
293 /// // You may be able to omit the template arguments of 'receive' here.
294 /// Teuchos::receive<int, double> (*comm, sourceRank, 10, values);
295 /// \endcode
296 /// Please refer to the documentation in Teuchos_CommHelpers.hpp for
297 /// more details.
298 ///
299 /// \section Teuchos_Comm_Former Former documentation
300 ///
/// This interface is templated on the ordinal type but only deals with buffers
/// of untyped data represented as arrays of <tt>char</tt> type. All reduction
/// operations that are initiated by the concrete communicator object are
/// performed by user-defined <tt>ReductOpBase</tt> objects.  It is the
/// responsibility of the <tt>ReductOpBase</tt> object to know what the correct
306 /// data type is, to perform casts or serializations/unserializations to and
307 /// from <tt>char[]</tt> buffers, and to know how to reduce the objects
308 /// correctly.  It is strictly up to the client to correctly convert data types
309 /// to <tt>char[]</tt> arrays but there is a great deal of helper code to make
310 /// this easy and safe.
311 template<typename Ordinal>
312 class Comm : virtual public Describable {
313 public:
314   /// \brief The current tag.
315   ///
316   /// \warning This method is ONLY for use by Teuchos developers.
317   ///   Users should not depend on the interface of this method.
318   ///   It may change or disappear at any time without warning.
319   virtual int getTag () const = 0;
320 
321   //! @name Destructor
322   //@{
323 
324   //! Destructor, declared virtual for safety of derived classes.
~Comm()325   virtual ~Comm() {}
326   //@}
327 
328   //! @name Query functions
329   //@{
330 
331   /** \brief Returns the rank of this process.
332    *
333    * <b>Postconditions:</b><ul>
334    * <li><tt>0 <= return && return < this->getSize()</tt>
335    * </ul>
336    */
337   virtual int getRank() const = 0;
338 
339   /** \brief Returns the number of processes that make up this communicator.
340    *
341    * <b>Postconditions:</b><ul>
342    * <li><tt>return > 0</tt>
343    * </ul>
344    */
345   virtual int getSize() const = 0;
346 
347   //@}
348 
349   //! @name Collective Operations
350   //@{
351 
352   /** \brief Pause every process in <tt>*this</tt> communicator until all the
353    * processes reach this point.
354    */
355   virtual void barrier() const = 0;
356 
357   /** \brief Broadcast values from the root process to the slave processes.
358    *
359    * \param rootRank [in] The rank of the root process.
360    *
361    * \param count [in] The number of bytes in <tt>buffer[]</tt>.
362    *
363    * \param buffer [in/out] Array (length <tt>bytes</tt>) of packed data.
364    * Must be set on input on the root processes with rank <tt>root</tt>.  On
365    * output, each processs, including the root process contains the data.
366    *
367    * <b>Preconditions:</b><ul>
368    * <li><tt>0 <= rootRank && rootRank < this->getSize()</tt>
369    * </ul>
370    */
371   virtual void broadcast(
372     const int rootRank, const Ordinal bytes, char buffer[]
373     ) const = 0;
374 
375   //! Gather values from all processes to the root process.
376   virtual void
377   gather (const Ordinal sendBytes, const char sendBuffer[],
378           const Ordinal recvBytes, char recvBuffer[],
379           const int root) const = 0;
380 
381   /** \brief Gather values from each process to collect on all processes.
382    *
383    * \param sendBytes [in] Number of entires in <tt>sendBuffer[]</tt> on
384    * input.
385    *
386    * \param sendBuffer [in] Array (length <tt>sendBytes</tt>) of data being
387    * sent from each process.
388    *
389    * \param recvBytes [in] Number of entires in <tt>recvBuffer[]</tt> which
390    * must be equal to <tt>sendBytes*this->getSize()</tt>.  This field is just
391    * here for debug checking.
392    *
393    * \param recvBuffer [out] Array (length <tt>recvBytes</tt>) of all of the
394    * entires sent from each processes.  Specifically,
395    * <tt>recvBuffer[sendBytes*j+i]</tt>, for <tt>j=0...this->getSize()-1</tt>
396    * and <tt>i=0...sendBytes-1</tt>, is the entry <tt>sendBuffer[i]</tt> from
397    * process with rank <tt>j</tt>.
398    *
399    * <b>Preconditions:</b><ul>
400    * <li><tt>recvBytes==sendBytes*this->getSize()</tt>
401    * </ul>
402    */
403   virtual void gatherAll(
404     const Ordinal sendBytes, const char sendBuffer[]
405     ,const Ordinal recvBytes, char recvBuffer[]
406     ) const = 0;
407 
408   /** \brief Global reduction.
409    *
410    * \param reductOp [in] The user-defined reduction operation
411    *
412    * \param bytes [in] The length of the buffers <tt>sendBuffer[]</tt> and
413    * <tt>globalReducts[]</tt>.
414    *
415    * \param sendBuffer [in] Array (length <tt>bytes</tt>) of the data
416    * contributed from each process.
417    *
418    * \param globalReducts [out] Array (length <tt>bytes</tt>) of the global
419    * reduction from each process.
420    */
421   virtual void reduceAll(
422     const ValueTypeReductionOp<Ordinal,char> &reductOp
423     ,const Ordinal bytes, const char sendBuffer[], char globalReducts[]
424     ) const = 0;
425 
426   /** \brief Scan reduction.
427    *
428    * \param reductOp [in] The user-defined reduction operation
429    *
430    * \param bytes [in] The length of the buffers <tt>sendBuffer[]</tt> and
431    * <tt>scanReducts[]</tt>.
432    *
433    * \param sendBuffer [in] Array (length <tt>bytes</tt>) of the data
434    * contributed from each process.
435    *
436    * \param scanReducts [out] Array (length <tt>bytes</tt>) of the reduction
437    * up to and including this process.
438    */
439         virtual void scan(
440     const ValueTypeReductionOp<Ordinal,char> &reductOp
441     ,const Ordinal bytes, const char sendBuffer[], char scanReducts[]
442     ) const = 0;
443 
444   //! @name Blocking Point-to-Point Operations
445   //@{
446 
447   /** \brief Possibly blocking send of data from this process to another process.
448    *
449    * This routine does not return until you can reuse the send buffer.
450    * Whether this routine blocks depends on whether the MPI
451    * implementation buffers.
452    *
453    * \param bytes [in] The number of bytes of data being passed between
454    * processes.
455    *
456    * \param sendBuffer [in] Array (length <tt>bytes</tt>) of data being sent
457    * from this process.  This buffer can be immediately destroyed or reused as
458    * soon as the function exits (that is why this function is "blocking").
459    *
460    * \param destRank [in] The rank of the process to receive the data.
461    *
462    * <b>Preconditions:</b><ul>
463    * <li><tt>0 <= destRank && destRank < this->getSize()</tt>
464    * <li><tt>destRank != this->getRank()</tt>
465    * </ul>
466    */
467   virtual void send(
468     const Ordinal bytes, const char sendBuffer[], const int destRank
469     ) const = 0;
470 
471   //! Variant of send() that takes a tag.
472   virtual void
473   send (const Ordinal bytes,
474         const char sendBuffer[],
475         const int destRank,
476         const int tag) const = 0;
477 
478   /** \brief Always blocking send of data from this process to another process.
479    *
480    * This routine blocks until the matching receive posts.  After it
481    * returns, you are allowed to reuse the send buffer.
482    *
483    * \param bytes [in] The number of bytes of data being passed between
484    * processes.
485    *
486    * \param sendBuffer [in] Array (length <tt>bytes</tt>) of data being sent
487    * from this process.  This buffer can be immediately destroyed or reused as
488    * soon as the function exits (that is why this function is "blocking").
489    *
490    * \param destRank [in] The rank of the process to receive the data.
491    *
492    * <b>Preconditions:</b><ul>
493    * <li><tt>0 <= destRank && destRank < this->getSize()</tt>
494    * <li><tt>destRank != this->getRank()</tt>
495    * </ul>
496    */
497   virtual void ssend(
498     const Ordinal bytes, const char sendBuffer[], const int destRank
499     ) const = 0;
500 
501   //! Variant of ssend() that takes a message tag.
502   virtual void
503   ssend (const Ordinal bytes,
504          const char sendBuffer[],
505          const int destRank,
506          const int tag) const = 0;
507 
508   /** \brief Blocking receive of data from this process to another process.
509    *
510    * \param sourceRank [in] The rank of the process to receive the data from.
511    * If <tt>sourceRank < 0</tt> then data will be received from any process.
512    *
513    * \param bytes [in] The number of bytes of data being passed between
514    * processes.
515    *
516    * \param recvBuffer [out] Array (length <tt>bytes</tt>) of data being
517    * received from this process.  This buffer can be immediately used to
518    * access the data as soon as the function exits (that is why this function
519    * is "blocking").
520    *
521    * <b>Preconditions:</b><ul>
522    * <li>[<tt>sourceRank >= 0] <tt>sourceRank < this->getSize()</tt>
523    * <li><tt>sourceRank != this->getRank()</tt>
524    * </ul>
525    *
526    * \return Returns the senders rank.
527    */
528   virtual int receive(
529     const int sourceRank, const Ordinal bytes, char recvBuffer[]
530     ) const = 0;
531 
532 
533   /** \brief Ready send of data from this process to another process.
534    *
535    * \param sendBuffer [in] The data to be sent.
536    *
537    * \param destRank [in] The rank of the process to receive the data.
538    *
539    * <b>Preconditions:</b><ul>
540    * <li><tt>0 <= destRank && destRank < this->getSize()</tt>
541    * <li><tt>destRank != this->getRank()</tt>
542    * </ul>
543    */
544   virtual void readySend(
545     const ArrayView<const char> &sendBuffer,
546     const int destRank
547     ) const = 0;
548 
549   //! Variant of readySend() that accepts a message tag.
550   virtual void
551   readySend (const Ordinal bytes,
552              const char sendBuffer[],
553              const int destRank,
554              const int tag) const = 0;
555 
556   //@}
557   //! @name Non-blocking Point-to-Point Operations
558   //@{
559 
560   /** \brief Non-blocking send.
561    *
562    * \param sendBuffer [in] The data buffer to be sent.
563    *
564    * \param destRank [in] The rank of the process to receive the data.
565    *
566    * <b>Preconditions:</b><ul>
567    * <li><tt>0 <= destRank && destRank < this->getSize()</tt>
568    * <li><tt>destRank != this->getRank()</tt>
569    * </ul>
570    */
571   virtual RCP<CommRequest<Ordinal> > isend(
572     const ArrayView<const char> &sendBuffer,
573     const int destRank
574     ) const = 0;
575 
576   //! Variant of isend() that takes a tag.
577   virtual RCP<CommRequest<Ordinal> >
578   isend (const ArrayView<const char> &sendBuffer,
579          const int destRank,
580          const int tag) const = 0;
581 
582   /** \brief Non-blocking receive.
583    *
584    * \param recvBuffer [out] The location for storing the received data.
585    *
586    * \param sourceRank [in] The rank of the process to receive the data from.
587    * If <tt>sourceRank < 0</tt> then data will be received from any process.
588    *
589    * <b>Preconditions:</b><ul>
590    * <li>[<tt>sourceRank >= 0] <tt>sourceRank < this->getSize()</tt>
591    * <li><tt>sourceRank != this->getRank()</tt>
592    * </ul>
593    *
594    * \return Returns the senders rank.
595    */
596   virtual RCP<CommRequest<Ordinal> > ireceive(
597     const ArrayView<char> &recvBuffer,
598     const int sourceRank
599     ) const = 0;
600 
601   //! Variant of ireceive that takes a tag.
602   virtual RCP<CommRequest<Ordinal> >
603   ireceive (const ArrayView<char> &recvBuffer,
604             const int sourceRank,
605             const int tag) const = 0;
606 
607   /** \brief Wait on a set of communication requests.
608    *
609    * <b>Preconditions:</b><ul>
610    * <li> <tt>requests.size() > 0</tt>
611    * </ul>
612    *
613    * <b>Postconditions:</b><ul>
614    * <li> <tt>is_null(request[i]))</tt> for <tt>i=0...requests.size()-1</tt>
615    * </ul>
616    */
617   virtual void waitAll(
618     const ArrayView<RCP<CommRequest<Ordinal> > > &requests
619     ) const = 0;
620 
621   /// \brief Wait on communication requests, and return their statuses.
622   ///
623   /// \pre requests.size() == statuses.size()
624   ///
625   /// \pre For i in 0, 1, ..., requests.size()-1, requests[i] is
626   ///   either null or requests[i] was returned by an ireceive() or
627   ///   isend().
628   ///
629   /// \post For i in 0, 1, ..., requests.size()-1,
630   ///   requests[i].is_null() is true.
631   ///
632   /// \param requests [in/out] On input: the requests on which to
633   ///   wait.  On output: all set to null.
634   ///
635   /// \param statuses [out] The status results of waiting on the
636   ///   requests.
637   virtual void
638   waitAll (const ArrayView<RCP<CommRequest<Ordinal> > >& requests,
639            const ArrayView<RCP<CommStatus<Ordinal> > >& statuses) const = 0;
640 
641   /// \brief Wait on a single communication request, and return its status.
642   ///
643   /// \param request [in/out] On input: request is not null, and
644   /// <tt>*request</tt> is either null (in which case this function
645   /// does nothing and returns null) or an RCP of a valid CommRequest
646   /// instance representing an outstanding communication request.  On
647   /// output: If the communication request completed successfully, we
648   /// set <tt>*request</tt> to null, indicating that the request has
649   /// completed.  (This helps prevent common bugs like trying to
650   /// complete the same request twice.)
651   ///
652   /// \return If *request is null, this method returns null.
653   /// Otherwise this method returns a CommStatus instance representing
654   /// the result of completing the request.  In the case of a
655   /// nonblocking receive request, you can query the CommStatus
656   /// instance for the process ID of the sending process.  (This is
657   /// useful for receiving from any process via \c MPI_ANY_SOURCE.)
658   ///
659   /// \pre <tt>!is_null(request)</tt> (that is, the Ptr is not null).
660   /// \post <tt>is_null(*request)</tt> (that is, the RCP is null).
661   ///
662   /// This function blocks until the communication operation
663   /// associated with the CommRequest object has completed.
664   virtual RCP<CommStatus<Ordinal> >
665   wait (const Ptr<RCP<CommRequest<Ordinal> > >& request) const = 0;
666 
667   //@}
668 
669   //! @name Subcommunicator Operations
670   //@{
671 
672   /**
673    * \brief Duplicate this communicator.
674    *
675    * Make a copy of this communicator with a duplicate communication
676    * space.  Note that the returned communicator has the same
677    * properties (including process ranks, attributes and topologies)
678    * as this communicator, but is distinct from the original.
679    * "Distinct" means that if you send a message on the original
680    * communicator, you can't receive it on the new one, and vice
681    * versa.  The new communicator represents a separate message space.
682    * This has the same semantics as MPI_Comm_dup.  (In fact, the
683    * subclass MpiComm implements this using MPI_Comm_dup.)
684    *
685    * Most users don't want to do this.  The duplicate() method returns
686    * a <i>new communicator</i>.  In MPI terms, it is a <i>different
687    * MPI_Comm</i>.  If you want a shallow copy of the handle, you
688    * should pass the <tt>Comm<Ordinal><tt> around by const pointer,
689    * like this:
690    * \code
691    * RCP<const Comm<int> > comm = ...; // my original communicator
692    * // ... do some stuff with comm ...
693    * // Make a shallow copy.
694    * RCP<const Comm<int> > diffHandleSameComm = comm;
695    * // ... do some stuff with diffHandleSameComm ...
696    * \endcode
697    * This behaves the same as the following "raw MPI" code:
698    * \code
699    * MPI_Comm comm = ...; // my original communicator
700    * // ... do some stuff with comm ...
701    * // Make a shallow copy.
702    * MPI_Comm diffHandleSameComm = comm;
703    * // ... do some stuff with diffHandleSameComm ...
704    * \endcode
705    * The subclass of Comm ensures that the "raw" MPI handle is freed
706    * only after the last reference to it by a subclass instance
707    * disappears.  (It does reference counting underneath.)
708    *
709    * Please, please do not invoke the copy constructor or assignment
710    * operator of Comm.  Of course it's not legal to do that anyway,
711    * because Comm is pure virtual.  However, even if you could do it,
712    * you must never do this!  For example, do <i>not</i> do this:
713    * \code
714    * RCP<const Comm<int> > comm = ...; // my original communicator
715    * // ... do some stuff with comm ...
716    * // DO NOT DO THIS, EVER!!!  THIS IS VERY BAD!!!
717    * RCP<const Comm<int> > badComm (new Comm<int> (*comm));
718    * \endcode
719    * and do <i>not</i> do this:
720    * \code
721    * RCP<const Comm<int> > comm = ...; // my original communicator
722    * // ... do some stuff with comm ...
723    * // DO NOT DO THIS, EITHER!!!  THIS IS JUST AS BAD!!!
724    * RCP<const Comm<int> > badComm = rcp (new Comm<int> (*comm));
725    * \endcode
726    * This is bad because it ignores the subclass' data.  Depending on
727    * the subclass of Comm that you are actually using, it may be
728    * appropriate to invoke the copy constructor or assignment operator
729    * of the specific subclass, but <i>never</i> those of Comm itself.
730    *
731    * Users are not responsible for freeing the returned communicator.
732    * The destructor of the subclass of Comm handles that itself.
733    *
734    * In an MPI implementation, the returned communicator is created
735    * using MPI_Comm_dup, with the resulting semantic implications.
736    *
737    * \return A new communicator.
738    */
739   virtual RCP< Comm > duplicate() const = 0;
740 
741   /**
742    * \brief Split a communicator into subcommunicators based on color
743    * and key.
744    *
745    * Partition this communicator into multiple disjoint groups, and
746    * return the communicator corresponding to the group to which this
747    * process belongs.  There will be as many groups as there are
748    * globally many distinct values for the <tt>color</tt> parameter.
749    * Within each subset of the partition, the ranks will be ordered
750    * according to the key value each process passed for the
751    * <tt>key</tt> parameter. If multiple processes pass the same value
752    * for <tt>key</tt>, then they will be ordered according to their
753    * rank in the original communicator.  To return a valid
754    * communicator, this function requires a nonnegative value for
755    * <tt>color</tt>.  If <tt>color</tt> is negative, this method will
756    * return a null communicator.
757    *
758    * This method must be called as a collective on all processes in
759    * this communicator.  That is, if this method is called at all, it
760    * must be called on all processes in the communicator.
761    *
762    * Users are not responsible for freeing the returned communicator.
763    * The destructor of the subclass of Comm handles that itself.
764    *
765    * In an MPI implementation, the returned communicator is created
766    * using MPI_Comm_split, with the resulting semantic implications.
767    *
768    * \param color [in] An integer representing the color for the local
769    *   rank.  In the MPI implementation, if this is negative,
770    *   MPI_Comm_split gets <tt>MPI_UNDEFINED</tt> as the color.
771    *
772    * \param key [in] A key value to order processes of the same color.
773    *   In the MPI implementation, this is passed directly to
774    *   MPI_Comm_split.
775    *
776    * \return A partitioned communicator.
777    */
778   virtual RCP<Comm> split (const int color, const int key) const = 0;
779 
780   /**
781    * \brief Create a subcommunicator containing the specified processes.
782    *
783    * Create and return a subcommunicator of this communicator.  The
784    * subcommunicator contains the processes in this communicator with
785    * the given ranks, in which they are listed in the input vector.
786    * Processes whose ranks are not included in the input vector will
787    * be given a null communicator.
788    *
789    * This method must be called as a collective on all processes in
790    * this communicator.  That is, if this method is called at all, it
791    * must be called on all processes in the communicator.
792    *
793    * Users are not responsible for freeing the returned communicator.
794    * The destructor of the subclass of Comm handles that itself.
795    *
796    * In an MPI implementation, the subcommunicator is created using
797    * MPI_Comm_create, with the resulting semantic implications.
798    *
799    * \param ranks The ranks of the processes to include in the subcommunicator.
800    * \return The subcommunicator.
801    */
802   virtual RCP<Comm>
803   createSubcommunicator (const ArrayView<const int>& ranks) const = 0;
804   //@}
805 
806 }; // class Comm
807 
808 } // namespace Teuchos
809 
810 #endif // TEUCHOS_COMM_HPP
811