1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 /* FIXME: This header should contain only the definitions exported to the
7    mpiimpl.h level */
8 
9 #ifndef MPIDPRE_H_INCLUDED
10 #define MPIDPRE_H_INCLUDED
11 
/* Forward declaration only: struct MPIR_Request itself is declared later. */
struct MPIR_Request;
14 
15 #if defined(HAVE_SYS_TYPES_H)
16 #include <sys/types.h>
17 #endif
18 
19 #ifdef HAVE_LIBHCOLL
20 #include "hcoll/api/hcoll_dte.h"
21 #endif
22 
/*
 * MPIDI_Devdt_t -- device-specific part of a datatype object.
 *
 * When HAVE_LIBHCOLL is defined the struct carries an hcoll datatype.
 * The `foo` member is always present, so the struct is never empty and
 * the compiler has nothing to complain about when hcoll is disabled.
 */
typedef struct {
#if defined(HAVE_LIBHCOLL)
    hcoll_datatype_t hcoll_datatype;
#endif
    int foo;                    /* placeholder member, see above */
} MPIDI_Devdt_t;

/* Member declaration that embeds the device part under the name `dev`. */
#define MPID_DEV_DATATYPE_DECL MPIDI_Devdt_t dev;
30 
31 #include "mpid_thread.h"
32 
33 #include "mpid_sched.h"
34 
/* Incomplete types named in the packet-handler signature below. */
struct MPIR_Request;
struct MPIDI_VC;
union MPIDI_CH3_Pkt;

/*
 * MPIDI_CH3_PktHandler_Fcn -- signature of a packet handler function.
 *
 *   vc     (in)     vc on which the packet was received
 *   pkt    (in)     pointer to the packet header (aligned access)
 *   data   (in)     pointer to the beginning of the data
 *   buflen (in/out) in:  number of bytes received into the receive buffer
 *                   out: number of bytes processed by the handler function
 *   req    (out)    NULL if the whole message has been processed by the
 *                   handler function; otherwise a pointer to the receive
 *                   request for this message, with the IOV set to describe
 *                   where the rest of the message should be received
 *
 * This declaration needs to come before mpidi_ch3_pre.h.
 */
typedef int MPIDI_CH3_PktHandler_Fcn(struct MPIDI_VC *vc,
                                     union MPIDI_CH3_Pkt *pkt,
                                     void *data,
                                     intptr_t *buflen,
                                     struct MPIR_Request **req);
53 
54 /* Include definitions from the channel which must exist before items in this
55    file (mpidpre.h) or the file it includes (mpiimpl.h) can be defined. */
56 #include "mpidi_ch3_pre.h"
57 
58 /* FIXME: Who defines this name */
59 /* As of 8/1/06, no-one defined MSGS_UNORDERED.  We should consider
60    moving support for unordered messages to a different part of the code
61    However, note that sequence numbers may be useful in other contexts,
62    including identifying messages when multithreaded (for better profiling
63    tools) and handling cancellations (rather than relying on unique
64    request ids)
65 */
/* Unordered-message channels switch on sequence numbers. */
#ifdef MPIDI_CH3_MSGS_UNORDERED
#define MPID_USE_SEQUENCE_NUMBERS
#endif

/* The sequence-number type exists only when sequence numbers are in use. */
#ifdef MPID_USE_SEQUENCE_NUMBERS
typedef unsigned long MPID_Seqnum_t;
#endif
73 
74 #include "mpichconf.h"
75 
/*
 * MPIDI_Rank_t -- signed integer type used for ranks; its width is
 * selected by CH3_RANK_BITS, which must be 16 or 32.
 *
 * Any other defined value is rejected here with a clear diagnostic
 * instead of leaving MPIDI_Rank_t undeclared and failing later at its
 * first use.  An undefined CH3_RANK_BITS is not diagnosed here (prior
 * behavior is kept); any use of MPIDI_Rank_t then fails to compile.
 */
#if CH3_RANK_BITS == 16
typedef int16_t MPIDI_Rank_t;
#elif CH3_RANK_BITS == 32
typedef int32_t MPIDI_Rank_t;
#elif defined(CH3_RANK_BITS)
#error "CH3_RANK_BITS must be 16 or 32"
#endif /* CH3_RANK_BITS */
81 
82 /* For the typical communication system for which the ch3 channel is
83    appropriate, 16 bits is sufficient for the rank.  By also using 16
84    bits for the context, we can reduce the size of the match
85    information, which is beneficial for slower communication
86    links. Further, this allows the total structure size to be 64 bits
87    and the search operations can be optimized on 64-bit platforms. We
88    use a union of the actual required structure with a uintptr_t, so
89    in this optimized case, the "whole" field can be used for
90    comparisons.
91 
92    Note that the MPICH code (in src/mpi) uses int for rank (and usually for
93    contextids, though some work is needed there).
94 
   Note:  We need to check for truncation of rank in MPID_Init - it should
   confirm that the size of comm_world is less than 2^15, and in a communicator
   create (that may make use of dynamically created processes) that the
   size of the communicator is within range.
99 
100    If any part of the definition of this type is changed, those changes
101    must be reflected in the debugger interface in src/mpi/debugger/dll_mpich.c
102    and dbgstub.c
103 */
104 typedef struct MPIDI_Message_match_parts {
105     int32_t tag;
106     MPIDI_Rank_t rank;
107     MPIR_Context_id_t context_id;
108 } MPIDI_Message_match_parts_t;
109 typedef union {
110     MPIDI_Message_match_parts_t parts;
111     uintptr_t whole;
112 } MPIDI_Message_match;
113 
114 /* Provides MPIDI_CH3_Pkt_t.  Must come after MPIDI_Message_match definition. */
115 #include "mpidpkt.h"
116 
117 /*
118  * THIS IS OBSOLETE AND UNUSED, BUT RETAINED FOR ITS DESCRIPTIONS OF THE
119  * VARIOUS STATES.  Note that this is not entirely accurate, as the
120  * CA_COMPLETE state could depend on the packet type (e.g., for RMA
121  * operations).
122  *
123  * MPIDI_CA_t
124  *
125  * An enumeration of the actions to perform when the requested I/O operation
126  * has completed.
127  *
128  * MPIDI_CH3_CA_COMPLETE - The last operation for this request has completed.
129  * The completion counter should be decremented.  If
130  * it has reached zero, then the request should be released by calling
131  * MPIR_Request_free().
132  *
 * MPIDI_CH3_CA_UNPACK_UEBUF_AND_COMPLETE - This is a special case of
 * MPIDI_CH3_CA_COMPLETE.  The data for an unexpected
 * eager message has been stored into a temporary buffer and needs to be
 * copied/unpacked into the user buffer before the
 * completion counter can be decremented, etc.
138  *
139  * MPIDI_CH3_CA_UNPACK_SRBUF_AND_COMPLETE - This is a special case of the
140  * MPIDI_CH3_CA_COMPLETE.  The data from the completing
141  * read has been stored into a temporary send/receive buffer and needs to be
142  * copied/unpacked into the user buffer before the
143  * completion counter can be decremented, etc.
144  *
 * MPIDI_CH3_CA_RELOAD_IOV - This request contains more segments of data than
 * the IOV or buffer space allow.  Since the
 * previously requested operation has completed, the IOV in the request should
 * be reloaded at this time.
149  *
150  * MPIDI_CH3_CA_UNPACK_SRBUF_AND_RELOAD_IOV - This is a special case of the
151  * MPIDI_CH3_CA_RELOAD_IOV.  The data from the
152  * completing read operation has been stored into a temporary send/receive
153  * buffer and needs to be copied/unpacked into the user
154  * buffer before the IOV is reloaded.
155  *
 * MPIDI_CH3_CA_END_CH3 - This is not a real action, but rather a marker.
 * All actions numerically less than MPIDI_CH3_CA_END_CH3 are defined
 * by the channel device.  Any actions numerically greater than
 * MPIDI_CH3_CA_END_CH3 are internal to the channel instance and must be
 * handled by the channel instance.
161  */
162 
/* Communicator hooks: each MPID_ name forwards to a CH3 routine. */
#define MPID_Comm_commit_pre_hook(comm_ptr_)  MPIDI_CH3I_Comm_commit_pre_hook(comm_ptr_)
#define MPID_Comm_commit_post_hook(comm_ptr_) MPIDI_CH3I_Comm_commit_post_hook(comm_ptr_)
/* Note: the free hook forwards to the CH3 *destroy* hook. */
#define MPID_Comm_free_hook(comm_ptr_)        MPIDI_CH3I_Comm_destroy_hook(comm_ptr_)
166 
/* Declare MPIDI_VCR (a pointer to struct MPIDI_VC) at most once. */
#if !defined(HAVE_MPIDI_VCRT)
#define HAVE_MPIDI_VCRT
typedef struct MPIDI_VC *MPIDI_VCR;
#endif
171 
172 typedef struct MPIDI_CH3I_comm
173 {
174     int anysource_enabled;  /* TRUE iff this anysource recvs can be posted on this communicator */
175     int last_ack_rank;      /* The rank of the last acknowledged failure */
176     int waiting_for_revoke; /* The number of other processes from which we are
177                              * waiting for a revoke message before we can release
178                              * the context id */
179 
180     int is_disconnected;    /* set to TRUE if this communicator was
181                              * disconnected as a part of
182                              * MPI_COMM_DISCONNECT; FALSE otherwise. */
183 
184     struct MPIDI_VCRT *vcrt;          /* virtual connecton reference table */
185     struct MPIDI_VCRT *local_vcrt;    /* local virtual connecton reference table */
186 
187     struct MPIR_Comm *next; /* next pointer for list of communicators */
188     struct MPIR_Comm *prev; /* prev pointer for list of communicators */
189     MPIDI_CH3I_CH_comm_t ch;
190 }
191 MPIDI_CH3I_comm_t;
192 
193 #define MPID_DEV_COMM_DECL MPIDI_CH3I_comm_t dev;
194 
195 #ifndef DEFINED_REQ
196 #define DEFINED_REQ
/*
 * MPIDI_REQUEST_SEQNUM expands to a `seqnum` member declaration when
 * sequence numbers are in use, and to nothing otherwise.
 */
#ifdef MPID_USE_SEQUENCE_NUMBERS
#define MPIDI_REQUEST_SEQNUM MPID_Seqnum_t seqnum;
#else
#define MPIDI_REQUEST_SEQNUM
#endif
203 
204 /* Here we add RMA sync types to specify types
205  * of synchronizations the origin is going to
206  * perform to the target. */
207 
208 /* There are four kinds of synchronizations: NONE,
209  * FLUSH_LOCAL, FLUSH, UNLOCK.
210  * (1) NONE means there is no special synchronization,
211  * origin just issues as many operations as it can,
212  * excluding the last operation which is a piggyback
213  * candidate;
214  * (2) FLUSH_LOCAL means origin wants to do a
215  * FLUSH_LOCAL sync and issues out all pending
216  * operations including the piggyback candidate;
217  * (3) FLUSH means origin wants to do a FLUSH sync
218  * and issues out all pending operations including
219  * the last op piggybacked with a FLUSH flag to
220  * detect remote completion;
 * (4) UNLOCK means origin issues all pending operations
 * including the last op piggybacked with an UNLOCK
 * flag to release the lock on target and detect remote
 * completion.
225  * Note that FLUSH_LOCAL is a superset of NONE, FLUSH
226  * is a superset of FLUSH_LOCAL, and UNLOCK is a superset
227  * of FLUSH.
228  */
/*
 * RMA synchronization types.  Numbering begins at an arbitrarily chosen
 * value (58) so that an uninitialized or wrongly initialized sync type
 * is easy to spot while debugging.
 */
enum MPIDI_RMA_sync_types {
    MPIDI_RMA_SYNC_NONE        = 58,
    MPIDI_RMA_SYNC_FLUSH_LOCAL = 59,
    MPIDI_RMA_SYNC_FLUSH       = 60,
    MPIDI_RMA_SYNC_UNLOCK      = 61
};
238 
/*
 * RMA window states.  Numbering begins at an arbitrarily chosen value
 * (63) so that an uninitialized or wrongly initialized window state is
 * easy to spot while debugging.  The trailing note on each enumerator
 * says whether it is an access state, an exposure state, or both.
 */
enum MPIDI_RMA_states {
    /* ---- window-wide states ---- */
    MPIDI_RMA_NONE = 63,
    MPIDI_RMA_FENCE_ISSUED,     /* access / exposure */
    MPIDI_RMA_FENCE_GRANTED,    /* access / exposure */
    MPIDI_RMA_PSCW_ISSUED,      /* access */
    MPIDI_RMA_PSCW_GRANTED,     /* access */
    MPIDI_RMA_PSCW_EXPO,        /* exposure */
    MPIDI_RMA_PER_TARGET,       /* access */
    MPIDI_RMA_LOCK_ALL_CALLED,  /* access */
    MPIDI_RMA_LOCK_ALL_ISSUED,  /* access */
    MPIDI_RMA_LOCK_ALL_GRANTED, /* access */

    /* ---- target-specific states ---- */
    MPIDI_RMA_LOCK_CALLED,      /* access */
    MPIDI_RMA_LOCK_ISSUED,      /* access */
    MPIDI_RMA_LOCK_GRANTED      /* access */
};
260 
/*
 * Window lock states.  Numbering begins at an arbitrarily chosen value
 * (19) so that an uninitialized or wrongly initialized lock state is
 * easy to spot while debugging.
 */
enum MPIDI_CH3_Lock_states {
    MPIDI_CH3_WIN_LOCK_NONE      = 19,
    MPIDI_CH3_WIN_LOCK_CALLED    = 20,
    MPIDI_CH3_WIN_LOCK_REQUESTED = 21,
    MPIDI_CH3_WIN_LOCK_GRANTED   = 22,
    MPIDI_CH3_WIN_LOCK_FLUSH     = 23
};
271 
/*
 * Accumulate-ordering values.  Each enumerator occupies its own bit, so
 * several can be OR-ed together into one int.
 * NOTE(review): the tag spells "arv" where the sibling enum below spells
 * "arg"; left as is because renaming the tag could break users.
 */
enum MPIDI_Win_info_arv_vals_accumulate_ordering {
    MPIDI_ACC_ORDER_RAR = 1 << 0,
    MPIDI_ACC_ORDER_RAW = 1 << 1,
    MPIDI_ACC_ORDER_WAR = 1 << 2,
    MPIDI_ACC_ORDER_WAW = 1 << 3
};
278 
/*
 * Accumulate-ops values.  Numbering begins at an arbitrarily chosen
 * value (11) so that an uninitialized or wrongly initialized window
 * info value is easy to spot while debugging.
 */
enum MPIDI_Win_info_arg_vals_accumulate_ops {
    MPIDI_ACC_OPS_SAME_OP       = 11,
    MPIDI_ACC_OPS_SAME_OP_NO_OP = 12
};
286 
/* Window info arguments kept per window; every member is a plain int. */
struct MPIDI_Win_info_args {
    /* valid flavor = all */
    int no_locks;
    int accumulate_ordering;
    int accumulate_ops;
    int same_size;
    int same_disp_unit;
    int alloc_shared_noncontig;
    int alloc_shm;
};
296 
/* Forward decl from mpidrma.h.  The tag is spelled MPIDI_RMA_Op (capital
 * 'O'), matching the op_pool_start/op_pool_head members of
 * MPIDI_DEV_WIN_DECL in this file. */
struct MPIDI_RMA_Op;
298 
299 typedef struct MPIDI_Win_basic_info {
300     void *base_addr;
301     MPI_Aint size;
302     int disp_unit;
303     MPI_Win win_handle;
304 } MPIDI_Win_basic_info_t;
305 
/*
 * MPIDI_DEV_WIN_DECL -- device-level members placed in every window object.
 *
 *   at_completion_counter   completion counter for operations targeting
 *                           this window
 *   shm_base_addrs          shared memory windows -- array of base addresses
 *                           of the windows of all processes in this
 *                           process's address space
 *   basic_info_table        table of MPIDI_Win_basic_info_t entries
 *   current_lock_type       current lock type on this window (as target):
 *                           none, shared or exclusive
 *   shared_lock_ref_cnt     reference count for the shared lock
 *   target_lock_queue_head  list of unsatisfied locks
 *   info_args               window info arguments
 *   shm_allocated           flag: TRUE iff this window has a shared memory
 *                           region associated with it
 *   op_pool_start           start pointer used for freeing
 *   op_pool_head            pool of operations
 *   target_pool_start       start pointer used for freeing
 *   target_pool_head        pool of targets
 *   slots, num_slots        slot array and its length
 *   states                  current access and exposure states
 *   num_targets_with_pending_net_ops
 *                           number of targets that have a non-empty net
 *                           pending op list
 *   start_ranks_in_win_grp, start_grp_size
 *                           rank array and its size
 *   lock_all_assert         assert value for lock_all
 *   lock_epoch_count        number of lock access epochs on this process
 *   outstanding_locks       when issuing multiple lock requests in
 *                           MPI_WIN_LOCK_ALL, the number of locks not
 *                           granted yet
 *   target_lock_entry_pool_start, target_lock_entry_pool_head
 *                           pool of target lock entries
 *   current_target_lock_data_bytes
 *                           byte count of target lock data
 *   sync_request_cnt        number of incomplete sync requests (used in
 *                           Win_fence and PSCW)
 *   active                  whether this window is active or not
 *   prev, next              links to neighbouring windows in a list
 *   outstanding_acks        number of outstanding ACKs, window wide
 *
 * The last line deliberately carries no line continuation, so the macro
 * cannot swallow whatever follows it in the file.
 */
#define MPIDI_DEV_WIN_DECL                                              \
    volatile int at_completion_counter;                                 \
    void **shm_base_addrs;                                              \
    MPIDI_Win_basic_info_t *basic_info_table;                           \
    volatile int current_lock_type;                                     \
    volatile int shared_lock_ref_cnt;                                   \
    struct MPIDI_RMA_Target_lock_entry volatile *target_lock_queue_head; \
    struct MPIDI_Win_info_args info_args;                               \
    int shm_allocated;                                                  \
    struct MPIDI_RMA_Op *op_pool_start;                                 \
    struct MPIDI_RMA_Op *op_pool_head;                                  \
    struct MPIDI_RMA_Target *target_pool_start;                         \
    struct MPIDI_RMA_Target *target_pool_head;                          \
    struct MPIDI_RMA_Slot *slots;                                       \
    int num_slots;                                                      \
    struct {                                                            \
        enum MPIDI_RMA_states access_state;                             \
        enum MPIDI_RMA_states exposure_state;                           \
    } states;                                                           \
    int num_targets_with_pending_net_ops;                               \
    int *start_ranks_in_win_grp;                                        \
    int start_grp_size;                                                 \
    int lock_all_assert;                                                \
    int lock_epoch_count;                                               \
    int outstanding_locks;                                              \
    struct MPIDI_RMA_Target_lock_entry *target_lock_entry_pool_start;   \
    struct MPIDI_RMA_Target_lock_entry *target_lock_entry_pool_head;    \
    int current_target_lock_data_bytes;                                 \
    int sync_request_cnt;                                               \
    int active;                                                         \
    struct MPIR_Win *prev;                                              \
    struct MPIR_Win *next;                                              \
    int outstanding_acks;
351 
/* Heads of the active and inactive window lists (defined elsewhere). */
extern struct MPIR_Win *MPIDI_RMA_Win_active_list_head;
extern struct MPIR_Win *MPIDI_RMA_Win_inactive_list_head;

/* RMA counters/ids shared across the device (defined elsewhere). */
extern int MPIDI_CH3I_RMA_Active_req_cnt;
extern int MPIDI_CH3I_RMA_Progress_hook_id;
356 
/*
 * MPID_DEV_WIN_DECL: the device window members, followed by the channel's
 * own members when the channel provides MPIDI_CH3_WIN_DECL.
 */
#if !defined(MPIDI_CH3_WIN_DECL)
#define MPID_DEV_WIN_DECL MPIDI_DEV_WIN_DECL
#else
#define MPID_DEV_WIN_DECL MPIDI_DEV_WIN_DECL MPIDI_CH3_WIN_DECL
#endif
365 
366 
367 typedef struct MPIDI_Request {
368     MPIDI_Message_match match;
369     MPIDI_Message_match mask;
370 
371     /* user_buf, user_count, and datatype needed to process
372        rendezvous messages. */
373     void        *user_buf;
374     MPI_Aint   user_count;
375     MPI_Datatype datatype;
376     int drop_data;
377 
378     /* msg_offset, and msgsize are used when processing
379        non-contiguous datatypes */
380     intptr_t msg_offset;
381     intptr_t msgsize;
382     intptr_t orig_msg_offset;
383 
384     /* Pointer to datatype for reference counting purposes */
385     struct MPIR_Datatype* datatype_ptr;
386 
387     /* iov and iov_count define the data to be transferred/received.
388        iov_offset points to the current head element in the IOV */
389     struct iovec iov[MPL_IOV_LIMIT];
390     int iov_count;
391     size_t iov_offset;
392 
393     /* OnDataAvail is the action to take when data is now available.
394        For example, when an operation described by an iov has
395        completed.  This replaces the MPIDI_CA_t (completion action)
396        field used through MPICH 1.0.4. */
397     int (*OnDataAvail)( struct MPIDI_VC *, struct MPIR_Request *, int * );
398     /* OnFinal is used in the following case:
399        OnDataAvail is set to a function, and that function has processed
400        all of the data.  At that point, the OnDataAvail function can
401        reset OnDataAvail to OnFinal.  This is normally used when processing
402        non-contiguous data, where there is one more action to take (such
403        as a get-response) when processing of the non-contiguous data
404        completes. This value need not be initialized unless OnDataAvail
405        is set to a non-null value (and then only in certain cases) */
406     int (*OnFinal)( struct MPIDI_VC *, struct MPIR_Request *, int * );
407 
408     /* tmpbuf and tmpbuf_sz describe temporary storage used for things like
409        unexpected eager messages and packing/unpacking
410        buffers.  tmpuf_off is the current offset into the temporary buffer. */
411     void          *tmpbuf;
412     intptr_t tmpbuf_off;
413     intptr_t tmpbuf_sz;
414 
415     intptr_t recv_data_sz;
416     MPI_Request    sender_req_id;
417 
418     unsigned int   state;
419     int            cancel_pending;
420 
421     /* This field seems to be used for unexpected messages.  Unexpected messages
422      * need to go through two steps: matching and receiving the data.  These
423      * steps could happen in either order though, so this field is initialized
424      * to 2.  It is decremented when the request is matched and also when all of
425      * the data is available.  Once it reaches 0 it should be safe to copy from
426      * the temporary buffer (if there is one) to the user buffer.  This field is
427      * related to, but not quite the same thing as the completion counter (cc). */
428     /* MT access should be controlled by the MSGQUEUE CS when the req is still
429      * unexpected, exclusive access otherwise */
430     int            recv_pending_count;
431 
432     /* The next several fields are used to hold state for ongoing RMA operations */
433     MPI_Op op;
434     /* For accumulate, since data is first read into a tmp_buf */
435     void *real_user_buf;
436     /* For derived datatypes at target. */
437     void *flattened_type;
438     /* req. handle needed to implement derived datatype gets.
439      * It also used for remembering user request of request-based RMA operations. */
440     MPI_Request request_handle;
441     MPI_Win     target_win_handle;
442     MPI_Win     source_win_handle;
443     int pkt_flags; /* pkt_flags that were included in the original RMA packet header */
444     struct MPIDI_RMA_Target_lock_entry *target_lock_queue_entry;
445     MPI_Request resp_request_handle; /* Handle for get_accumulate response */
446 
447     void *ext_hdr_ptr; /* Pointer to extended packet header.
448                         * It is allocated in RMA issuing/pkt_handler functions,
449                         * and freed when release request. */
450     intptr_t ext_hdr_sz;
451 
452     struct MPIDI_RMA_Target *rma_target_ptr;
453 
454     MPIDI_REQUEST_SEQNUM
455 
456     /* Occasionally, when a message cannot be sent, we need to cache the
457        data that is required.  The fields above (such as userbuf and tmpbuf)
458        are used for the message data.  However, we also need space for the
459        message packet. This field provide a generic location for that.
460        Question: do we want to make this a link instead of reserving
461        a fixed spot in the request? */
462     MPIDI_CH3_Pkt_t pending_pkt;
463 
464     /* Notes about request_completed_cb:
465      *
466      *   1. The callback function is triggered when this requests
467      *      completion count reaches 0.
468      *
469      *   2. The callback function should be nonblocking.
470      *
471      *   3. The callback function should not poke the progress engine,
472      *      or call any function that pokes the progress engine.
473      *
474      *   4. The callback function can complete other requests, thus
475      *      calling those requests' callback functions.  However, the
476      *      recursion depth of request completion function is limited.
477      *      If we ever need deeper recurisve calls, we need to change
478      *      to an iterative design instead of a recursive design for
479      *      request completion.
480      *
481      *   5. In multithreaded programs, since the callback function is
482      *      nonblocking and never calls the progress engine, it would
483      *      never yield the lock to other threads.  So the recursion
484      *      should be multithreading-safe.
485      */
486     int (*request_completed_cb)(struct MPIR_Request *);
487 
488     /* partner send request when a receive request is created by the
489      * sender (only used for self send) */
490     struct MPIR_Request * partner_request;
491 
492     struct MPIR_Request * next;
493 } MPIDI_Request;
/* Member declaration that embeds the device request part as `dev`. */
#define MPIR_REQUEST_DECL MPIDI_Request dev;

/*
 * MPID_DEV_REQUEST_DECL: the device request member, followed by the
 * channel's own members when the channel provides MPIDI_CH3_REQUEST_DECL.
 */
#ifndef MPIDI_CH3_REQUEST_DECL
#define MPID_DEV_REQUEST_DECL MPIR_REQUEST_DECL
#else
#define MPID_DEV_REQUEST_DECL MPIR_REQUEST_DECL MPIDI_CH3_REQUEST_DECL
#endif

/* Pass through the channel's request-kind declaration, if it has one. */
#if defined(MPIDI_CH3_REQUEST_KIND_DECL)
#define MPID_REQUEST_KIND_DECL MPIDI_CH3_REQUEST_KIND_DECL
#endif
508 
509 #endif
510 
511 /* FIXME: This ifndef test is a temp until mpidpre is cleaned of
512    all items that do not belong (e.g., all items not needed by the
513    top layers of MPICH) */
514 /* FIXME: The progress routines will be made into ch3-common definitions, not
515    channel specific.  Channels that need more will need to piggy back or
516    otherwise override */
/*
 * MPID_Progress_state -- holds the channel's progress-state members when
 * the channel provides MPIDI_CH3_PROGRESS_STATE_DECL, and a single
 * placeholder int otherwise.
 */
typedef struct {
#ifndef MPIDI_CH3_PROGRESS_STATE_DECL
    int foo;                    /* placeholder member */
#else
    MPIDI_CH3_PROGRESS_STATE_DECL
#endif
} MPID_Progress_state;
524 
/* MPIDI_Gpid -- a pair of ints.
 * NOTE(review): presumably a global process id; confirm against users. */
typedef struct {
    int gpid[2];
} MPIDI_Gpid;
528 
529 /* Tell initthread to prepare a private comm_world */
530 #define MPID_NEEDS_ICOMM_WORLD
531 
532 int MPID_Init(int required, int *provided);
533 
534 int MPID_InitCompleted( void );
535 
536 int MPID_Finalize(void);
537 
538 int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
539 
540 int MPID_Open_port(MPIR_Info *, char *);
541 int MPID_Close_port(const char *);
542 
543 int MPID_Comm_accept(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
544 
545 int MPID_Comm_connect(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
546 
547 int MPID_Comm_disconnect(MPIR_Comm *);
548 
549 int MPID_Comm_spawn_multiple(int, char *[], char **[], const int [], MPIR_Info* [],
550                              int, MPIR_Comm *, MPIR_Comm **, int []);
551 
552 int MPID_Comm_failure_ack(MPIR_Comm *comm);
553 
554 int MPID_Comm_failure_get_acked(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
555 
556 int MPID_Comm_get_all_failed_procs(MPIR_Comm *comm_ptr, MPIR_Group **failed_group, int tag);
557 
558 int MPID_Comm_revoke(MPIR_Comm *comm, int is_remote);
559 
560 int MPID_Send( const void *buf, MPI_Aint count, MPI_Datatype datatype,
561 	       int dest, int tag, MPIR_Comm *comm, int context_offset,
562 	       MPIR_Request **request );
563 
564 int MPID_Send_coll( const void *buf, MPI_Aint count, MPI_Datatype datatype,
565                     int dest, int tag, MPIR_Comm *comm, int context_offset,
566                     MPIR_Request **request, MPIR_Errflag_t * errflag );
567 
568 int MPID_Rsend( const void *buf, int count, MPI_Datatype datatype,
569 		int dest, int tag, MPIR_Comm *comm, int context_offset,
570 		MPIR_Request **request );
571 
572 int MPID_Ssend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
573 		int dest, int tag, MPIR_Comm *comm, int context_offset,
574 		MPIR_Request **request );
575 
576 int MPID_Isend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
577 		int dest, int tag, MPIR_Comm *comm, int context_offset,
578 		MPIR_Request **request );
579 
580 int MPID_Isend_coll( const void *buf, MPI_Aint count, MPI_Datatype datatype,
581                      int dest, int tag, MPIR_Comm *comm, int context_offset,
582                      MPIR_Request **request, MPIR_Errflag_t * errflag );
583 
584 int MPID_Irsend( const void *buf, int count, MPI_Datatype datatype,
585 		 int dest, int tag, MPIR_Comm *comm, int context_offset,
586 		 MPIR_Request **request );
587 
588 int MPID_Issend( const void *buf, int count, MPI_Datatype datatype,
589 		 int dest, int tag, MPIR_Comm *comm, int context_offset,
590 		 MPIR_Request **request );
591 
592 int MPID_Recv( void *buf, MPI_Aint count, MPI_Datatype datatype,
593 	       int source, int tag, MPIR_Comm *comm, int context_offset,
594 	       MPI_Status *status, MPIR_Request **request );
595 
596 int MPID_Irecv( void *buf, MPI_Aint count, MPI_Datatype datatype,
597 		int source, int tag, MPIR_Comm *comm, int context_offset,
598 		MPIR_Request **request );
599 
600 int MPID_Send_init( const void *buf, int count, MPI_Datatype datatype,
601 		    int dest, int tag, MPIR_Comm *comm, int context_offset,
602 		    MPIR_Request **request );
603 
604 int MPID_Bsend_init(const void *, int, MPI_Datatype, int, int, MPIR_Comm *,
605 		   int, MPIR_Request **);
606 int MPID_Rsend_init( const void *buf, int count, MPI_Datatype datatype,
607 		     int dest, int tag, MPIR_Comm *comm, int context_offset,
608 		     MPIR_Request **request );
609 int MPID_Ssend_init( const void *buf, int count, MPI_Datatype datatype,
610 		     int dest, int tag, MPIR_Comm *comm, int context_offset,
611 		     MPIR_Request **request );
612 
613 int MPID_Recv_init( void *buf, int count, MPI_Datatype datatype,
614 		    int source, int tag, MPIR_Comm *comm, int context_offset,
615 		    MPIR_Request **request );
616 
617 int MPID_Startall(int count, MPIR_Request *requests[]);
618 
619 int MPID_Probe(int, int, MPIR_Comm *, int, MPI_Status *);
620 int MPID_Iprobe(int, int, MPIR_Comm *, int, int *, MPI_Status *);
621 
622 int MPID_Mprobe(int source, int tag, MPIR_Comm *comm, int context_offset,
623                 MPIR_Request **message, MPI_Status *status);
624 
625 int MPID_Improbe(int source, int tag, MPIR_Comm *comm, int context_offset,
626                  int *flag, MPIR_Request **message, MPI_Status *status);
627 
628 int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
629                 MPIR_Request *message, MPIR_Request **rreqp);
630 
631 int MPID_Mrecv(void *buf, int count, MPI_Datatype datatype,
632                MPIR_Request *message, MPI_Status *status, MPIR_Request **rreq);
633 
634 int MPID_Cancel_send(MPIR_Request *);
635 int MPID_Cancel_recv(MPIR_Request *);
636 
637 MPI_Aint MPID_Aint_add(MPI_Aint base, MPI_Aint disp);
638 
639 MPI_Aint MPID_Aint_diff(MPI_Aint addr1, MPI_Aint addr2);
640 
641 int MPID_Win_create(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *,
642                     MPIR_Win **);
643 int MPID_Win_free(MPIR_Win **);
644 
645 int MPID_Put(const void *, int, MPI_Datatype, int, MPI_Aint, int,
646              MPI_Datatype, MPIR_Win *);
647 int MPID_Get(void *, int, MPI_Datatype, int, MPI_Aint, int,
648              MPI_Datatype, MPIR_Win *);
649 int MPID_Accumulate(const void *, int, MPI_Datatype, int, MPI_Aint, int,
650                     MPI_Datatype, MPI_Op, MPIR_Win *);
651 
652 int MPID_Win_fence(int, MPIR_Win *);
653 int MPID_Win_post(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
654 int MPID_Win_start(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
655 int MPID_Win_test(MPIR_Win *win_ptr, int *flag);
656 int MPID_Win_wait(MPIR_Win *win_ptr);
657 int MPID_Win_complete(MPIR_Win *win_ptr);
658 
659 int MPID_Win_lock(int lock_type, int dest, int assert, MPIR_Win *win_ptr);
660 int MPID_Win_unlock(int dest, MPIR_Win *win_ptr);
661 
662 int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info *info,
663                       MPIR_Comm *comm, void *baseptr, MPIR_Win **win);
664 int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info *info_ptr, MPIR_Comm *comm_ptr,
665                              void *base_ptr, MPIR_Win **win_ptr);
666 int MPID_Win_shared_query(MPIR_Win *win, int rank, MPI_Aint *size, int *disp_unit,
667                           void *baseptr);
668 int MPID_Win_create_dynamic(MPIR_Info *info, MPIR_Comm *comm, MPIR_Win **win);
669 int MPID_Win_attach(MPIR_Win *win, void *base, MPI_Aint size);
670 int MPID_Win_detach(MPIR_Win *win, const void *base);
671 int MPID_Win_get_info(MPIR_Win *win, MPIR_Info **info_used);
672 int MPID_Win_set_info(MPIR_Win *win, MPIR_Info *info);
673 
674 int MPID_Get_accumulate(const void *origin_addr, int origin_count,
675                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
676                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
677                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win);
678 int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
679                       MPI_Datatype datatype, int target_rank, MPI_Aint target_disp,
680                       MPI_Op op, MPIR_Win *win);
681 int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
682                           void *result_addr, MPI_Datatype datatype, int target_rank,
683                           MPI_Aint target_disp, MPIR_Win *win);
684 int MPID_Rput(const void *origin_addr, int origin_count,
685               MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
686               int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
687               MPIR_Request **request);
688 int MPID_Rget(void *origin_addr, int origin_count,
689               MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
690               int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
691               MPIR_Request **request);
692 int MPID_Raccumulate(const void *origin_addr, int origin_count,
693                      MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
694                      int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
695                      MPIR_Request **request);
696 int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
697                          MPI_Datatype origin_datatype, void *result_addr, int result_count,
698                          MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
699                          int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
700                          MPIR_Request **request);
701 
702 int MPID_Win_lock_all(int assert, MPIR_Win *win);
703 int MPID_Win_unlock_all(MPIR_Win *win);
704 int MPID_Win_flush(int rank, MPIR_Win *win);
705 int MPID_Win_flush_all(MPIR_Win *win);
706 int MPID_Win_flush_local(int rank, MPIR_Win *win);
707 int MPID_Win_flush_local_all(MPIR_Win *win);
708 int MPID_Win_sync(MPIR_Win *win);
709 
710 void MPID_Progress_start(MPID_Progress_state * state);
711 int MPID_Progress_wait(MPID_Progress_state * state);
712 void MPID_Progress_end(MPID_Progress_state * stae);
713 int MPID_Progress_poke(void);
714 
715 int MPID_Get_processor_name( char *name, int namelen, int *resultlen);
716 int MPID_Get_universe_size(int  * universe_size);
717 int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, bool is_remote);
718 
719 void MPID_Request_create_hook(MPIR_Request *);
720 void MPID_Request_free_hook(MPIR_Request *);
721 void MPID_Request_destroy_hook(MPIR_Request *);
722 int MPID_Request_complete(MPIR_Request *);
723 
724 void *MPID_Alloc_mem( size_t size, MPIR_Info *info );
725 int MPID_Free_mem( void *ptr );
726 
727 /* Prototypes and definitions for the node ID code.  This is used to support
728    hierarchical collectives in a (mostly) device-independent way. */
729 int MPID_Get_node_id(MPIR_Comm *comm, int rank, int *id_p);
730 int MPID_Get_max_node_id(MPIR_Comm *comm, int *max_id_p);
731 
732 int MPID_Type_commit_hook(MPIR_Datatype * type);
733 int MPID_Type_free_hook(MPIR_Datatype * type);
734 int MPID_Op_commit_hook(MPIR_Op * op);
735 int MPID_Op_free_hook(MPIR_Op * op);
736 
737 #endif /* MPIDPRE_H_INCLUDED */
738