1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * (C) 2001 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 *
6 * Portions of this code were written by Microsoft. Those portions are
7 * Copyright (c) 2007 Microsoft Corporation. Microsoft grants
8 * permission to use, reproduce, prepare derivative works, and to
9 * redistribute to others. The code is licensed "as is." The User
10 * bears the risk of using it. Microsoft gives no express warranties,
11 * guarantees or conditions. To the extent permitted by law, Microsoft
12 * excludes the implied warranties of merchantability, fitness for a
13 * particular purpose and non-infringement.
14 */
15 #ifndef MPIIMPL_H_INCLUDED
16 #define MPIIMPL_H_INCLUDED
17
18 /*
19 * This file is the temporary home of most of the definitions used to
20 * implement MPICH. We will eventually divide this file into logical
21 * pieces once we are certain of the relationships between the components.
22 */
23
24 /* style: define:vsnprintf:1 sig:0 */
25 /* style: allow:printf:3 sig:0 */
26
27 /* Include the mpi definitions */
28 #include "mpi.h"
29
30 /* There are a few definitions that must be made *before* the mpichconf.h
31 file is included. These include the definitions of the error levels and some
32 thread granularity constants */
33 #include "mpichconfconst.h"
34
35 /* Data computed by configure. This is included *after* mpi.h because we
36 do not want mpi.h to depend on any other files or configure flags */
37 #include "mpichconf.h"
38
39 /* if we are defining this, we must define it before including mpl.h */
40 #if defined(MPICH_DEBUG_MEMINIT)
41 #define MPL_VG_ENABLED 1
42 #endif
43 #include "mpl.h"
44
45 #include <stdio.h>
46 #ifdef STDC_HEADERS
47 #include <stdlib.h>
48 #include <stdarg.h>
49 #include <string.h>
50 #else
51 #ifdef HAVE_STDLIB_H
52 #include <stdlib.h>
53 #endif
54 #ifdef HAVE_STDARG_H
55 #include <stdarg.h>
56 #endif
57 #ifdef HAVE_STRING_H
58 #include <string.h>
59 #endif
60 #endif
61
62 #ifdef HAVE_SYS_TYPES_H
63 #include <sys/types.h>
64 #endif
65
66 /* for MAXHOSTNAMELEN under Linux and OSX */
67 #ifdef HAVE_SYS_PARAM_H
68 #include <sys/param.h>
69 #endif
70
71 #if defined (HAVE_USLEEP)
72 #include <unistd.h>
73 #if defined (NEEDS_USLEEP_DECL)
74 int usleep(useconds_t usec);
75 #endif
76 #endif
77
/* Choose the hostname buffer length: keep a MAX_HOSTNAME_LEN that is already
   defined, otherwise mirror MAXHOSTNAMELEN when the system headers provided
   one, and fall back to 256 when neither is available. */
#ifndef MAX_HOSTNAME_LEN
#  ifdef MAXHOSTNAMELEN
#    define MAX_HOSTNAME_LEN MAXHOSTNAMELEN
#  else
#    define MAX_HOSTNAME_LEN 256
#  endif
#endif
83
/* PMI version and subversion used when nothing else selects one */
#define MPIU_DEFAULT_PMI_VERSION     1
#define MPIU_DEFAULT_PMI_SUBVERSION  1
87
/* PMPI_LOCAL is a storage-class prefix for names that can stay private to
   one file: it is 'static' when weak symbols are in use and empty otherwise. */
#ifndef USE_WEAK_SYMBOLS
#define PMPI_LOCAL
#else
#define PMPI_LOCAL static
#endif
95
/* Fix for universal endianness added in autoconf 2.62: when only
   WORDS_UNIVERSAL_ENDIAN is set, the byte order is taken from the
   __BIG_ENDIAN__ / __LITTLE_ENDIAN__ macros.  Nothing is defined here in
   the big-endian case; having neither macro is a hard error. */
#ifdef WORDS_UNIVERSAL_ENDIAN
#  if !defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__)
#    error 'Universal endianess defined without __BIG_ENDIAN__ or __LITTLE_ENDIAN__'
#  elif !defined(__BIG_ENDIAN__)
#    define WORDS_LITTLEENDIAN
#  endif
#endif
105
106 /* Include some basic (and easily shared) definitions */
107 #include "mpibase.h"
108
/* FIXME: The code base should not define two of these */
/* Turns a name into a string literal (see FUNCNAME/FCNAME below).  The
   two-step form lets the argument be macro-expanded before it is quoted;
   MPIDI_QUOTE2 on its own quotes its argument exactly as written. */
#ifndef MPIDI_QUOTE
#define MPIDI_QUOTE2(name_) #name_
#define MPIDI_QUOTE(name_)  MPIDI_QUOTE2(name_)
#endif
115
116 /*
117 Include the implementation definitions (e.g., error reporting, thread
118 portability)
119 More detailed documentation is contained in the MPICH2 and ADI3 manuals.
120 */
121 /* FIXME: ... to do ... */
122 #include "mpitypedefs.h"
123
124 /* This is the default implementation of MPIU_Memcpy. We define this
125 before including mpidpre.h so that it can be used when a device or
126 channel can use it if it's overriding MPIU_Memcpy. */
MPIU_DBG_ATTRIBUTE_NOINLINE(unused)127 MPIU_DBG_ATTRIBUTE_NOINLINE
128 ATTRIBUTE((unused))
129 static MPIU_DBG_INLINE_KEYWORD void MPIUI_Memcpy(void * dst, const void * src, size_t len)
130 {
131 memcpy(dst, src, len);
132 }
133
134 /* Include definitions from the device which must exist before items in this
135 file (mpiimpl.h) can be defined. mpidpre.h must be included before any
136 files that allow the device to override or extend any terms; this includes
137 mpiimplthread.h and mpiutil.h */
138 /* ------------------------------------------------------------------------- */
139 #include "mpidpre.h"
140 /* ------------------------------------------------------------------------- */
141
/* Overriding memcpy:
   A device or channel may provide its own MPIU_Memcpy macro; the definition
   below is used only when none has been supplied.  An override can still
   call MPIUI_Memcpy to get the default copy.
   Neither MPIU_Memcpy nor MPIUI_Memcpy yields a value; C89 memcpy returns
   the destination pointer, these do not.
*/
#if !defined(MPIU_Memcpy)
#define MPIU_Memcpy(dst_, src_, nbytes_)                    \
    do {                                                    \
        MPIU_MEM_CHECK_MEMCPY((dst_),(src_),(nbytes_));     \
        MPIUI_Memcpy((dst_), (src_), (nbytes_));            \
    } while (0)
#endif
157
158 #include "mpiimplthread.h"
159 #include "mpiutil.h"
160
161 /* ------------------------------------------------------------------------- */
162 /* mpidebug.h */
163 /* ------------------------------------------------------------------------- */
164 /* Debugging and printf control */
165 /* Use these *only* for debugging output intended for the implementors
166 and maintainers of MPICH. Do *not* use these for any output that
167 general users may normally see. Use either the error code creation
168 routines for error messages or MPIU_msg_printf etc. for general messages
169 (MPIU_msg_printf will go through gettext).
170
171 FIXME: Document all of these macros
172
173 NOTE: These macros and values are deprecated. See
174 www.mcs.anl.gov/mpi/mpich2/developer/design/debugmsg.htm for
175 the new design (only partially implemented at this time).
176
177 The implementation is in mpidbg.h
178 */
179 #include "mpidbg.h"
180
#ifndef MPICH_DBG_OUTPUT
/* Debug output is compiled out: both macros expand to nothing, so their
   arguments are never evaluated. */
#define MPIU_DBG_PRINTF(args_)
#define MPIU_DBG_PRINTF_CLASS(class_,level_,args_)
#else
/* args_ is the parenthesized argument list for MPIU_dbg_printf; the call is
   made only when MPIU_dbg_state is not MPIU_DBG_STATE_NONE. */
#define MPIU_DBG_PRINTF(args_)                          \
{                                                       \
    if (MPIU_dbg_state != MPIU_DBG_STATE_NONE)          \
    {                                                   \
        MPIU_dbg_printf args_;                          \
    }                                                   \
}
/* class_ is a placeholder for selecting a subset of debugging events and
   level_ a placeholder for a numeric level within that class; both are
   currently ignored.  args_ is the debugging text, as for MPIU_DBG_PRINTF. */
#define MPIU_DBG_PRINTF_CLASS(class_,level_,args_) MPIU_DBG_PRINTF(args_)
#endif

/* The following is temporarily provided for backward compatibility.  Any
   code using dbg_printf should be updated to use MPIU_DBG_PRINTF. */
#define dbg_printf MPIU_dbg_printf
201
202 /* ------------------------------------------------------------------------- */
203 /* end of mpidebug.h */
204 /* ------------------------------------------------------------------------- */
205
206 /* Routines for memory management */
207 #include "mpimem.h"
208
/*
 * Use MPIU_SYSCALL to wrap system calls; this provides a convenient point
 * for timing the calls and keeping track of the use of system calls.
 * This macro simply invokes the system call and does not even handle
 * EINTR.
 * To use,
 *    MPIU_SYSCALL( return-value, name-of-call, args-in-parenthesis )
 * e.g., change "n = read(fd,buf,maxn);" into
 *    MPIU_SYSCALL( n,read,(fd,buf,maxn) );
 * The invocation must be terminated with a semicolon in both variants.
 *
 * When USE_LOG_SYSCALLS is defined, each call is also traced on stdout:
 *  - the expansion is wrapped in do { } while (0) so that it is a single
 *    statement, exactly like the non-logging variant (safe in if/else);
 *  - errno is cleared before the call, captured immediately after it, and
 *    restored once logging is done, so the printf/fflush calls used for
 *    tracing cannot change the errno value the caller sees;
 *  - the result is printed through a cast to long so that results wider
 *    than int (e.g., the ssize_t returned by read) are formatted correctly.
 */
#ifdef USE_LOG_SYSCALLS
#define MPIU_SYSCALL(a_,b_,c_) do { \
    int mpiu_syscall_errno_; \
    printf( "[%d]about to call %s\n", MPIR_Process.comm_world->rank,#b_);\
    fflush(stdout); errno = 0;\
    a_ = b_ c_; \
    mpiu_syscall_errno_ = errno; \
    if ((a_)>=0 || mpiu_syscall_errno_==0) {\
        printf( "[%d]%s returned %ld\n", \
                MPIR_Process.comm_world->rank, #b_, (long)(a_) );\
    } \
    else { \
        printf( "[%d]%s returned %ld (errno = %d,%s)\n", \
                MPIR_Process.comm_world->rank, \
                #b_, (long)(a_), mpiu_syscall_errno_, \
                MPIU_Strerror(mpiu_syscall_errno_));\
    } \
    fflush(stdout); \
    errno = mpiu_syscall_errno_; \
} while (0)
#else
#define MPIU_SYSCALL(a_,b_,c_) a_ = b_ c_
#endif
237
238 /*TDSOverview.tex
239
240 MPI has a number of data structures, most of which are represented by
241 an opaque handle in an MPI program. In the MPICH implementation of MPI,
242 these handles are represented
243 as integers; this makes implementation of the C/Fortran handle transfer
244 calls (part of MPI-2) easy.
245
246 MPID objects (again with the possible exception of 'MPI_Request's)
247 are allocated by a common set of object allocation functions.
248 These are
249 .vb
250 void *MPIU_Handle_obj_create( MPIU_Object_alloc_t *objmem )
251 void MPIU_Handle_obj_destroy( MPIU_Object_alloc_t *objmem, void *object )
252 .ve
253 where 'objmem' is a pointer to a memory allocation object that knows
254 enough to allocate objects, including the
255 size of the object and the location of preallocated memory, as well
256 as the type of memory allocator. By providing the routines to allocate and
257 free the memory, we make it easy to use the same interface to allocate both
258 local and shared memory for objects (always using the same kind for each
259 type of object).
260
261 The names create/destroy were chosen because they are different from
262 new/delete (C++ operations) and malloc/free.
263 Any name choice will have some conflicts with other uses, of course.
264
265 Reference Counts:
266 Many MPI objects have reference count semantics.
267 The semantics of MPI require that many objects that have been freed by the
268 user
269 (e.g., with 'MPI_Type_free' or 'MPI_Comm_free') remain valid until all
270 pending
271 references to that object (e.g., by an 'MPI_Irecv') are complete. There
272 are several ways to implement this; MPICH uses `reference counts` in the
273 objects. To support the 'MPI_THREAD_MULTIPLE' level of thread-safety, these
274 reference counts must be accessed and updated atomically.
A reference count for
`any` object can be incremented (atomically)
with 'MPIU_Object_add_ref(objptr)'
and decremented with 'MPIU_Object_release_ref(objptr,inuse_ptr)'.
These have been designed so that they can be implemented as inlined
macros rather than function calls, even in the multithreaded case, and
can use special processor instructions that guarantee atomicity to
avoid thread locks.
283 The decrement routine sets the value pointed at by 'inuse_ptr' to 0 if
284 the postdecrement value of the reference counter is zero, and to a non-zero
285 value otherwise. If this value is zero, then the routine that decremented
286 the
287 reference count should free the object. This may be as simple as
288 calling 'MPIU_Handle_obj_destroy' (for simple objects with no other allocated
289 storage) or may require calling a separate routine to destroy the object.
290 Because MPI uses 'MPI_xxx_free' to both decrement the reference count and
291 free the object if the reference count is zero, we avoid the use of 'free'
292 in the MPID routines.
293
The 'inuse_ptr' approach is used rather than requiring the post-decrement
value because, for reference-count semantics, all that is necessary is
to know when the reference count reaches zero, and this can sometimes
be implemented more cheaply than requiring the post-decrement value (e.g.,
on IA32, there is an instruction for this operation).
299
300 Question:
301 Should we state that this is a macro so that we can use a register for
302 the output value? That avoids a store. Alternately, have the macro
303 return the value as if it was a function?
304
305 Structure Definitions:
306 The structure definitions in this document define `only` that part of
307 a structure that may be used by code that is making use of the ADI.
308 Thus, some structures, such as 'MPID_Comm', have many defined fields;
309 these are used to support MPI routines such as 'MPI_Comm_size' and
310 'MPI_Comm_remote_group'. Other structures may have few or no defined
311 members; these structures have no fields used outside of the ADI.
312 In C++ terms, all members of these structures are 'private'.
313
314 For the initial implementation, we expect that the structure definitions
315 will be designed for the multimethod device. However, all items that are
316 specific to a particular device (including the multi-method device)
317 will be placed at the end of the structure;
318 the document will clearly identify the members that all implementations
319 will provide. This simplifies much of the code in both the ADI and the
320 implementation of the MPI routines because structure member can be directly
321 accessed rather than using some macro or C++ style method interface.
322
323 T*/
324
325 /* mpi_lang.h - Prototypes for language specific routines. Currently used to
326 * set keyval attribute callbacks
327 */
328 #include "mpi_lang.h"
329 /* Known language bindings */
330 /*E
331 MPID_Lang_t - Known language bindings for MPI
332
333 A few operations in MPI need to know what language they were called from
334 or created by. This type enumerates the possible languages so that
335 the MPI implementation can choose the correct behavior. An example of this
336 are the keyval attribute copy and delete functions.
337
338 Module:
339 Attribute-DS
340 E*/
typedef enum MPID_Lang_t {
    MPID_LANG_C                 /* always present */
#ifdef HAVE_FORTRAN_BINDING
    , MPID_LANG_FORTRAN         /* only with the Fortran binding */
    , MPID_LANG_FORTRAN90
#endif
#ifdef HAVE_CXX_BINDING
    , MPID_LANG_CXX             /* only with the C++ binding */
#endif
} MPID_Lang_t;
350
351 /* Macros for the MPI handles (e.g., the object that encodes an
352 MPI_Datatype) */
353 #include "mpihandlemem.h"
354
/* Installs the attribute free routine for datatypes that is to run at
   finalize time.  Takes no arguments and returns nothing. */
void MPIR_DatatypeAttrFinalize(void);
358
359 /* ------------------------------------------------------------------------- */
360 /* Should the following be moved into mpihandlemem.h ?*/
361 /* ------------------------------------------------------------------------- */
362
363 /* Routines to initialize handle allocations */
364 /* These are now internal to the handlemem package
365 void *MPIU_Handle_direct_init( void *, int, int, int );
366 void *MPIU_Handle_indirect_init( void *(**)[], int *, int, int, int, int );
367 int MPIU_Handle_free( void *((*)[]), int );
368 */
/* Convert a handle into an object pointer, for MPI types that have
   predefined (builtin) objects.
     kind_   - object kind; pasted into MPID_<kind>_builtin,
               MPID_<kind>_direct and MPID_<kind>_mem
     handle_ - the handle to convert (evaluated more than once)
     mask_   - mask applied to the handle to index the builtin table
     out_    - lvalue that receives the pointer; set to 0 for an invalid
               or unrecognized handle kind */
/* TODO examine generated assembly for this construct, it's probably suboptimal
 * on Blue Gene.  An if/else if/else might help the compiler out.  It also lets
 * us hint that one case is likely(), usually the BUILTIN case. */
#define MPID_Getb_ptr(kind_,handle_,mask_,out_)                         \
{                                                                       \
    switch (HANDLE_GET_KIND(handle_)) {                                 \
    case HANDLE_KIND_INDIRECT:                                          \
        out_=((MPID_##kind_*)                                           \
            MPIU_Handle_get_ptr_indirect(handle_,&MPID_##kind_##_mem)); \
        break;                                                          \
    case HANDLE_KIND_DIRECT:                                            \
        out_=MPID_##kind_##_direct+HANDLE_INDEX(handle_);               \
        break;                                                          \
    case HANDLE_KIND_BUILTIN:                                           \
        out_=MPID_##kind_##_builtin+((handle_)&(mask_));                \
        break;                                                          \
    case HANDLE_KIND_INVALID:                                           \
    default:                                                            \
        out_=0;                                                         \
        break;                                                          \
    }                                                                   \
}
392
/* Convert a handle into an object pointer, for MPI types that do _not_ have
   any predefined objects.  A builtin handle kind is therefore treated like
   an invalid one: out_ is set to 0.
     kind_   - object kind; pasted into MPID_<kind>_direct and MPID_<kind>_mem
     handle_ - the handle to convert (evaluated more than once)
     out_    - lvalue that receives the pointer */
#define MPID_Get_ptr(kind_,handle_,out_)                                \
{                                                                       \
    switch (HANDLE_GET_KIND(handle_)) {                                 \
    case HANDLE_KIND_INDIRECT:                                          \
        out_=((MPID_##kind_*)                                           \
            MPIU_Handle_get_ptr_indirect(handle_,&MPID_##kind_##_mem)); \
        break;                                                          \
    case HANDLE_KIND_DIRECT:                                            \
        out_=MPID_##kind_##_direct+HANDLE_INDEX(handle_);               \
        break;                                                          \
    case HANDLE_KIND_BUILTIN:                                           \
    case HANDLE_KIND_INVALID:                                           \
    default:                                                            \
        out_=0;                                                         \
        break;                                                          \
    }                                                                   \
}
412
/* FIXME: the masks should be defined with the handle definitions instead
   of inserted here as literals */
/* Kinds converted with MPID_Getb_ptr; the literal is the mask handed to it
   for indexing the builtin objects. */
#define MPID_Comm_get_ptr(h_,p_)           MPID_Getb_ptr(Comm,h_,0x03ffffff,p_)
#define MPID_Group_get_ptr(h_,p_)          MPID_Getb_ptr(Group,h_,0x03ffffff,p_)
#define MPID_Errhandler_get_ptr(h_,p_)     MPID_Getb_ptr(Errhandler,h_,0x3,p_)
#define MPID_Op_get_ptr(h_,p_)             MPID_Getb_ptr(Op,h_,0x000000ff,p_)
/* Kinds converted with MPID_Get_ptr (no builtin objects). */
#define MPID_File_get_ptr(h_,p_)           MPID_Get_ptr(File,h_,p_)
#define MPID_Info_get_ptr(h_,p_)           MPID_Get_ptr(Info,h_,p_)
#define MPID_Win_get_ptr(h_,p_)            MPID_Get_ptr(Win,h_,p_)
#define MPID_Request_get_ptr(h_,p_)        MPID_Get_ptr(Request,h_,p_)
#define MPID_Grequest_class_get_ptr(h_,p_) MPID_Get_ptr(Grequest_class,h_,p_)
/* Keyvals have a special format.  This is roughly MPID_Getb_ptr, but
   the handle index is in a smaller bit field.  In addition,
   there is no storage for the builtin keyvals, so a builtin keyval
   yields 0 just like an invalid one.
   For the indirect case, we mask off the part of the keyval that is
   in the bits normally used for the indirect block index.
     keyval_ - the keyval to convert (evaluated more than once)
     out_    - lvalue that receives the MPID_Keyval pointer, or 0
*/
#define MPID_Keyval_get_ptr(keyval_,out_)                               \
{                                                                       \
    switch (HANDLE_GET_KIND(keyval_)) {                                 \
    case HANDLE_KIND_DIRECT:                                            \
        out_=MPID_Keyval_direct+((keyval_)&0x3fffff);                   \
        break;                                                          \
    case HANDLE_KIND_INDIRECT:                                          \
        out_=((MPID_Keyval*)                                            \
            MPIU_Handle_get_ptr_indirect((keyval_)&0xfc3fffff,&MPID_Keyval_mem)); \
        break;                                                          \
    case HANDLE_KIND_BUILTIN:                                           \
    case HANDLE_KIND_INVALID:                                           \
    default:                                                            \
        out_=0;                                                         \
        break;                                                          \
    }                                                                   \
}
449
/* Valid pointer checks */
/* This test is lame.  Should eventually include cookie test
   and in-range addresses */
/* Both macros only test for a null pointer.  When obj_ is null, err_ is
   assigned an error code built by MPIR_Err_create_code with the name of the
   kind as the message argument; otherwise err_ is left untouched.  FCNAME
   must be defined at the point of use.
   MPID_Valid_ptr_class lets the caller pick the error class;
   MPID_Valid_ptr always uses MPI_ERR_OTHER. */
#define MPID_Valid_ptr_class(kind_,obj_,errclass_,err_)                    \
{                                                                          \
    if (!(obj_)) {                                                         \
        err_ = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,    \
                                     FCNAME, __LINE__, errclass_,          \
                                     "**nullptrtype", "**nullptrtype %s",  \
                                     #kind_ );                             \
    }                                                                      \
}
#define MPID_Valid_ptr(kind_,obj_,err_)                                    \
{                                                                          \
    if (!(obj_)) {                                                         \
        err_ = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE,    \
                                     FCNAME, __LINE__, MPI_ERR_OTHER,      \
                                     "**nullptrtype", "**nullptrtype %s",  \
                                     #kind_ );                             \
    }                                                                      \
}
459
/* Per-type null-pointer checks; each one reports the error class that
   belongs to its object type through MPID_Valid_ptr_class. */
#define MPID_Info_valid_ptr(p_,e_) MPID_Valid_ptr_class(Info,p_,MPI_ERR_INFO,e_)
/* For communicators, check not only for a null pointer but also for an
   invalid communicator, such as one that has been freed.  The ref_count is
   used as the test for now; on failure the error is set to MPI_ERR_COMM and
   the pointer itself is reset to 0. */
/* ticket #1441: check (refcount<=0) to cover the case of 0, an "over-free" of
 * -1 or similar, and the 0xecec... case when --enable-g=mem is used */
#define MPID_Comm_valid_ptr(comm_,e_) {                       \
     MPID_Valid_ptr_class(Comm,comm_,MPI_ERR_COMM,e_);        \
     if (comm_) {                                             \
         if (MPIU_Object_get_ref(comm_) <= 0) {               \
             MPIU_ERR_SET(e_,MPI_ERR_COMM,"**comm");          \
             comm_ = 0;                                       \
         }                                                    \
     }                                                        \
}
#define MPID_Group_valid_ptr(p_,e_)      MPID_Valid_ptr_class(Group,p_,MPI_ERR_GROUP,e_)
#define MPID_Win_valid_ptr(p_,e_)        MPID_Valid_ptr_class(Win,p_,MPI_ERR_WIN,e_)
#define MPID_Op_valid_ptr(p_,e_)         MPID_Valid_ptr_class(Op,p_,MPI_ERR_OP,e_)
#define MPID_Errhandler_valid_ptr(p_,e_) MPID_Valid_ptr_class(Errhandler,p_,MPI_ERR_ARG,e_)
#define MPID_File_valid_ptr(p_,e_)       MPID_Valid_ptr_class(File,p_,MPI_ERR_FILE,e_)
#define MPID_Request_valid_ptr(p_,e_)    MPID_Valid_ptr_class(Request,p_,MPI_ERR_REQUEST,e_)
#define MPID_Keyval_valid_ptr(p_,e_)     MPID_Valid_ptr_class(Keyval,p_,MPI_ERR_KEYVAL,e_)
480
/* FIXME:
   Generic pointer test.  This is applied to any address, not just one from
   an MPI object.
   Currently unimplemented (returns success except for null pointers).
   The alignment argument is accepted but ignored.  With a little work,
   the macro could check that the pointer is properly aligned, using
   something like
     ((p) == 0 || ((char *)(p) & MPID_Alignbits[alignment] != 0)
   where MPID_Alignbits is set with a mask whose bits must be zero in a
   properly aligned quantity.  For systems with no alignment rules,
   all of these masks are zero, and this part of test can be eliminated.
 */
#define MPID_Pointer_is_invalid(addr_,alignment_) (!(addr_))
/* FIXME: The MPID_ALIGNED_xxx values below are temporary placeholders.
   They need to be computed by configure and included in the mpichconf.h
   file.  They cannot simply be set conservatively (i.e., as
   sizeof(object)), since the runtime system may generate objects with
   lesser alignment rules if the processor allows them.
 */
#define MPID_ALIGNED_PTR_INT    1
#define MPID_ALIGNED_PTR_LONG   1
#define MPID_ALIGNED_PTR_VOIDP  1
502 /* ------------------------------------------------------------------------- */
503 /* end of code that should the following be moved into mpihandlemem.h ?*/
504 /* ------------------------------------------------------------------------- */
505
506 /* ------------------------------------------------------------------------- */
507 /* Info */
508 /*TInfoOverview.tex
509
510 'MPI_Info' provides a way to create a list of '(key,value)' pairs
511 where the 'key' and 'value' are both strings. Because many routines, both
512 in the MPI implementation and in related APIs such as the PMI process
513 management interface, require 'MPI_Info' arguments, we define a simple
514 structure for each 'MPI_Info' element. Elements are allocated by the
515 generic object allocator; the head element is always empty (no 'key'
516 or 'value' is defined on the head element).
517
518 For simplicity, we have not abstracted the info data structures;
519 routines that want to work with the linked list may do so directly.
520 Because the 'MPI_Info' type is a handle and not a pointer, an MPIU
521 (utility) routine is provided to handle the
522 deallocation of 'MPID_Info' elements. See the implementation of
523 'MPI_Info_create' for how an Info type is allocated.
524
525 Thread Safety:
526
527 The info interface itself is not thread-robust. In particular, the routines
528 'MPI_INFO_GET_NKEYS' and 'MPI_INFO_GET_NTHKEY' assume that no other
529 thread modifies the info key. (If the info routines had the concept
530 of a next value, they would not be thread safe. As it stands, a user
531 must be careful if several threads have access to the same info object.)
532 Further, 'MPI_INFO_DUP', while not
533 explicitly advising implementers to be careful of one thread modifying the
534 'MPI_Info' structure while 'MPI_INFO_DUP' is copying it, requires that the
535 operation take place in a thread-safe manner.
There isn''t much that we can do about these cases. There are other cases
that must be handled. In particular, multiple threads are allowed to
update the same info value. Thus, all of the update routines must be thread
safe; the simple implementation used in the MPICH implementation uses locks.
Note that the 'MPI_Info_delete' call does not need a lock; the definition of
thread-safety means that any order of the calls functions correctly; since
it is invalid either to delete the same 'MPI_Info' twice or to modify an
'MPI_Info' that has been deleted, only one thread at a time can call
'MPI_Info_free' on any particular 'MPI_Info' value.
545
546 T*/
547 /*S
548 MPID_Info - Structure of an MPID info
549
550 Notes:
551 There is no reference count because 'MPI_Info' values, unlike other MPI
552 objects, may be changed after they are passed to a routine without
553 changing the routine''s behavior. In other words, any routine that uses
554 an 'MPI_Info' object must make a copy or otherwise act on any info value
555 that it needs.
556
557 A linked list is used because the typical 'MPI_Info' list will be short
558 and a simple linked list is easy to implement and to maintain. Similarly,
559 a single structure rather than separate header and element structures are
560 defined for simplicity. No separate thread lock is provided because
561 info routines are not performance critical; they may use the single
562 critical section lock in the 'MPIR_Process' structure when they need a
563 thread lock.
564
565 This particular form of linked list (in particular, with this particular
566 choice of the first two members) is used because it allows us to use
567 the same routines to manage this list as are used to manage the
568 list of free objects (in the file 'src/util/mem/handlemem.c'). In
569 particular, if lock-free routines for updating a linked list are
570 provided, they can be used for managing the 'MPID_Info' structure as well.
571
The MPI standard requires that keys can be no less than 32 characters and
no more than 255 characters. There is no mandated limit on the size
of values.
575
576 Module:
577 Info-DS
578 S*/
579 typedef struct MPID_Info {
580 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
581 struct MPID_Info *next;
582 char *key;
583 char *value;
584 } MPID_Info;
585 extern MPIU_Object_alloc_t MPID_Info_mem;
586 /* Preallocated info objects */
587 extern MPID_Info MPID_Info_direct[];
588 /* ------------------------------------------------------------------------- */
589
590 /* ------------------------------------------------------------------------- */
591 /* Error Handlers */
592 /*E
593 MPID_Errhandler_fn - MPID Structure to hold an error handler function
594
595 Notes:
596 The MPI-1 Standard declared only the C version of this, implicitly
597 assuming that 'int' and 'MPI_Fint' were the same.
598
599 Since Fortran does not have a C-style variable number of arguments
600 interface, the Fortran interface simply accepts two arguments. Some
601 calling conventions for Fortran (particularly under Windows) require
602 this.
603
604 Module:
605 ErrHand-DS
606
607 Questions:
608 What do we want to do about C++? Do we want a hook for a routine that can
609 be called to throw an exception in C++, particularly if we give C++ access
610 to this structure? Does the C++ handler need to be different (not part
611 of the union)?
612
613 E*/
614 typedef union MPID_Errhandler_fn {
615 void (*C_Comm_Handler_function) ( MPI_Comm *, int *, ... );
616 void (*F77_Handler_function) ( MPI_Fint *, MPI_Fint * );
617 void (*C_Win_Handler_function) ( MPI_Win *, int *, ... );
618 void (*C_File_Handler_function) ( MPI_File *, int *, ... );
619 } MPID_Errhandler_fn;
620
621 /*S
622 MPID_Errhandler - Description of the error handler structure
623
624 Notes:
625 Device-specific information may indicate whether the error handler is active;
626 this can help prevent infinite recursion in error handlers caused by
627 user-error without requiring the user to be as careful. We might want to
628 make this part of the interface so that the 'MPI_xxx_call_errhandler'
629 routines would check.
630
631 It is useful to have a way to indicate that the errhandler is no longer
632 valid, to help catch the case where the user has freed the errhandler but
633 is still using a copy of the 'MPI_Errhandler' value. We may want to
634 define the 'id' value for deleted errhandlers.
635
636 Module:
637 ErrHand-DS
638 S*/
639 typedef struct MPID_Errhandler {
640 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
641 MPID_Lang_t language;
642 MPID_Object_kind kind;
643 MPID_Errhandler_fn errfn;
644 /* Other, device-specific information */
645 #ifdef MPID_DEV_ERRHANDLER_DECL
646 MPID_DEV_ERRHANDLER_DECL
647 #endif
648 } MPID_Errhandler;
649 extern MPIU_Object_alloc_t MPID_Errhandler_mem;
650 /* Preallocated errhandler objects */
651 extern MPID_Errhandler MPID_Errhandler_builtin[];
652 extern MPID_Errhandler MPID_Errhandler_direct[];
653
/* We never reference count the builtin error handler objects, regardless of how
 * we decide to reference count the other predefined objects.  If we get to the
 * point where we never reference count *any* of the builtin objects then we
 * should probably remove these checks and let them fall through to the checks
 * for BUILTIN down in the MPIU_Object_* routines.
 *
 * MPIR_Errhandler_add_ref(eh_): add a reference unless eh_ is builtin.
 * MPIR_Errhandler_release_ref(eh_, inuse_): drop a reference unless eh_ is
 * builtin; for a builtin handler *inuse_ is simply set to 1. */
#define MPIR_Errhandler_add_ref( eh_ )                                     \
    do {                                                                   \
        if (HANDLE_GET_KIND((eh_)->handle) != HANDLE_KIND_BUILTIN) {       \
            MPIU_Object_add_ref( eh_ );                                    \
        }                                                                  \
    } while (0)
#define MPIR_Errhandler_release_ref( eh_, inuse_ )                         \
    do {                                                                   \
        if (HANDLE_GET_KIND((eh_)->handle) == HANDLE_KIND_BUILTIN) {       \
            *(inuse_) = 1;                                                 \
        }                                                                  \
        else {                                                             \
            MPIU_Object_release_ref( (eh_), (inuse_) );                    \
        }                                                                  \
    } while (0)
674 /* ------------------------------------------------------------------------- */
675
676 /* ------------------------------------------------------------------------- */
677 /* Keyvals and attributes */
678 /*TKyOverview.tex
679
680 Keyvals are MPI objects that, unlike most MPI objects, are defined to be
681 integers rather than a handle (e.g., 'MPI_Comm'). However, they really
682 `are` MPI opaque objects and are handled by the MPICH implementation in
683 the same way as all other MPI opaque objects. The only difference is that
684 there is no 'typedef int MPI_Keyval;' in 'mpi.h'. In particular, keyvals
685 are encoded (for direct and indirect references) in the same way that
686 other MPI opaque objects are
687
688 Each keyval has a copy and a delete function associated with it.
689 Unfortunately, these have a slightly different calling sequence for
690 each language, particularly when the size of a pointer is
691 different from the size of a Fortran integer. The unions
692 'MPID_Copy_function' and 'MPID_Delete_function' capture the differences
693 in a single union type.
694
695 The above comment is out of date but has never been updated as it should
696 have to match the introduction of a different interface. Beware!
697
698 Notes:
699
In the original design, retrieving an attribute from a different
language than the one that set it was thought to be an error. The MPI Forum
decided that this should be allowed, and after much discussion, the
behavior was defined. Thus, we need to record what sort of
attribute was provided, and be able to properly return the correct
value in each case. See MPI 2.2, Section 16.3.7 (Attributes) for
specific requirements. One consequence of this is that the value
that is returned may have a different length than how it was set.
708 On little-endian platforms (e.g., x86), this doesn't cause much of a
709 problem, because the address is that of the least significant byte,
710 and the lower bytes have the data that is needed in the case that
711 the desired attribute type is shorter than the stored attribute.
712 However, on a big-endian platform (e.g., IBM POWER), since the most
713 significant bytes are stored first, depending on the length of the
714 result type, the address of the result may not be the beginning of
715 the memory area. For example, assume that an MPI_Fint is 4 bytes
716 and a void * (and a Fortran INTEGER of kind MPI_ADDRESS_KIND) is 8
717 bytes, and let the attribute store the value in an 8 byte integer in
718 a field named "value". On a little-endian platform, the address of
719 the value is always the beginning of the field "value". On a
720 big-endian platform, the address of the value is the beginning of
721 the field if the return type is a pointer (e.g., from C) or Fortran
722 (KIND=MPI_ADDRESS_KIND), and the address of the beginning of the
723 field + 4 if the return type is a Fortran 77 integer (and, as
724 specified above, an MPI_Fint is 4 bytes shorter than a void *).
725
726 For the big-endian case, it is possible to manage these shifts (using
727 WORDS_LITTLEENDIAN to detect the big-endian case). Alternatively,
728 at a small cost in space, copies in variables of the correct length
729 can be maintained. At this writing, the code in src/mpi/attr makes
730 use of WORDS_LITTLEENDIAN to provide the appropriate code for the most
731 common cases.
732
733 T*/
734 /*TAttrOverview.tex
735 *
736 * The MPI standard allows `attributes`, essentially an '(integer,pointer)'
737 * pair, to be attached to communicators, windows, and datatypes.
738 * The integer is a `keyval`, which is allocated by a call (at the MPI level)
739 * to 'MPI_Comm/Type/Win_create_keyval'. The pointer is the value of
740 * the attribute.
741 * Attributes are primarily intended for use by the user, for example, to save
742 * information on a communicator, but can also be used to pass data to the
743 * MPI implementation. For example, an attribute may be used to pass
744 * Quality of Service information to an implementation to be used with
745 * communication on a particular communicator.
746 * To provide the most general access by the ADI to all attributes, the
747 * ADI defines a collection of routines that are used by the implementation
748 * of the MPI attribute routines (such as 'MPI_Comm_get_attr').
749 * In addition, the MPI routines involving attributes will invoke the
750 * corresponding 'hook' functions (e.g., 'MPID_Dev_comm_attr_set_hook')
751 * should the device define them.
752 *
753 * Attributes on windows and datatypes are defined by MPI but not of
754 * interest (as yet) to the device.
755 *
756 * In addition, there are seven predefined attributes that the device must
757 * supply to the implementation. This is accomplished through
758 * data values that are part of the 'MPIR_Process' data block.
759 * The predefined keyvals on 'MPI_COMM_WORLD' are\:
760 *.vb
761 * Keyval Related Module
762 * MPI_APPNUM Dynamic
763 * MPI_HOST Core
764 * MPI_IO Core
765 * MPI_LASTUSEDCODE Error
766 * MPI_TAG_UB Communication
767 * MPI_UNIVERSE_SIZE Dynamic
768 * MPI_WTIME_IS_GLOBAL Timer
769 *.ve
770 * The values stored in the 'MPIR_Process' block are the actual values. For
771 * example, the value of 'MPI_TAG_UB' is the integer value of the largest tag.
772 * The
773 * value of 'MPI_WTIME_IS_GLOBAL' is a '1' for true and '0' for false. Likely
774 * values for 'MPI_IO' and 'MPI_HOST' are 'MPI_ANY_SOURCE' and 'MPI_PROC_NULL'
775 * respectively.
776 *
777 T*/
778
779 /* Include the attribute access routines that permit access to the
780 attribute or its pointer, needed for cross-language access to attributes */
781 #include "mpi_attr.h"
782
783 /* Because Comm, Datatype, and File handles are all ints, and because
784 attributes are otherwise identical between the three types, we
785 only store generic copy and delete functions. This allows us to use
786 common code for the attribute set, delete, and dup functions */
787 /*E
788 MPID_Copy_function - MPID Structure to hold an attribute copy function
789
790 Notes:
  The appropriate element of this structure is selected by using the language
  field of the 'keyval'.
793
794 Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in
795 MPICH2, we use a single C copy function rather than have separate
796 ones for the Communicator, Window, and Datatype attributes.
797
798 There are no corresponding typedefs for the Fortran functions. The
799 F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
800 F90 function corresponds to the Fortran 90 binding used in MPI-2.
801
802 Module:
803 Attribute-DS
804
805 E*/
806 int
807 MPIR_Attr_copy_c_proxy(
808 MPI_Comm_copy_attr_function* user_function,
809 int handle,
810 int keyval,
811 void* extra_state,
812 MPIR_AttrType attrib_type,
813 void* attrib,
814 void** attrib_copy,
815 int* flag
816 );
817
818 typedef struct MPID_Copy_function {
819 int (*C_CopyFunction)( int, int, void *, void *, void *, int * );
820 void (*F77_CopyFunction) ( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
821 MPI_Fint *, MPI_Fint *, MPI_Fint * );
822 void (*F90_CopyFunction) ( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *,
823 MPI_Aint *, MPI_Fint *, MPI_Fint * );
824 /* The generic lang-independent user_function and proxy will
825 * replace the lang dependent copy funcs above
826 * Currently the lang-indpendent funcs are used only for keyvals
827 */
828 MPI_Comm_copy_attr_function *user_function;
829 MPID_Attr_copy_proxy *proxy;
830 /* The C++ function is the same as the C function */
831 } MPID_Copy_function;
832
833 /*E
834 MPID_Delete_function - MPID Structure to hold an attribute delete function
835
836 Notes:
  The appropriate element of this structure is selected by using the language
  field of the 'keyval'.
839
840 Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in
841 MPICH2, we use a single C delete function rather than have separate
842 ones for the Communicator, Window, and Datatype attributes.
843
844 There are no corresponding typedefs for the Fortran functions. The
845 F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
846 F90 function corresponds to the Fortran 90 binding used in MPI-2.
847
848 Module:
849 Attribute-DS
850
851 E*/
852 int
853 MPIR_Attr_delete_c_proxy(
854 MPI_Comm_delete_attr_function* user_function,
855 int handle,
856 int keyval,
857 MPIR_AttrType attrib_type,
858 void* attrib,
859 void* extra_state
860 );
861
862 typedef struct MPID_Delete_function {
863 int (*C_DeleteFunction) ( int, int, void *, void * );
864 void (*F77_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
865 MPI_Fint * );
866 void (*F90_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *,
867 MPI_Fint * );
868 /* The generic lang-independent user_function and proxy will
869 * replace the lang dependent copy funcs above
870 * Currently the lang-indpendent funcs are used only for keyvals
871 */
872 MPI_Comm_delete_attr_function *user_function;
873 MPID_Attr_delete_proxy *proxy;
874 } MPID_Delete_function;
875
876 /*S
877 MPID_Keyval - Structure of an MPID keyval
878
879 Module:
880 Attribute-DS
881
882 S*/
883 typedef struct MPID_Keyval {
884 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
885 MPID_Object_kind kind;
886 int was_freed;
887 void *extra_state;
888 MPID_Copy_function copyfn;
889 MPID_Delete_function delfn;
890 /* other, device-specific information */
891 #ifdef MPID_DEV_KEYVAL_DECL
892 MPID_DEV_KEYVAL_DECL
893 #endif
894 } MPID_Keyval;
895
/* Reference counting for keyvals: thin wrappers over the generic object
   reference macros. */
#define MPIR_Keyval_add_ref( kv_ ) \
    do { MPIU_Object_add_ref( kv_ ); } while (0)

#define MPIR_Keyval_release_ref( kv_, inuse_ ) \
    do { MPIU_Object_release_ref( kv_, inuse_ ); } while (0)
905
906
/* Attribute values in C/C++ are void * and in Fortran are ADDRESS_SIZED
   integers. Normally, these are the same size, but in at least one
   case, the address-sized integer was selected as longer than void *
   to work with the datatype code used in the I/O library. This
   is really a limitation in the current Datatype implementation. */
/* Storage type for an attribute value: an MPI_Aint when the build asks for
   it with USE_AINT_FOR_ATTRVAL, otherwise a plain pointer. */
#if defined(USE_AINT_FOR_ATTRVAL)
typedef MPI_Aint MPID_AttrVal_t;
#else
typedef void *MPID_AttrVal_t;
#endif
917
918 /* Attributes need no ref count or handle, but since we want to use the
919 common block allocator for them, we must provide those elements
920 */
921 /*S
922 MPID_Attribute - Structure of an MPID attribute
923
924 Notes:
925 Attributes don''t have 'ref_count's because they don''t have reference
926 count semantics. That is, there are no shallow copies or duplicates
  of an attribute.  An attribute is copied when the communicator that
928 it is attached to is duplicated. Subsequent operations, such as
929 'MPI_Comm_attr_free', can change the attribute list for one of the
930 communicators but not the other, making it impractical to keep the
931 same list. (We could defer making the copy until the list is changed,
932 but even then, there would be no reference count on the individual
933 attributes.)
934
935 A pointer to the keyval, rather than the (integer) keyval itself is
936 used since there is no need within the attribute structure to make
937 it any harder to find the keyval structure.
938
939 The attribute value is a 'void *'. If 'sizeof(MPI_Fint)' > 'sizeof(void*)',
940 then this must be changed (no such system has been encountered yet).
941 For the Fortran 77 routines in the case where 'sizeof(MPI_Fint)' <
942 'sizeof(void*)', the high end of the 'void *' value is used. That is,
943 we cast it to 'MPI_Fint *' and use that value.
944
  MPI defines three kinds of attributes (see MPI 2.1, Section 16.3, pages
  487-488; the standard says two, but there are really three, as discussed
  below).  These are pointer-valued attributes and two types of integer-valued
  attributes.
949 Pointer-valued attributes are used in C.
950 Integer-valued attributes are used in Fortran. These are of type either
951 INTEGER or INTEGER(KIND=MPI_ADDRESS_KIND).
952
953 The predefined attributes are a combination of INTEGER and pointers.
954
955 Module:
956 Attribute-DS
957
958 S*/
959 typedef struct MPID_Attribute {
960 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
961 MPID_Keyval *keyval; /* Keyval structure for this attribute */
962
963 struct MPID_Attribute *next; /* Pointer to next in the list */
964 MPIR_AttrType attrType; /* Type of the attribute */
965 long pre_sentinal; /* Used to detect user errors in accessing
966 the value */
967 MPID_AttrVal_t value; /* Stored value. An Aint must be at least
968 as large as an address - some builds
969 may make an Aint larger than a void * */
970 long post_sentinal; /* Like pre_sentinal */
971 /* other, device-specific information */
972 #ifdef MPID_DEV_ATTR_DECL
973 MPID_DEV_ATTR_DECL
974 #endif
975 } MPID_Attribute;
976 /* ------------------------------------------------------------------------- */
977
978 /*---------------------------------------------------------------------------
979 * Groups are *not* a major data structure in MPICH-2. They are provided
980 * only because they are required for the group operations (e.g.,
981 * MPI_Group_intersection) and for the scalable RMA synchronization
982 *---------------------------------------------------------------------------*/
983 /* This structure is used to implement the group operations such as
984 MPI_Group_translate_ranks */
/* Per-member record used by the group operations. */
typedef struct MPID_Group_pmap_t {
    /* rank of the member inside the group, in [0, size-1] */
    int lrank;
    /* local process id, taken from the VCONN */
    int lpid;
    /* index of the member that follows this one in lpid order */
    int next_lpid;
    /* scratch marker used while implementing group operations */
    int flag;
} MPID_Group_pmap_t;
991
992 /* Any changes in the MPID_Group structure must be made to the
993 predefined value in MPID_Group_builtin for MPI_GROUP_EMPTY in
994 src/mpi/group/grouputil.c */
995 /*S
996 MPID_Group - Description of the Group data structure
997
998 The processes in the group of 'MPI_COMM_WORLD' have lpid values 0 to 'size'-1,
999 where 'size' is the size of 'MPI_COMM_WORLD'. Processes created by
1000 'MPI_Comm_spawn' or 'MPI_Comm_spawn_multiple' or added by 'MPI_Comm_attach'
1001 or
1002 'MPI_Comm_connect'
1003 are numbered greater than 'size - 1' (on the calling process). See the
1004 discussion of LocalPID values.
1005
1006 Note that when dynamic process creation is used, the pids are `not` unique
1007 across the universe of connected MPI processes. This is ok, as long as
1008 pids are interpreted `only` on the process that owns them.
1009
1010 Only for MPI-1 are the lpid''s equal to the `global` pids. The local pids
1011 can be thought of as a reference not to the remote process itself, but
1012 how the remote process can be reached from this process. We may want to
1013 have a structure 'MPID_Lpid_t' that contains information on the remote
1014 process, such as (for TCP) the hostname, ip address (it may be different if
  multiple interfaces are supported; we may even want plural ip addresses for
  striping communication), and port (or ports).  For shared memory connected
1017 processes, it might have the address of a remote queue. The lpid number
1018 is an index into a table of 'MPID_Lpid_t'''s that contain this (device- and
1019 method-specific) information.
1020
1021 Module:
1022 Group-DS
1023
1024 S*/
1025 typedef struct MPID_Group {
1026 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
1027 int size; /* Size of a group */
1028 int rank; /* rank of this process relative to this
1029 group */
1030 int idx_of_first_lpid;
1031 MPID_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
1032 process number */
1033 int is_local_dense_monotonic; /* see NOTE-G1 */
1034
1035 /* We may want some additional data for the RMA syncrhonization calls */
1036 /* Other, device-specific information */
1037 #ifdef MPID_DEV_GROUP_DECL
1038 MPID_DEV_GROUP_DECL
1039 #endif
1040 } MPID_Group;
1041
1042 /* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the
1043 * following criteria:
1044 * 1) the lpids are all in the range [0,size-1], i.e. a subset of comm world
1045 * 2) the pids are sequentially numbered in increasing order, without any gaps,
1046 * stride, or repetitions
1047 *
1048 * This additional information allows us to handle the common case (insofar as
1049 * group ops are common) for MPI_Group_translate_ranks where group2 is
1050 * group_of(MPI_COMM_WORLD), or some simple subset. This is an important use
1051 * case for many MPI tool libraries, such as Scalasca.
1052 */
1053
1054 extern MPIU_Object_alloc_t MPID_Group_mem;
1055 /* Preallocated group objects */
1056 #define MPID_GROUP_N_BUILTIN 1
1057 extern MPID_Group MPID_Group_builtin[MPID_GROUP_N_BUILTIN];
1058 extern MPID_Group MPID_Group_direct[];
1059
1060 /* Object for empty group */
1061 extern MPID_Group * const MPID_Group_empty;
1062
1063 #define MPIR_Group_add_ref( _group ) \
1064 do { MPIU_Object_add_ref( _group ); } while (0)
1065
1066 #define MPIR_Group_release_ref( _group, _inuse ) \
1067 do { MPIU_Object_release_ref( _group, _inuse ); } while (0)
1068
1069 void MPIR_Group_setup_lpid_list( MPID_Group * );
1070 int MPIR_GroupCheckVCRSubset( MPID_Group *group_ptr, int vsize, MPID_VCR *vcr, int *idx );
1071
1072 /* ------------------------------------------------------------------------- */
1073
1074 /*E
1075 MPID_Comm_kind_t - Name the two types of communicators
1076 E*/
/* The two communicator kinds; the numeric values are fixed explicitly. */
typedef enum MPID_Comm_kind_t {
    MPID_INTRACOMM = 0,
    MPID_INTERCOMM = 1
} MPID_Comm_kind_t;
1080
1081 /* ideally we could add these to MPID_Comm_kind_t, but there's too much existing
1082 * code that assumes that the only valid values are INTRACOMM or INTERCOMM */
/* Position of a communicator in the node-aware hierarchy. */
typedef enum MPID_Comm_hierarchy_kind_t {
    /* no hierarchy */
    MPID_HIERARCHY_FLAT = 0,
    /* has subcommunicators */
    MPID_HIERARCHY_PARENT = 1,
    /* is the subcomm for node roots */
    MPID_HIERARCHY_NODE_ROOTS = 2,
    /* is the subcomm for a node */
    MPID_HIERARCHY_NODE = 3,
    /* cardinality of this enum; must stay last */
    MPID_HIERARCHY_SIZE
} MPID_Comm_hierarchy_kind_t;
1090 /* Communicators */
1091
1092 /*S
1093 MPID_Comm - Description of the Communicator data structure
1094
1095 Notes:
1096 Note that the size and rank duplicate data in the groups that
1097 make up this communicator. These are used often enough that this
1098 optimization is valuable.
1099
1100 This definition provides only a 16-bit integer for context id''s .
1101 This should be sufficient for most applications. However, extending
1102 this to a 32-bit (or longer) integer should be easy.
1103
1104 There are two context ids. One is used for sending and one for
1105 receiving. In the case of an Intracommunicator, they are the same
1106 context id. They differ in the case of intercommunicators, where
1107 they may come from processes in different comm worlds (in the
1108 case of MPI-2 dynamic process intercomms).
1109
1110 The virtual connection table is an explicit member of this structure.
1111 This contains the information used to contact a particular process,
1112 indexed by the rank relative to this communicator.
1113
1114 Groups are allocated lazily. That is, the group pointers may be
1115 null, created only when needed by a routine such as 'MPI_Comm_group'.
1116 The local process ids needed to form the group are available within
1117 the virtual connection table.
1118 For intercommunicators, we may want to always have the groups. If not,
1119 we either need the 'local_group' or we need a virtual connection table
1120 corresponding to the 'local_group' (we may want this anyway to simplify
1121 the implementation of the intercommunicator collective routines).
1122
1123 The pointer to the structure 'MPID_Collops' containing pointers to the
1124 collective
1125 routines allows an implementation to replace each routine on a
1126 routine-by-routine basis. By default, this pointer is null, as are the
1127 pointers within the structure. If either pointer is null, the implementation
1128 uses the generic provided implementation. This choice, rather than
1129 initializing the table with pointers to all of the collective routines,
1130 is made to reduce the space used in the communicators and to eliminate the
1131 need to include the implementation of all collective routines in all MPI
1132 executables, even if the routines are not used.
1133
1134 The macro 'MPID_HAS_HETERO' may be defined by a device to indicate that
1135 the device supports MPI programs that must communicate between processes with
1136 different data representations (e.g., different sized integers or different
1137 byte orderings). If the device does need to define this value, it should
1138 be defined in the file 'mpidpre.h'.
1139
1140 Please note that the local_size and remote_size fields can be confusing. For
1141 intracommunicators both fields are always equal to the size of the
1142 communicator. For intercommunicators local_size is equal to the size of
1143 local_group while remote_size is equal to the size of remote_group.
1144
1145 Module:
1146 Communicator-DS
1147
1148 Question:
1149 For fault tolerance, do we want to have a standard field for communicator
1150 health? For example, ok, failure detected, all (live) members of failed
1151 communicator have acked.
1152 S*/
1153 typedef struct MPID_Comm {
1154 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
1155 MPIR_Context_id_t context_id; /* Send context id. See notes */
1156 MPIR_Context_id_t recvcontext_id; /* Send context id. See notes */
1157 int remote_size; /* Value of MPI_Comm_(remote)_size */
1158 int rank; /* Value of MPI_Comm_rank */
1159 MPID_VCRT vcrt; /* virtual connecton reference table */
1160 MPID_VCR * vcr; /* alias to the array of virtual connections
1161 in vcrt */
1162 MPID_VCRT local_vcrt; /* local virtual connecton reference table */
1163 MPID_VCR * local_vcr; /* alias to the array of local virtual
1164 connections in local vcrt */
1165 MPID_Attribute *attributes; /* List of attributes */
1166 int local_size; /* Value of MPI_Comm_size for local group */
1167 MPID_Group *local_group, /* Groups in communicator. */
1168 *remote_group; /* The local and remote groups are the
1169 same for intra communicators */
1170 MPID_Comm_kind_t comm_kind; /* MPID_INTRACOMM or MPID_INTERCOMM */
1171 char name[MPI_MAX_OBJECT_NAME]; /* Required for MPI-2 */
1172 MPID_Errhandler *errhandler; /* Pointer to the error handler structure */
1173 struct MPID_Comm *local_comm; /* Defined only for intercomms, holds
1174 an intracomm for the local group */
1175
1176 MPID_Comm_hierarchy_kind_t hierarchy_kind; /* flat, parent, node, or node_roots */
1177 struct MPID_Comm *node_comm; /* Comm of processes in this comm that are on
1178 the same node as this process. */
1179 struct MPID_Comm *node_roots_comm; /* Comm of root processes for other nodes. */
1180 int *intranode_table; /* intranode_table[i] gives the rank in
1181 node_comm of rank i in this comm or -1 if i
1182 is not in this process' node_comm.
1183 It is of size 'local_size'. */
1184 int *internode_table; /* internode_table[i] gives the rank in
1185 node_roots_comm of rank i in this comm.
1186 It is of size 'local_size'. */
1187
1188 int is_low_group; /* For intercomms only, this boolean is
1189 set for all members of one of the
1190 two groups of processes and clear for
1191 the other. It enables certain
1192 intercommunicator collective operations
1193 that wish to use half-duplex operations
1194 to implement a full-duplex operation */
1195 struct MPID_Comm *comm_next;/* Provides a chain through all active
1196 communicators */
1197 struct MPID_Collops *coll_fns; /* Pointer to a table of functions
1198 implementing the collective
1199 routines */
1200 struct MPID_TopoOps *topo_fns; /* Pointer to a table of functions
1201 implementting the topology routines
1202 */
1203 int next_sched_tag; /* used by the NBC schedule code to allocate tags */
1204 #ifdef MPID_HAS_HETERO
1205 int is_hetero;
1206 #endif
1207 /* Other, device-specific information */
1208 #ifdef MPID_DEV_COMM_DECL
1209 MPID_DEV_COMM_DECL
1210 #endif
1211 } MPID_Comm;
1212 extern MPIU_Object_alloc_t MPID_Comm_mem;
1213
1214 /* this function should not be called by normal code! */
1215 int MPIR_Comm_delete_internal(MPID_Comm * comm_ptr, int isDisconnect);
1216
1217 #define MPIR_Comm_add_ref(_comm) \
1218 do { MPIU_Object_add_ref((_comm)); } while (0)
1219 #define MPIR_Comm_release_ref( _comm, _inuse ) \
1220 do { MPIU_Object_release_ref( _comm, _inuse ); } while (0)
1221
1222
1223 /* Release a reference to a communicator. If there are no pending
1224 references, delete the communicator and recover all storage and
1225 context ids.
1226
1227 This routine has been inlined because keeping it as a separate routine
1228 results in a >5% performance hit for the SQMR benchmark.
1229 */
1230 #undef FUNCNAME
1231 #define FUNCNAME MPIR_Comm_release
1232 #undef FCNAME
1233 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Drop one reference to 'comm_ptr'.  When the reference count shows the
 * communicator is no longer in use, hand it to MPIR_Comm_delete_internal
 * (passing 'isDisconnect' through) and return that routine's error code;
 * otherwise return MPI_SUCCESS.
 *
 * The error is returned as-is rather than ERR_POPed so the function stays
 * simple to inline; the caller still reports the error from the comm_delete
 * level. */
static inline int MPIR_Comm_release(MPID_Comm * comm_ptr, int isDisconnect)
{
    int in_use;

    MPIR_Comm_release_ref(comm_ptr, &in_use);
    if (unlikely(!in_use)) {
        /* MPIR_Comm_delete_internal should only be called from here and
         * from the "_always" variant of this function. */
        return MPIR_Comm_delete_internal(comm_ptr, isDisconnect);
    }

    return MPI_SUCCESS;
}
1250 #undef FUNCNAME
1251 #undef FCNAME
1252
1253 /* MPIR_Comm_release_always is the same as MPIR_Comm_release except it uses
1254 MPIR_Comm_release_ref_always instead.
1255 */
1256 int MPIR_Comm_release_always(MPID_Comm *comm_ptr, int isDisconnect);
1257
1258
1259 /* Preallocated comm objects. There are 3: comm_world, comm_self, and
1260 a private (non-user accessible) dup of comm world that is provided
1261 if needed in MPI_Finalize. Having a separate version of comm_world
1262 avoids possible interference with User code */
1263 #define MPID_COMM_N_BUILTIN 3
1264 extern MPID_Comm MPID_Comm_builtin[MPID_COMM_N_BUILTIN];
1265 extern MPID_Comm MPID_Comm_direct[];
1266 /* This is the handle for the internal MPI_COMM_WORLD . The "2" at the end
1267 of the handle is 3-1 (e.g., the index in the builtin array) */
1268 #define MPIR_ICOMM_WORLD ((MPI_Comm)0x44000002)
1269
1270 /* The following preprocessor macros provide bitfield access information for
1271 * context ID values. They follow a uniform naming pattern:
1272 *
1273 * MPID_CONTEXT_foo_WIDTH - the width in bits of the field
1274 * MPID_CONTEXT_foo_MASK - A valid bit mask for bit-wise AND and OR operations
1275 * with exactly all of the bits in the field set.
1276 * MPID_CONTEXT_foo_SHIFT - The number of bits that the field should be shifted
1277 * rightwards to place it in the least significant bits
1278 * of the ID. There may still be higher order bits
1279 * from other fields, so the _MASK should be used first
1280 * if you want to reliably retrieve the exact value of
1281 * the field.
1282 */
1283
1284 /* yields an rvalue that is the value of the field_name_ in the least significant bits */
/* MPID_CONTEXT_READ_FIELD(field, id): masks 'id' with the field's _MASK and
 * shifts the result right by the field's _SHIFT. */
#define MPID_CONTEXT_READ_FIELD(field_name_,id_) \
    (((id_) & MPID_CONTEXT_##field_name_##_MASK) >> MPID_CONTEXT_##field_name_##_SHIFT)
/* yields an rvalue that is the old_id_ with field_name_ set to field_val_.
 * Every argument is parenthesized so that an expression argument such as
 * (a | b) is masked as a whole rather than being split by operator
 * precedence.  field_val_ is shifted into place but not masked, so it must
 * already fit in the field. */
#define MPID_CONTEXT_SET_FIELD(field_name_,old_id_,field_val_) \
    (((old_id_) & ~MPID_CONTEXT_##field_name_##_MASK) | ((field_val_) << MPID_CONTEXT_##field_name_##_SHIFT))

/* Context suffixes for separating pt2pt and collective communication */
#define MPID_CONTEXT_SUFFIX_WIDTH (1)
#define MPID_CONTEXT_SUFFIX_SHIFT (0)
#define MPID_CONTEXT_SUFFIX_MASK ((1 << MPID_CONTEXT_SUFFIX_WIDTH) - 1)
#define MPID_CONTEXT_INTRA_PT2PT (0)
#define MPID_CONTEXT_INTRA_COLL (1)
#define MPID_CONTEXT_INTER_PT2PT (0)
#define MPID_CONTEXT_INTER_COLL (1)
1299
1300 /* Used to derive context IDs for sub-communicators from a parent communicator's
1301 context ID value. This field comes after the one bit suffix.
1302 values are shifted left by 1. */
/* SUBCOMM field: two bits that sit directly above the suffix field */
#define MPID_CONTEXT_SUBCOMM_WIDTH (2)
#define MPID_CONTEXT_SUBCOMM_SHIFT \
    (MPID_CONTEXT_SUFFIX_WIDTH + MPID_CONTEXT_SUFFIX_SHIFT)
#define MPID_CONTEXT_SUBCOMM_MASK \
    (((1 << MPID_CONTEXT_SUBCOMM_WIDTH) - 1) << MPID_CONTEXT_SUBCOMM_SHIFT)

/* Offsets that can be added to, or subtracted from, an existing context ID
 * to obtain the context ID of the child or the parent */
#define MPID_CONTEXT_PARENT_OFFSET    (0 << MPID_CONTEXT_SUBCOMM_SHIFT)
#define MPID_CONTEXT_INTRANODE_OFFSET (1 << MPID_CONTEXT_SUBCOMM_SHIFT)
#define MPID_CONTEXT_INTERNODE_OFFSET (2 << MPID_CONTEXT_SUBCOMM_SHIFT)

/* IS_LOCALCOMM field: one bit directly above SUBCOMM, used to derive a
 * context ID for the local communicator of an intercommunicator without
 * communication */
#define MPID_CONTEXT_IS_LOCALCOMM_WIDTH (1)
#define MPID_CONTEXT_IS_LOCALCOMM_SHIFT \
    (MPID_CONTEXT_SUBCOMM_SHIFT + MPID_CONTEXT_SUBCOMM_WIDTH)
#define MPID_CONTEXT_IS_LOCALCOMM_MASK \
    (((1 << MPID_CONTEXT_IS_LOCALCOMM_WIDTH) - 1) << MPID_CONTEXT_IS_LOCALCOMM_SHIFT)
1318
1319 /* MPIR_MAX_CONTEXT_MASK is the number of ints that make up the bit vector that
1320 * describes the context ID prefix space.
1321 *
1322 * The following must hold:
1323 * (num_bits_in_vector) <= (maximum_context_id_prefix)
1324 * which is the following in concrete terms:
1325 * MPIR_MAX_CONTEXT_MASK*MPIR_CONTEXT_INT_BITS <= 2**(MPIR_CONTEXT_ID_BITS - (MPID_CONTEXT_PREFIX_SHIFT + MPID_CONTEXT_DYNAMIC_PROC_WIDTH))
1326 *
 * We currently always assume MPIR_CONTEXT_INT_BITS is 32, regardless of the
 * value of sizeof(int)*CHAR_BIT.  We also make the assumption that CHAR_BIT==8.
 *
 * For a 16-bit context id field and CHAR_BIT==8, the prefix is 16-(4+1) = 11
 * bits wide, so this implies MPIR_MAX_CONTEXT_MASK <= 2**11/32 = 64
1331 */
1332
1333 /* number of bits to shift right by in order to obtain the context ID prefix */
/* PREFIX field: everything between IS_LOCALCOMM and the DYNAMIC_PROC bit */
#define MPID_CONTEXT_PREFIX_SHIFT \
    (MPID_CONTEXT_IS_LOCALCOMM_SHIFT + MPID_CONTEXT_IS_LOCALCOMM_WIDTH)
#define MPID_CONTEXT_PREFIX_WIDTH \
    (MPIR_CONTEXT_ID_BITS - (MPID_CONTEXT_PREFIX_SHIFT + MPID_CONTEXT_DYNAMIC_PROC_WIDTH))
#define MPID_CONTEXT_PREFIX_MASK \
    (((1 << MPID_CONTEXT_PREFIX_WIDTH) - 1) << MPID_CONTEXT_PREFIX_SHIFT)

/* DYNAMIC_PROC field: the top bit, i.e. the upper half of the id space is
 * reserved for dynamic procs */
#define MPID_CONTEXT_DYNAMIC_PROC_WIDTH (1)
#define MPID_CONTEXT_DYNAMIC_PROC_SHIFT \
    (MPIR_CONTEXT_ID_BITS - MPID_CONTEXT_DYNAMIC_PROC_WIDTH)
#define MPID_CONTEXT_DYNAMIC_PROC_MASK \
    (((1 << MPID_CONTEXT_DYNAMIC_PROC_WIDTH) - 1) << MPID_CONTEXT_DYNAMIC_PROC_SHIFT)

/* should probably be (sizeof(int)*CHAR_BIT) once the code is CHAR_BIT-clean */
#define MPIR_CONTEXT_INT_BITS (32)
/* the 8 below should eventually become CHAR_BIT */
#define MPIR_CONTEXT_ID_BITS (sizeof(MPIR_Context_id_t)*8)
/* number of MPIR_CONTEXT_INT_BITS-sized words needed for one bit per prefix */
#define MPIR_MAX_CONTEXT_MASK \
    ((1 << MPID_CONTEXT_PREFIX_WIDTH) / MPIR_CONTEXT_INT_BITS)
1347
1348 /* Utility routines. Where possible, these are kept in the source directory
1349 with the other comm routines (src/mpi/comm, in mpicomm.h). However,
1350 to create a new communicator after a spawn or connect-accept operation,
1351 the device may need to create a new contextid */
1352 int MPIR_Get_contextid( MPID_Comm *, MPIR_Context_id_t *context_id );
1353 int MPIR_Get_contextid_sparse(MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id, int ignore_id);
1354 int MPIR_Get_contextid_sparse_group(MPID_Comm *comm_ptr, MPID_Group *group_ptr, int tag, MPIR_Context_id_t *context_id, int ignore_id);
1355 void MPIR_Free_contextid( MPIR_Context_id_t );
1356
1357 /* ------------------------------------------------------------------------- */
1358
1359 /* Requests */
1360 /* This currently defines a single structure type for all requests.
1361 Eventually, we may want a union type, as used in MPICH-1 */
1362 /* NOTE-R1: MPID_REQUEST_MPROBE signifies that this is a request created by
1363 * MPI_Mprobe or MPI_Improbe. Since we use MPI_Request objects as our
1364 * MPI_Message objects, we use this separate kind in order to provide stronger
1365 * error checking. Once a message (backed by a request) is promoted to a real
1366 * request by calling MPI_Mrecv/MPI_Imrecv, we actually modify the kind to be
1367 * MPID_REQUEST_RECV in order to keep completion logic as simple as possible. */
1368 /*E
1369 MPID_Request_kind - Kinds of MPI Requests
1370
1371 Module:
1372 Request-DS
1373
1374 E*/
/* Kinds of request.  Enumerators are numbered consecutively from zero;
   a device may append its own kinds after MPID_LAST_REQUEST_KIND. */
typedef enum MPID_Request_kind_t {
    MPID_REQUEST_UNDEFINED = 0,
    MPID_REQUEST_SEND,
    MPID_REQUEST_RECV,
    MPID_PREQUEST_SEND,
    MPID_PREQUEST_RECV,
    MPID_UREQUEST,
    MPID_COLL_REQUEST,
    MPID_REQUEST_MPROBE,            /* see NOTE-R1 */
    MPID_LAST_REQUEST_KIND
#if defined(MPID_DEV_REQUEST_KIND_DECL)
    , MPID_DEV_REQUEST_KIND_DECL
#endif
} MPID_Request_kind_t;
1389
1390 /* Typedefs for Fortran generalized requests */
1391 typedef void (MPIR_Grequest_f77_cancel_function)(void *, MPI_Fint*, MPI_Fint *);
1392 typedef void (MPIR_Grequest_f77_free_function)(void *, MPI_Fint *);
1393 typedef void (MPIR_Grequest_f77_query_function)(void *, MPI_Fint *, MPI_Fint *);
1394
1395 /* vtable-ish structure holding generalized request function pointers and other
1396 * state. Saves ~48 bytes in pt2pt requests on many platforms. */
1397 struct MPID_Grequest_fns {
1398 MPI_Grequest_cancel_function *cancel_fn;
1399 MPI_Grequest_free_function *free_fn;
1400 MPI_Grequest_query_function *query_fn;
1401 MPIX_Grequest_poll_function *poll_fn;
1402 MPIX_Grequest_wait_function *wait_fn;
1403 void *grequest_extra_state;
1404 MPIX_Grequest_class greq_class;
1405 MPID_Lang_t greq_lang; /* language that defined
1406 the generalize req */
1407 };
1408
1409 /* see mpiimplthread.h for the def of MPID_cc_t and related functions/macros */
/* Completion test for a request: applies MPID_cc_is_complete to the
   request's cc_ptr. */
#define MPID_Request_is_complete(req_) \
    (MPID_cc_is_complete((req_)->cc_ptr))
1411
1412 /*S
1413 MPID_Request - Description of the Request data structure
1414
1415 Module:
1416 Request-DS
1417
1418 Notes:
1419 If it is necessary to remember the MPI datatype, this information is
1420 saved within the device-specific fields provided by 'MPID_DEV_REQUEST_DECL'.
1421
1422 Requests come in many flavors, as stored in the 'kind' field. It is
1423 expected that each kind of request will have its own structure type
1424 (e.g., 'MPID_Request_send_t') that extends the 'MPID_Request'.
1425
1426 S*/
1427 typedef struct MPID_Request {
1428 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
1429 MPID_Request_kind_t kind;
1430 /* pointer to the completion counter */
1431 /* This is necessary for the case when an operation is described by a
1432 list of requests */
1433 MPID_cc_t *cc_ptr;
1434 /* A comm is needed to find the proper error handler */
1435 MPID_Comm *comm;
1436 /* completion counter. Ensure cc and status are in the same cache
1437 line, assuming the cache line size is a multiple of 32 bytes
1438 and 32-bit integers */
1439 MPID_cc_t cc;
1440 /* Status is needed for wait/test/recv */
1441 MPI_Status status;
1442 /* Persistent requests have their own "real" requests. Receive requests
1443 have partnering send requests when src=dest. etc. */
1444 struct MPID_Request *partner_request;
1445
1446 /* User-defined request support via a "vtable". Saves space in the already
1447 * bloated request for regular pt2pt and NBC requests. */
1448 struct MPID_Grequest_fns *greq_fns;
1449
1450 /* Other, device-specific information */
1451 #ifdef MPID_DEV_REQUEST_DECL
1452 MPID_DEV_REQUEST_DECL
1453 #endif
1454 } MPID_Request ATTRIBUTE((__aligned__(32)));
1455
1456 extern MPIU_Object_alloc_t MPID_Request_mem;
1457 /* Preallocated request objects */
1458 extern MPID_Request MPID_Request_direct[];
1459
/* Reference-count helpers for requests.  Both simply forward to the generic
   MPIU_Object_* helpers; each expands to a single statement. */
#define MPIR_Request_add_ref(req_p_)                    \
    do {                                                \
        MPIU_Object_add_ref(req_p_);                    \
    } while (0)

#define MPIR_Request_release_ref(req_p_, inuse_p_)      \
    do {                                                \
        MPIU_Object_release_ref(req_p_, inuse_p_);      \
    } while (0)
1465
/* Hooks that let us implement a sendq when debugger support is selected
   (the MPIR_COMML_* hooks are switched by the same setting).  Because of
   the extra overhead, they expand to real calls only when
   HAVE_DEBUGGER_SUPPORT is defined; otherwise they expand to nothing and
   their arguments are not evaluated. */
#ifndef HAVE_DEBUGGER_SUPPORT

#define MPIR_SENDQ_REMEMBER(req_, b_, c_, d_)
#define MPIR_SENDQ_FORGET(req_)
#define MPIR_COMML_REMEMBER(comm_)
#define MPIR_COMML_FORGET(comm_)

#else

void MPIR_WaitForDebugger(void);
void MPIR_DebuggerSetAborting(const char *);
void MPIR_Sendq_remember(MPID_Request *, int, int, int);
void MPIR_Sendq_forget(MPID_Request *);
void MPIR_CommL_remember(MPID_Comm *);
void MPIR_CommL_forget(MPID_Comm *);

#define MPIR_SENDQ_REMEMBER(req_, b_, c_, d_) MPIR_Sendq_remember(req_, b_, c_, d_)
#define MPIR_SENDQ_FORGET(req_) MPIR_Sendq_forget(req_)
#define MPIR_COMML_REMEMBER(comm_) MPIR_CommL_remember(comm_)
#define MPIR_COMML_FORGET(comm_) MPIR_CommL_forget(comm_)

#endif
1488
1489 /* must come after MPID_Comm is declared/defined */
1490 int MPIR_Get_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_Request **req);
1491 int MPIR_Get_intercomm_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_Request **req);
1492
/* ------------------------------------------------------------------------- */
/* Node ID code: prototypes and definitions used to support hierarchical
   collectives in a (mostly) device-independent way.  They are declared only
   when MPID_USE_NODE_IDS is defined. */
#ifdef MPID_USE_NODE_IDS
/* MPID_Node_id_t is a signed integer type that the device defines in
   mpidpre.h. */
int MPID_Get_node_id(MPID_Comm *comm,
                     int rank,
                     MPID_Node_id_t *id_p);
int MPID_Get_max_node_id(MPID_Comm *comm,
                         MPID_Node_id_t *max_id_p);
#endif
1501
1502 /* ------------------------------------------------------------------------- */
1503 /*S
1504 MPID_Progress_state - object to hold progress state when using the blocking
1505 progress routines.
1506
1507 Module:
1508 Misc
1509
1510 Notes:
1511 The device must define MPID_PROGRESS_STATE_DECL. It should include any state
1512 that needs to be maintained between calls to MPID_Progress_{start,wait,end}.
1513 S*/
1514 typedef struct MPID_Progress_state
1515 {
1516 MPID_PROGRESS_STATE_DECL
1517 }
1518 MPID_Progress_state;
1519 /* ------------------------------------------------------------------------- */
1520
1521 /* ------------------------------------------------------------------------- */
1522 /* end of mpirma.h (in src/mpi/rma?) */
1523 /* ------------------------------------------------------------------------- */
1524
/* Windows */
#if defined(USE_MPID_RMA_TABLE)
struct MPID_Win;

/* Table of RMA function pointers.  It is compiled in only when
   USE_MPID_RMA_TABLE is defined, in which case MPID_Win embeds one instance
   of it.  Every entry returns int. */
typedef struct MPID_RMA_Ops {
    int (*Win_free)(struct MPID_Win **);

    /* Put / Get / Accumulate */
    int (*Put)(const void *, int, MPI_Datatype,
               int, MPI_Aint, int, MPI_Datatype,
               struct MPID_Win *);
    int (*Get)(void *, int, MPI_Datatype,
               int, MPI_Aint, int, MPI_Datatype,
               struct MPID_Win *);
    int (*Accumulate)(const void *, int, MPI_Datatype,
                      int, MPI_Aint, int, MPI_Datatype,
                      MPI_Op, struct MPID_Win *);

    /* Fence and post/start/complete/wait/test entries */
    int (*Win_fence)(int, struct MPID_Win *);
    int (*Win_post)(MPID_Group *, int, struct MPID_Win *);
    int (*Win_start)(MPID_Group *, int, struct MPID_Win *);
    int (*Win_complete)(struct MPID_Win *);
    int (*Win_wait)(struct MPID_Win *);
    int (*Win_test)(struct MPID_Win *, int *);

    /* Lock / unlock entries */
    int (*Win_lock)(int, int, int, struct MPID_Win *);
    int (*Win_unlock)(int, struct MPID_Win *);

    /* MPI-3 Functions */
    int (*Win_attach)(struct MPID_Win *, void *, MPI_Aint);
    int (*Win_detach)(struct MPID_Win *, const void *);
    int (*Win_shared_query)(struct MPID_Win *, int, MPI_Aint *, int *, void *);

    int (*Win_lock_all)(int, struct MPID_Win *);
    int (*Win_unlock_all)(struct MPID_Win *);

    int (*Win_flush)(int, struct MPID_Win *);
    int (*Win_flush_all)(struct MPID_Win *);
    int (*Win_flush_local)(int, struct MPID_Win *);
    int (*Win_flush_local_all)(struct MPID_Win *);
    int (*Win_sync)(struct MPID_Win *);

    int (*Get_accumulate)(const void *, int, MPI_Datatype,
                          void *, int, MPI_Datatype,
                          int, MPI_Aint, int, MPI_Datatype,
                          MPI_Op, struct MPID_Win *);
    int (*Fetch_and_op)(const void *, void *, MPI_Datatype,
                        int, MPI_Aint, MPI_Op,
                        struct MPID_Win *);
    int (*Compare_and_swap)(const void *, const void *, void *, MPI_Datatype,
                            int, MPI_Aint, struct MPID_Win *);

    /* Variants whose last argument is an MPID_Request ** */
    int (*Rput)(const void *, int, MPI_Datatype,
                int, MPI_Aint, int, MPI_Datatype,
                struct MPID_Win *, MPID_Request **);
    int (*Rget)(void *, int, MPI_Datatype,
                int, MPI_Aint, int, MPI_Datatype,
                struct MPID_Win *, MPID_Request **);
    int (*Raccumulate)(const void *, int, MPI_Datatype,
                       int, MPI_Aint, int, MPI_Datatype,
                       MPI_Op, struct MPID_Win *, MPID_Request **);
    int (*Rget_accumulate)(const void *, int, MPI_Datatype,
                           void *, int, MPI_Datatype,
                           int, MPI_Aint, int, MPI_Datatype,
                           MPI_Op, struct MPID_Win *, MPID_Request **);

} MPID_RMAFns;
#define MPID_RMAFNS_VERSION 2
/* The memory allocation/free routines do not take a window argument, so
   they are not part of this table: they must be initialized separately and
   are a per-run, not a per-window, object.  If the device can manage
   different kinds of memory allocations, these routines must provide that
   flexibility internally. */
/*
    void *(*Alloc_mem)(size_t, MPID_Info *);
    int (*Free_mem)(void *);
*/
#endif
1591
1592 /*S
1593 MPID_Win - Description of the Window Object data structure.
1594
1595 Module:
1596 Win-DS
1597
1598 Notes:
1599 The following 3 keyvals are defined for attributes on all MPI
1600 Window objects\:
1601 .vb
1602 MPI_WIN_SIZE
1603 MPI_WIN_BASE
1604 MPI_WIN_DISP_UNIT
1605 .ve
1606 These correspond to the values in 'length', 'start_address', and
1607 'disp_unit'.
1608
1609 The communicator in the window is the same communicator that the user
1610 provided to 'MPI_Win_create' (not a dup). However, each intracommunicator
1611 has a special context id that may be used if MPI communication is used
1612 by the implementation to implement the RMA operations.
1613
1614 There is no separate window group; the group of the communicator should be
1615 used.
1616
1617 Question:
1618 Should a 'MPID_Win' be defined after 'MPID_Segment' in case the device
1619 wants to
1620 store a queue of pending put/get operations, described with 'MPID_Segment'
1621 (or 'MPID_Request')s?
1622
1623 S*/
1624 typedef struct MPID_Win {
1625 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
1626 int fence_cnt; /* 0 = no fence has been called;
1627 1 = fence has been called */
1628 MPID_Errhandler *errhandler; /* Pointer to the error handler structure */
1629 void *base;
1630 MPI_Aint size;
1631 int disp_unit; /* Displacement unit of *local* window */
1632 MPID_Attribute *attributes;
1633 MPID_Group *start_group_ptr; /* group passed in MPI_Win_start */
1634 int start_assert; /* assert passed to MPI_Win_start */
1635 MPID_Comm *comm_ptr; /* Pointer to comm of window (dup) */
1636 int myrank; /* Rank of this process in comm (used to
1637 detect operations on self) */
1638 int lockRank; /* If within an MPI_Win_lock epoch,
1639 the rank that we locked */
1640 #ifdef USE_THREADED_WINDOW_CODE
1641 /* These were causing compilation errors. We need to figure out how to
1642 integrate threads into MPICH2 before including these fields. */
1643 /* FIXME: The test here should be within a test for threaded support */
1644 #ifdef HAVE_PTHREAD_H
1645 pthread_t wait_thread_id; /* id of thread handling MPI_Win_wait */
1646 pthread_t passive_target_thread_id; /* thread for passive target RMA */
1647 #elif defined(HAVE_WINTHREADS)
1648 HANDLE wait_thread_id;
1649 HANDLE passive_target_thread_id;
1650 #endif
1651 #endif
1652 /* */
1653 #ifdef USE_MPID_RMA_TABLE
1654 MPID_RMAFns RMAFns;
1655 #endif
1656 /* These are COPIES of the values so that addresses to them
1657 can be returned as attributes. They are initialized by the
1658 MPI_Win_get_attr function.
1659
1660 These values are constant for the lifetime of the window, so
1661 this is thread-safe.
1662 */
1663 int copyDispUnit;
1664 MPI_Aint copySize;
1665
1666 char name[MPI_MAX_OBJECT_NAME];
1667
1668 MPIR_Win_flavor_t create_flavor;
1669 MPIR_Win_model_t model;
1670 MPIR_Win_flavor_t copyCreateFlavor;
1671 MPIR_Win_model_t copyModel;
1672
1673 /* Other, device-specific information */
1674 #ifdef MPID_DEV_WIN_DECL
1675 MPID_DEV_WIN_DECL
1676 #endif
1677 } MPID_Win;
1678 extern MPIU_Object_alloc_t MPID_Win_mem;
1679 /* Preallocated win objects */
1680 extern MPID_Win MPID_Win_direct[];
1681
/* Special (negative) window lock states.  The values 0, 1, ... are reserved
   for LOCKED, so the symbolic states defined here are negative. */
enum MPID_Win_lock_states {
    MPID_WIN_STATE_LOCKED_ALL = -2,
    MPID_WIN_STATE_UNLOCKED   = -1
    /* LOCKED = 0, 1, ... */
};
1687
1688 /* ------------------------------------------------------------------------- */
1689 /* also in mpirma.h ?*/
1690 /* ------------------------------------------------------------------------- */
1691
1692 /*
1693 * Good Memory (may be required for passive target operations on MPI_Win)
1694 */
1695
1696 /*@
1697 MPID_Alloc_mem - Allocate memory suitable for passive target RMA operations
1698
1699 Input Parameter:
1700 + size - Number of types to allocate.
1701 - info - Info object
1702
1703 Return value:
1704 Pointer to the allocated memory. If the memory is not available,
1705 returns null.
1706
1707 Notes:
1708 This routine is used to implement 'MPI_Alloc_mem'. It is for that reason
1709 that there is no communicator argument.
1710
1711 This memory may `only` be freed with 'MPID_Free_mem'.
1712
1713 This is a `local`, not a collective operation. It functions more like a
1714 good form of 'malloc' than collective shared-memory allocators such as
1715 the 'shmalloc' found on SGI systems.
1716
1717 Implementations of this routine may wish to use 'MPID_Memory_register'.
1718 However, this routine has slighly different requirements, so a separate
1719 entry point is provided.
1720
1721 Question:
1722 Since this takes an info object, should there be an error routine in the
1723 case that the info object contains an error?
1724
1725 Module:
1726 Win
1727 @*/
1728 void *MPID_Alloc_mem( size_t size, MPID_Info *info );
1729
/*@
  MPID_Free_mem - Frees memory allocated with 'MPID_Alloc_mem'

  Input Parameter:
. ptr - Pointer to memory allocated by 'MPID_Alloc_mem'.

  Return value:
  'MPI_SUCCESS' if the memory was successfully freed; an MPI error code
  otherwise.

  Notes:
  A return value is provided because it may not be easy to validate the
  value of 'ptr' without attempting to free the memory.

  Module:
  Win
  @*/
int MPID_Free_mem(void *ptr);
1747
/*@
  MPID_Mem_was_alloced - Return true if this memory was allocated with
  'MPID_Alloc_mem'

  Input Parameter:
. ptr - Address of memory

  Return value:
  True if the memory was allocated with 'MPID_Alloc_mem', false otherwise.

  Notes:
  Earlier versions of this description also listed a 'size' argument (size
  of the region in bytes); the prototype below takes only 'ptr'.

  This routine may be needed by 'MPI_Win_create' to ensure that the memory
  used with passive target RMA operations was allocated with
  'MPI_Alloc_mem' (which is implemented with 'MPID_Alloc_mem').

  Module:
  Win
  @*/
/* brad : this isn't used or implemented anywhere */
int MPID_Mem_was_alloced(void *ptr);
1769
1770 /* ------------------------------------------------------------------------- */
1771 /* end of also in mpirma.h ? */
1772 /* ------------------------------------------------------------------------- */
1773
1774 /* ------------------------------------------------------------------------- */
1775 /* Reduction and accumulate operations */
/*E
  MPID_Op_kind - Enumerates types of MPI_Op types

  Notes:
  These are needed for implementing 'MPI_Accumulate', since only predefined
  operations are allowed for that operation.

  A gap in the enum values (between the predefined kinds and the user kinds)
  was made to allow additional predefined operations to be inserted.  This
  might include future additions to MPI or experimental extensions (such as
  a Read-Modify-Write operation).

  Module:
  Collective-DS
  E*/
typedef enum MPID_Op_kind {
    /* predefined kinds: 0 .. 14 */
    MPID_OP_NULL    = 0,
    MPID_OP_MAX     = 1,
    MPID_OP_MIN     = 2,
    MPID_OP_SUM     = 3,
    MPID_OP_PROD    = 4,
    MPID_OP_LAND    = 5,
    MPID_OP_BAND    = 6,
    MPID_OP_LOR     = 7,
    MPID_OP_BOR     = 8,
    MPID_OP_LXOR    = 9,
    MPID_OP_BXOR    = 10,
    MPID_OP_MAXLOC  = 11,
    MPID_OP_MINLOC  = 12,
    MPID_OP_REPLACE = 13,
    MPID_OP_NO_OP   = 14,
    /* user-defined kinds, placed after the gap */
    MPID_OP_USER_NONCOMMUTE = 32,
    MPID_OP_USER            = 33
} MPID_Op_kind;
1798
1799 /*S
1800 MPID_User_function - Definition of a user function for MPI_Op types.
1801
1802 Notes:
1803 This includes a 'const' to make clear which is the 'in' argument and
1804 which the 'inout' argument, and to indicate that the 'count' and 'datatype'
1805 arguments are unchanged (they are addresses in an attempt to allow
1806 interoperation with Fortran). It includes 'restrict' to emphasize that
1807 no overlapping operations are allowed.
1808
1809 We need to include a Fortran version, since those arguments will
1810 have type 'MPI_Fint *' instead. We also need to add a test to the
1811 test suite for this case; in fact, we need tests for each of the handle
1812 types to ensure that the transfered handle works correctly.
1813
1814 This is part of the collective module because user-defined operations
1815 are valid only for the collective computation routines and not for
1816 RMA accumulate.
1817
1818 Yes, the 'restrict' is in the correct location. C compilers that
1819 support 'restrict' should be able to generate code that is as good as a
1820 Fortran compiler would for these functions.
1821
1822 We should note on the manual pages for user-defined operations that
1823 'restrict' should be used when available, and that a cast may be
1824 required when passing such a function to 'MPI_Op_create'.
1825
1826 Question:
1827 Should each of these function types have an associated typedef?
1828
1829 Should there be a C++ function here?
1830
1831 Module:
1832 Collective-DS
1833 S*/
1834 typedef union MPID_User_function {
1835 void (*c_function) ( const void *, void *,
1836 const int *, const MPI_Datatype * );
1837 void (*f77_function) ( const void *, void *,
1838 const MPI_Fint *, const MPI_Fint * );
1839 } MPID_User_function;
1840 /* FIXME: Should there be "restrict" in the definitions above, e.g.,
1841 (*c_function)( const void restrict * , void restrict *, ... )? */
1842
1843 /*S
1844 MPID_Op - MPI_Op structure
1845
1846 Notes:
1847 All of the predefined functions are commutative. Only user functions may
1848 be noncummutative, so there are two separate op types for commutative and
1849 non-commutative user-defined operations.
1850
1851 Operations do not require reference counts because there are no nonblocking
1852 operations that accept user-defined operations. Thus, there is no way that
1853 a valid program can free an 'MPI_Op' while it is in use.
1854
1855 Module:
1856 Collective-DS
1857 S*/
1858 typedef struct MPID_Op {
1859 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
1860 MPID_Op_kind kind;
1861 MPID_Lang_t language;
1862 MPID_User_function function;
1863 } MPID_Op;
1864 #define MPID_OP_N_BUILTIN 15
1865 extern MPID_Op MPID_Op_builtin[MPID_OP_N_BUILTIN];
1866 extern MPID_Op MPID_Op_direct[];
1867 extern MPIU_Object_alloc_t MPID_Op_mem;
1868
/* Reference-count helpers for MPID_Op objects.  Both simply forward to the
   generic MPIU_Object_* helpers; each expands to a single statement. */
#define MPIR_Op_add_ref(op_p_)                          \
    do {                                                \
        MPIU_Object_add_ref(op_p_);                     \
    } while (0)

#define MPIR_Op_release_ref(op_p_, inuse_p_)            \
    do {                                                \
        MPIU_Object_release_ref(op_p_, inuse_p_);       \
    } while (0)
1873
/* Release one reference to an op and, if MPIR_Op_release_ref() reports that
   it is no longer in use, hand the object back to MPID_Op_mem with
   MPIU_Handle_obj_free().  op_p_ is expanded more than once, so it must be
   free of side effects. */
#define MPIR_Op_release(op_p_)                                  \
    do {                                                        \
        int still_in_use_;                                      \
        MPIR_Op_release_ref((op_p_), &still_in_use_);           \
        if (still_in_use_ == 0)                                 \
            MPIU_Handle_obj_free(&MPID_Op_mem, (op_p_));        \
    } while (0)
1883
1884 /* ------------------------------------------------------------------------- */
1885
1886 /* ------------------------------------------------------------------------- */
1887 /* mpicoll.h (in src/mpi/coll?) */
1888 /* ------------------------------------------------------------------------- */
1889
1890 /* Collective operations */
1891 typedef struct MPID_Collops {
1892 int ref_count; /* Supports lazy copies */
1893 /* Contains pointers to the functions for the MPI collectives */
1894 int (*Barrier) (MPID_Comm *, int *);
1895 int (*Bcast) (void*, int, MPI_Datatype, int, MPID_Comm *, int *);
1896 int (*Gather) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
1897 int, MPID_Comm *, int *);
1898 int (*Gatherv) (const void*, int, MPI_Datatype, void*, const int *, const int *,
1899 MPI_Datatype, int, MPID_Comm *, int *);
1900 int (*Scatter) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
1901 int, MPID_Comm *, int *);
1902 int (*Scatterv) (const void*, const int *, const int *, MPI_Datatype,
1903 void*, int, MPI_Datatype, int, MPID_Comm *, int *);
1904 int (*Allgather) (const void*, int, MPI_Datatype, void*, int,
1905 MPI_Datatype, MPID_Comm *, int *);
1906 int (*Allgatherv) (const void*, int, MPI_Datatype, void*, const int *,
1907 const int *, MPI_Datatype, MPID_Comm *, int *);
1908 int (*Alltoall) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
1909 MPID_Comm *, int *);
1910 int (*Alltoallv) (const void*, const int *, const int *, MPI_Datatype,
1911 void*, const int *, const int *, MPI_Datatype, MPID_Comm *,
1912 int *);
1913 int (*Alltoallw) (const void*, const int *, const int *, const MPI_Datatype *, void*,
1914 const int *, const int *, const MPI_Datatype *, MPID_Comm *, int *);
1915 int (*Reduce) (const void*, void*, int, MPI_Datatype, MPI_Op, int,
1916 MPID_Comm *, int *);
1917 int (*Allreduce) (const void*, void*, int, MPI_Datatype, MPI_Op,
1918 MPID_Comm *, int *);
1919 int (*Reduce_scatter) (const void*, void*, const int *, MPI_Datatype, MPI_Op,
1920 MPID_Comm *, int *);
1921 int (*Scan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
1922 int (*Exscan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPID_Comm *, int * );
1923 int (*Reduce_scatter_block) (const void*, void*, int, MPI_Datatype, MPI_Op,
1924 MPID_Comm *, int *);
1925
1926 /* MPI-3 nonblocking collectives */
1927 int (*Ibarrier)(MPID_Comm *comm_ptr, MPID_Sched_t s);
1928 int (*Ibcast)(void *buffer, int count, MPI_Datatype datatype, int root,
1929 MPID_Comm *comm_ptr, MPID_Sched_t s);
1930 int (*Igather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1931 int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
1932 MPID_Sched_t s);
1933 int (*Igatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1934 const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
1935 MPID_Comm *comm_ptr, MPID_Sched_t s);
1936 int (*Iscatter)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1937 int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
1938 MPID_Sched_t s);
1939 int (*Iscatterv)(const void *sendbuf, const int *sendcounts, const int *displs,
1940 MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
1941 int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
1942 int (*Iallgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1943 int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
1944 MPID_Sched_t s);
1945 int (*Iallgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1946 const int *recvcounts, const int *displs, MPI_Datatype recvtype,
1947 MPID_Comm *comm_ptr, MPID_Sched_t s);
1948 int (*Ialltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
1949 int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
1950 MPID_Sched_t s);
1951 int (*Ialltoallv)(const void *sendbuf, const int *sendcounts, const int *sdispls,
1952 MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
1953 const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
1954 MPID_Sched_t s);
1955 int (*Ialltoallw)(const void *sendbuf, const int *sendcounts, const int *sdispls,
1956 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
1957 const int *rdispls, const MPI_Datatype *recvtypes,
1958 MPID_Comm *comm_ptr, MPID_Sched_t s);
1959 int (*Ireduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1960 int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
1961 int (*Iallreduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
1962 MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
1963 int (*Ireduce_scatter)(const void *sendbuf, void *recvbuf, const int *recvcounts,
1964 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
1965 int (*Ireduce_scatter_block)(const void *sendbuf, void *recvbuf, int recvcount,
1966 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
1967 MPID_Sched_t s);
1968 int (*Iscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1969 MPID_Comm *comm_ptr, MPID_Sched_t s);
1970 int (*Iexscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
1971 MPID_Comm *comm_ptr, MPID_Sched_t s);
1972
1973 struct MPID_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
1974
1975 /* MPI-3 neighborhood collectives (blocking & nonblocking) */
1976 int (*Neighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1977 void *recvbuf, int recvcount, MPI_Datatype recvtype,
1978 MPID_Comm *comm_ptr);
1979 int (*Neighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1980 void *recvbuf, const int recvcounts[], const int displs[],
1981 MPI_Datatype recvtype, MPID_Comm *comm_ptr);
1982 int (*Neighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1983 void *recvbuf, int recvcount, MPI_Datatype recvtype,
1984 MPID_Comm *comm_ptr);
1985 int (*Neighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
1986 MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
1987 const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr);
1988 int (*Neighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
1989 const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
1990 const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
1991 MPID_Comm *comm_ptr);
1992 int (*Ineighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1993 void *recvbuf, int recvcount, MPI_Datatype recvtype,
1994 MPID_Comm *comm_ptr, MPID_Sched_t s);
1995 int (*Ineighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1996 void *recvbuf, const int recvcounts[], const int displs[],
1997 MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
1998 int (*Ineighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
1999 void *recvbuf, int recvcount, MPI_Datatype recvtype,
2000 MPID_Comm *comm_ptr, MPID_Sched_t s);
2001 int (*Ineighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
2002 MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
2003 const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr,
2004 MPID_Sched_t s);
2005 int (*Ineighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
2006 const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
2007 const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
2008 MPID_Comm *comm_ptr, MPID_Sched_t s);
2009 } MPID_Collops;
2010
2011 #define MPIR_BARRIER_TAG 1
2012 /* ------------------------------------------------------------------------- */
2013 /* end of mpicoll.h (in src/mpi/coll? */
2014 /* ------------------------------------------------------------------------- */
2015
2016 /* ------------------------------------------------------------------------- */
2017 /* mpitopo.h (in src/mpi/topo? */
2018 /*
 * The following structure allows the device detailed control over the
2020 * functions that are used to implement the topology routines. If either
2021 * the pointer to this structure is null or any individual entry is null,
2022 * the default function is used (this follows exactly the same rules as the
2023 * collective operations, provided in the MPID_Collops structure).
2024 */
2025 /* ------------------------------------------------------------------------- */
2026
2027 typedef struct MPID_TopoOps {
2028 int (*cartCreate)( const MPID_Comm *, int, const int[], const int [],
2029 int, MPI_Comm * );
2030 int (*cartMap) ( const MPID_Comm *, int, const int[], const int [],
2031 int * );
2032 int (*graphCreate)( const MPID_Comm *, int, const int[], const int [],
2033 int, MPI_Comm * );
2034 int (*graphMap) ( const MPID_Comm *, int, const int[], const int[],
2035 int * );
2036 } MPID_TopoOps;
2037 /* ------------------------------------------------------------------------- */
2038 /* end of mpitopo.h (in src/mpi/topo? */
2039 /* ------------------------------------------------------------------------- */
2040
2041
2042 typedef struct MPID_CommOps {
2043 int (*split_type)(MPID_Comm *, int, int, MPID_Info *, MPID_Comm **);
2044 } MPID_CommOps;
2045 extern struct MPID_CommOps *MPID_Comm_fns; /* Communicator creation functions */
2046
2047
/* Per process data */

/* State stored in the 'initialized' field of MPICH_PerProcess_t */
typedef enum MPIR_MPI_State_t {
    MPICH_PRE_INIT       = 0,
    MPICH_WITHIN_MPI     = 1,
    MPICH_POST_FINALIZED = 2
} MPIR_MPI_State_t;
2051
/* Values of the predefined attributes.  All members are plain ints; the
   member order is part of the layout and must not be changed. */
typedef struct PreDefined_attrs {
    /* Application number provided by mpiexec (MPI-2) */
    int appnum;
    /* host */
    int host;
    /* standard io allowed */
    int io;
    /* last used error code (MPI-2) */
    int lastusedcode;
    /* Maximum message tag */
    int tag_ub;
    /* Universe size from mpiexec (MPI-2) */
    int universe;
    /* Wtime is global over processes in COMM_WORLD */
    int wtime_is_global;
} PreDefined_attrs;
2061
2062 struct MPID_Datatype;
2063
2064 typedef struct MPICH_PerProcess_t {
2065 MPIR_MPI_State_t initialized; /* Is MPI initalized? */
2066 int do_error_checks; /* runtime error check control */
2067 struct MPID_Comm *comm_world; /* Easy access to comm_world for
2068 error handler */
2069 struct MPID_Comm *comm_self; /* Easy access to comm_self */
2070 struct MPID_Comm *comm_parent; /* Easy access to comm_parent */
2071 struct MPID_Comm *icomm_world; /* An internal version of comm_world
2072 that is separate from user's
2073 versions */
2074 PreDefined_attrs attrs; /* Predefined attribute values */
2075 int tagged_coll_mask; /* Tag space mask for tagged collectives */
2076
2077 /* The topology routines dimsCreate is independent of any communicator.
2078 If this pointer is null, the default routine is used */
2079 int (*dimsCreate)( int, int, int *);
2080
2081 /* Attribute dup functions. Here for lazy initialization */
2082 int (*attr_dup)( int, MPID_Attribute *, MPID_Attribute ** );
2083 int (*attr_free)( int, MPID_Attribute ** );
2084 /* There is no win_attr_dup function because there can be no MPI_Win_dup
2085 function */
2086 /* Routine to get the messages corresponding to dynamically created
2087 error messages */
2088 const char *(*errcode_to_string)( int );
2089 #ifdef HAVE_CXX_BINDING
2090 /* Routines to call C++ functions from the C implementation of the
2091 MPI reduction and attribute routines */
2092 void (*cxx_call_op_fn)(const void *, void *, int, MPI_Datatype,
2093 MPI_User_function * );
2094 /* Error handling functions. As for the attribute functions,
2095 we pass the integer file/comm/win, the address of the error code,
2096 and the C function to call (itself a function defined by the
2097 C++ interface and exported to C). The first argument is used
2098 to specify the kind (comm,file,win) */
2099 void (*cxx_call_errfn) ( int, int *, int *, void (*)(void) );
2100 #endif /* HAVE_CXX_BINDING */
2101 } MPICH_PerProcess_t;
2102 extern MPICH_PerProcess_t MPIR_Process;
2103
2104 /* ------------------------------------------------------------------------- */
2105 /* In MPICH2, each function has an "enter" and "exit" macro. These can be
2106 * used to add various features to each function at compile time, or they
2107 * can be set to empty to provide the fastest possible production version.
2108 *
2109 * There are at this time three choices of features (beyond the empty choice)
2110 * 1. timing (controlled by macros in mpitimerimpl.h)
2111 * These collect data on when each function began and finished; the
2112 * resulting data can be displayed using special programs
2113 * 2. Debug logging (selected with --enable-g=log)
2114 * Invokes MPIU_DBG_MSG at the entry and exit for each routine
2115 * 3. Additional memory validation of the memory arena (--enable-g=memarena)
2116 */
2117 /* ------------------------------------------------------------------------- */
2118 /* allow the timing module the opportunity to define the macros */
2119 #include "mpifunc.h"
2120 #if !defined(NEEDS_FUNC_ENTER_EXIT_DEFS)
2121 /* If no timing choice is selected, this sets the entry/exit macros
2122 to empty */
2123 # include "mpitimerimpl.h"
2124 #endif
2125
/* When NEEDS_FUNC_ENTER_EXIT_DEFS is defined, every layer-specific
   enter/exit macro is mapped onto the generic MPIR_FUNC_ENTER /
   MPIR_FUNC_EXIT pair, and the timer init/finalize macros expand to
   nothing. */
#if defined(NEEDS_FUNC_ENTER_EXIT_DEFS)

/* mpich layer definitions */
#define MPID_MPI_FUNC_ENTER(fn_)                MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_FUNC_EXIT(fn_)                 MPIR_FUNC_EXIT(fn_)

#define MPID_MPI_PT2PT_FUNC_ENTER(fn_)          MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_PT2PT_FUNC_EXIT(fn_)           MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(fn_)    MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(fn_)     MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(fn_)     MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(fn_)      MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(fn_)     MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(fn_)      MPIR_FUNC_EXIT(fn_)

#define MPID_MPI_COLL_FUNC_ENTER(fn_)           MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_COLL_FUNC_EXIT(fn_)            MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_RMA_FUNC_ENTER(fn_)            MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_RMA_FUNC_EXIT(fn_)             MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_INIT_FUNC_ENTER(fn_)           MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_INIT_FUNC_EXIT(fn_)            MPIR_FUNC_EXIT(fn_)
#define MPID_MPI_FINALIZE_FUNC_ENTER(fn_)       MPIR_FUNC_ENTER(fn_)
#define MPID_MPI_FINALIZE_FUNC_EXIT(fn_)        MPIR_FUNC_EXIT(fn_)

/* device layer definitions */
#define MPIDI_FUNC_ENTER(fn_)                   MPIR_FUNC_ENTER(fn_)
#define MPIDI_FUNC_EXIT(fn_)                    MPIR_FUNC_EXIT(fn_)

#define MPIDI_PT2PT_FUNC_ENTER(fn_)             MPIR_FUNC_ENTER(fn_)
#define MPIDI_PT2PT_FUNC_EXIT(fn_)              MPIR_FUNC_EXIT(fn_)
#define MPIDI_PT2PT_FUNC_ENTER_FRONT(fn_)       MPIR_FUNC_ENTER(fn_)
#define MPIDI_PT2PT_FUNC_EXIT_FRONT(fn_)        MPIR_FUNC_EXIT(fn_)
#define MPIDI_PT2PT_FUNC_ENTER_BACK(fn_)        MPIR_FUNC_ENTER(fn_)
#define MPIDI_PT2PT_FUNC_EXIT_BACK(fn_)         MPIR_FUNC_EXIT(fn_)
#define MPIDI_PT2PT_FUNC_ENTER_BOTH(fn_)        MPIR_FUNC_ENTER(fn_)
#define MPIDI_PT2PT_FUNC_EXIT_BOTH(fn_)         MPIR_FUNC_EXIT(fn_)

#define MPIDI_COLL_FUNC_ENTER(fn_)              MPIR_FUNC_ENTER(fn_)
#define MPIDI_COLL_FUNC_EXIT(fn_)               MPIR_FUNC_EXIT(fn_)
#define MPIDI_RMA_FUNC_ENTER(fn_)               MPIR_FUNC_ENTER(fn_)
#define MPIDI_RMA_FUNC_EXIT(fn_)                MPIR_FUNC_EXIT(fn_)
#define MPIDI_INIT_FUNC_ENTER(fn_)              MPIR_FUNC_ENTER(fn_)
#define MPIDI_INIT_FUNC_EXIT(fn_)               MPIR_FUNC_EXIT(fn_)
#define MPIDI_FINALIZE_FUNC_ENTER(fn_)          MPIR_FUNC_ENTER(fn_)
#define MPIDI_FINALIZE_FUNC_EXIT(fn_)           MPIR_FUNC_EXIT(fn_)

/* evaporate the timing macros since timing is not selected */
#define MPIU_Timer_init(rank, size)
#define MPIU_Timer_finalize()

#endif /* NEEDS_FUNC_ENTER_EXIT_DEFS */
2171
2172 /* Definitions for error handling and reporting */
2173 #include "mpierror.h"
2174 #include "mpierrs.h"
2175
2176 /* Definitions for instrumentation (currently used within RMA code) */
2177 #include "mpiinstr.h"
2178
/* FIXME: The only users of this routine are mpi/src/err/errutil.c and
   smpd, so it may not need to be exported from this header. */
void MPIR_Err_print_stack( FILE *, int );
2182
2183 /* ------------------------------------------------------------------------- */
2184
/* FIXME: Move these to the communicator block; make sure that all
   objects have such hooks */
/* Fallback hooks used when HAVE_DEV_COMM_HOOK is not defined: the argument
   is dropped and the expansion is simply MPI_SUCCESS. */
#if !defined(HAVE_DEV_COMM_HOOK)
#define MPID_Dev_comm_create_hook(comm_)  MPI_SUCCESS
#define MPID_Dev_comm_destroy_hook(comm_) MPI_SUCCESS
#endif
2191
2192 /* ------------------------------------------------------------------------- */
2193 /* FIXME: What is the scope of these functions? Can they be moved into
2194 src/mpi/pt2pt? */
2195 /* ------------------------------------------------------------------------- */
2196
/* MPIR_Status_set_empty - mark a status object as "empty".
 *
 * status_ - pointer to the MPI_Status to fill in, or MPI_STATUS_IGNORE, in
 *           which case nothing is written.
 *
 * Sets MPI_SOURCE to MPI_ANY_SOURCE, MPI_TAG to MPI_ANY_TAG, count to 0 and
 * cancelled to FALSE.  MPI_ERROR is not set (it is only set if ERR_IN_STATUS
 * is returned).
 *
 * The argument is copied into a local pointer first so that it is evaluated
 * exactly once; an expression with side effects (e.g. 'statuses++') is
 * therefore safe to pass.  The expansion is still a plain brace block, so
 * existing call sites compile unchanged. */
#define MPIR_Status_set_empty(status_)                          \
    {                                                           \
        MPI_Status *mpir_status__ = (status_);                  \
        if (mpir_status__ != MPI_STATUS_IGNORE)                 \
        {                                                       \
            mpir_status__->MPI_SOURCE = MPI_ANY_SOURCE;         \
            mpir_status__->MPI_TAG = MPI_ANY_TAG;               \
            mpir_status__->count = 0;                           \
            mpir_status__->cancelled = FALSE;                   \
        }                                                       \
    }
/* MPIR_Status_set_procnull - fill in a status for a null process.
 * See MPI 1.1, section 3.11, Null Processes.
 *
 * status_ - pointer to the MPI_Status to fill in, or MPI_STATUS_IGNORE, in
 *           which case nothing is written.
 *
 * Sets MPI_SOURCE to MPI_PROC_NULL, MPI_TAG to MPI_ANY_TAG, count to 0 and
 * cancelled to FALSE.  MPI_ERROR is not set (it is only set if ERR_IN_STATUS
 * is returned).
 *
 * The argument is copied into a local pointer first so that it is evaluated
 * exactly once; the expansion is still a plain brace block, so existing call
 * sites compile unchanged. */
#define MPIR_Status_set_procnull(status_)                       \
    {                                                           \
        MPI_Status *mpir_status__ = (status_);                  \
        if (mpir_status__ != MPI_STATUS_IGNORE)                 \
        {                                                       \
            mpir_status__->MPI_SOURCE = MPI_PROC_NULL;          \
            mpir_status__->MPI_TAG = MPI_ANY_TAG;               \
            mpir_status__->count = 0;                           \
            mpir_status__->cancelled = FALSE;                   \
        }                                                       \
    }
2220
/* MPIR_Request_extract_status - copy the status stored in a request into a
 * user status object.
 *
 * request_ptr_ - pointer to an object with a 'status' member; it is read once.
 * status_      - pointer to the destination MPI_Status, or MPI_STATUS_IGNORE,
 *                in which case nothing is read or written.
 *
 * According to the MPI 1.1 standard page 22 lines 9-12, the MPI_ERROR field
 * may not be modified except by the functions in section 3.7.5 which return
 * MPI_ERR_IN_STATUSES (MPI_Wait{all,some} and MPI_Test{all,some}).  The
 * destination's MPI_ERROR is therefore saved before the structure copy and
 * restored afterwards.
 *
 * 'status_' is copied into a local pointer first so that it is evaluated
 * exactly once; the expansion is still a plain brace block, so existing call
 * sites compile unchanged. */
#define MPIR_Request_extract_status(request_ptr_, status_)      \
    {                                                           \
        MPI_Status *mpir_status__ = (status_);                  \
        if (mpir_status__ != MPI_STATUS_IGNORE)                 \
        {                                                       \
            int error__;                                        \
                                                                \
            error__ = mpir_status__->MPI_ERROR;                 \
            *mpir_status__ = (request_ptr_)->status;            \
            mpir_status__->MPI_ERROR = error__;                 \
        }                                                       \
    }
2234 /* ------------------------------------------------------------------------- */
2235
2236 /* FIXME: The bindings should be divided into three groups:
2237 1. ADI3 routines. These should have structure comment documentation, e.g.,
2238 the text from doc/adi3/adi3.c
2239 2. General utility routines. These should have a short description
2240 3. Local utility routines, e.g., routines used within a single subdirectory.
2241 These should be moved into an include file in that subdirectory
2242 */
2243 /* Bindings for internal routines */
2244 /*@ MPIR_Add_finalize - Add a routine to be called when MPI_Finalize is invoked
2245
2246 + routine - Routine to call
2247 . extra - Void pointer to data to pass to the routine
2248 - priority - Indicates the priority of this callback and controls the order
2249 in which callbacks are executed. Use a priority of zero for most handlers;
2250 higher priorities will be executed first.
2251
2252 Notes:
2253 The routine 'MPID_Finalize' is executed with priority
2254 'MPIR_FINALIZE_CALLBACK_PRIO' (currently defined as 5). Handlers with
2255 a higher priority execute before 'MPID_Finalize' is called; those with
2256 a lower priority after 'MPID_Finalize' is called.
2257 @*/
void MPIR_Add_finalize(int (*routine)(void *),
                       void *extra,
                       int priority);
2259
/* Finalize-callback priorities, listed from highest to lowest value. */
/* NOTE(review): presumably the largest priority a callback may use - confirm */
#define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10
/* Priority at which 'MPID_Finalize' itself is executed */
#define MPIR_FINALIZE_CALLBACK_PRIO 5
#define MPIR_FINALIZE_CALLBACK_HANDLE_CHECK_PRIO 1
/* Zero is the priority to use for most handlers */
#define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0
2264
2265 /*int MPIR_Comm_attr_dup(MPID_Comm *, MPID_Attribute **);
2266 int MPIR_Comm_attr_delete(MPID_Comm *, MPID_Attribute *);*/
2267 int MPIR_Comm_copy( MPID_Comm *, int, MPID_Comm ** );
2268 int MPIR_Comm_copy_data(MPID_Comm *comm_ptr, MPID_Comm **outcomm_ptr);
2269
/* The setters for Fortran keyvals live in mpi_f77interface.h; the C++
   counterparts below are declared only when the C++ binding is built. */
#if defined(HAVE_CXX_BINDING)
void MPIR_Keyval_set_cxx(int, void (*)(void), void (*)(void));
void MPIR_Op_set_cxx(MPI_Op, void (*)(void));
void MPIR_Errhandler_set_cxx(MPI_Errhandler, void (*)(void));
#endif
2276
2277 int MPIR_Group_create( int, MPID_Group ** );
2278 int MPIR_Group_release(MPID_Group *group_ptr);
2279
2280 int MPIR_dup_fn ( MPI_Comm, int, void *, void *, void *, int * );
2281 /* marks a request as complete, extracting the status */
2282 int MPIR_Request_complete(MPI_Request *, MPID_Request *, MPI_Status *, int *);
2283
2284 int MPIR_Request_get_error(MPID_Request *);
2285 /* run the progress engine until the given request is complete */
2286 int MPIR_Progress_wait_request(MPID_Request *req);
2287
2288 /* The following routines perform the callouts to the user routines registered
2289 as part of a generalized request. They handle any language binding issues
2290 that are necessary. They are used when completing, freeing, cancelling or
2291 extracting the status from a generalized request. */
2292 int MPIR_Grequest_cancel(MPID_Request * request_ptr, int complete);
2293 int MPIR_Grequest_query(MPID_Request * request_ptr);
2294 int MPIR_Grequest_free(MPID_Request * request_ptr);
2295
2296 /* this routine was added to support our extension relaxing the progress rules
2297 * for generalized requests */
2298 int MPIR_Grequest_progress_poke(int count, MPID_Request **request_ptrs,
2299 MPI_Status array_of_statuses[] );
2300 int MPIR_Grequest_waitall(int count, MPID_Request * const * request_ptrs);
2301
2302 /* ------------------------------------------------------------------------- */
2303 /* Prototypes for language-specific routines, such as routines to set
2304 Fortran keyval attributes */
2305 #ifdef HAVE_FORTRAN_BINDING
2306 #include "mpi_f77interface.h"
2307 #endif
2308
2309 /* ADI Bindings */
2310 /*@
2311 MPID_Init - Initialize the device
2312
2313 Input Parameters:
2314 + argc_p - Pointer to the argument count
2315 . argv_p - Pointer to the argument list
2316 - requested - Requested level of thread support. Values are the same as
2317 for the 'required' argument to 'MPI_Init_thread', except that we define
2318 an enum for these values.
2319
2320 Output Parameters:
2321 + provided - Provided level of thread support. May be less than the
2322 requested level of support.
2323 . has_args - Set to true if 'argc_p' and 'argv_p' contain the command
2324 line arguments. See below.
2325 - has_env - Set to true if the environment of the process has been
2326 set as the user expects. See below.
2327
2328 Return value:
2329 Returns 'MPI_SUCCESS' on success and an MPI error code on failure. Failure
2330 can happen when, for example, the device is unable to start or contact the
2331 number of processes specified by the 'mpiexec' command.
2332
2333 Notes:
2334 Null arguments for 'argc_p' and 'argv_p' `must` be valid (see MPI-2, section
2335 4.2)
2336
2337 Multi-method devices should initialize each method within this call.
2338 They can use environment variables and/or command-line arguments
2339 to decide which methods to initialize (but note that they must not
2340 `depend` on using command-line arguments).
2341
2342 This call also initializes all MPID data needed by the device. This
2343 includes the 'MPID_Request's and any other data structures used by
2344 the device.
2345
2346 The arguments 'has_args' and 'has_env' indicate whether the process was
2347 started with command-line arguments or environment variables. In some
2348 cases, only the root process is started with these values; in others,
2349 the startup environment ensures that each process receives the
2350 command-line arguments and environment variables that the user expects.
2351 While the MPI standard makes no requirements that command line arguments or
2352 environment variables are provided to all processes, most users expect a
2353 common environment. These variables allow an MPI implementation (that is
2354 based on ADI-3) to provide both of these by making use of MPI communication
2355 after 'MPID_Init' is called but before 'MPI_Init' returns to the user, if
2356 the process management environment does not provide this service.
2357
2358
2359 This routine is used to implement both 'MPI_Init' and 'MPI_Init_thread'.
2360
2361 Setting the environment requires a 'setenv' function. Some
2362 systems may not have this. In that case, the documentation must make
2363 clear that the environment may not be propagated to the generated processes.
2364
2365 Module:
2366 MPID_CORE
2367
2368 Questions:
2369
2370 The values for 'has_args' and 'has_env' are boolean.
2371 They could be more specific. For
2372 example, the value could indicate the rank in 'MPI_COMM_WORLD' of a
2373 process that has the values; the value 'MPI_ANY_SOURCE' (or a '-1') could
2374 indicate that the value is available on all processes (including this one).
2375 We may want this since otherwise the processes may need to determine whether
2376 any process needs the command line. Another option would be to use positive
2377 values in the same way that the 'color' argument is used in 'MPI_Comm_split';
2378 a negative value indicates the member of the processes with that color that
2379 has the values of the command line arguments (or environment). This allows
2380 for non-SPMD programs.
2381
2382 Do we require that the startup environment (e.g., whatever 'mpiexec' is
2383 using to start processes) is responsible for delivering
2384 the command line arguments and environment variables that the user expects?
2385 That is, if the user is running an SPMD program, and expects each process
2386 to get the same command line argument, who is responsible for this?
2387 The 'has_args' and 'has_env' values are intended to allow the ADI to
2388 handle this while taking advantage of any support that the process
2389 manager framework may provide.
2390
2391 Alternately, how do we find out from the process management environment
2392 whether it took care of the environment or the command line arguments?
2393 Do we need a 'PMI_Env_query' function that can answer these questions
2394 dynamically (in case a different process manager is used through the same
2395 interface)?
2396
2397 Can we fix the Fortran command-line arguments? That is, can we arrange for
2398 'iargc' and 'getarg' (and the POSIX equivalents) to return the correct
2399 values? See, for example, the Absoft implementations of 'getarg'.
2400 We could also contact PGI about the Portland Group compilers, and of
2401 course the 'g77' source code is available.
2402 Does each process have the same values for the environment variables
2403 when this routine returns?
2404
2405 If we don''t require that all processes get the same argument list,
2406 we need to find out if they did anyway so that 'MPI_Init_thread' can
2407 fixup the list for the user. This argues for another return value that
2408 flags how much of the environment the 'MPID_Init' routine set up
2409 so that the 'MPI_Init_thread' call can provide the rest. The reason
2410 for this is that, even though the MPI standard does not require it,
2411 a user-friendly implementation should, in the SPMD mode, give each
2412 process the same environment and argument lists unless the user
2413 explicitly directed otherwise.
2414
2415 How does this interface to PMI? Do we need to know anything? Should
2416 this call have an info argument to support PMI?
2417
2418 The following questions involve how environment variables and command
2419 line arguments are used to control the behavior of the implementation.
2420 Many of these values must be determined at the time that 'MPID_Init'
2421 is called. These all should be considered in the context of the
2422 parameter routines described in the MPICH2 Design Document.
2423
2424 Are there recommended environment variable names? For example, in ADI-2,
2425 there are many debugging options that are part of the common device.
2426 In MPI-2, we can''t require command line arguments, so any such options
2427 must also have environment variables. E.g., 'MPICH_ADI_DEBUG' or
2428 'MPICH_ADI_DB'.
2429
2430 Names that are explicitly prohibited? For example, do we want to
2431 reserve any names that 'MPI_Init_thread' (as opposed to 'MPID_Init')
2432 might use?
2433
2434 How does information on command-line arguments and environment variables
2435 recognized by the device get added to the documentation?
2436
2437 What about control for other impact on the environment? For example,
2438 what signals should the device catch (e.g., 'SIGFPE'? 'SIGTRAP'?)?
2439 Which of these should be optional (e.g., ignore or leave signal alone)
2440 or selectable (e.g., port to listen on)? For example, catching 'SIGTRAP'
2441 causes problems for 'gdb', so we''d like to be able to leave 'SIGTRAP'
2442 unchanged in some cases.
2443
2444 Another environment variable should control whether fault-tolerance is
2445 desired. If fault-tolerance is selected, then some collective operations
2446 will need to use different algorithms and most fatal errors detected by the
2447 MPI implementation should abort only the affected process, not all processes.
2448 @*/
int MPID_Init(int *argc_p,
              char ***argv_p,
              int requested,
              int *provided,
              int *has_args,
              int *has_env);
2451
2452 /* was:
2453 int MPID_Init( int *argc_p, char ***argv_p,
2454 int requested, int *provided,
2455 MPID_Comm **parent_comm, int *has_args, int *has_env ); */
2456
2457 /*@
MPID_InitCompleted - Notify the device that the MPI_Init or MPI_Init_thread
2459 call has completed setting up MPI
2460
2461 Notes:
2462 This call allows the device to complete any setup that it wishes to perform
2463 and for which it needs to access any of the structures (such as 'MPIR_Process')
2464 that are initialized after 'MPID_Init' is called. If the device does not need
2465 any extra operations, then it may provide either an empty function or even
2466 define this as a macro with the value 'MPI_SUCCESS'.
2467 @*/
int MPID_InitCompleted(void);
2469
2470 /*@
2471 MPID_Finalize - Perform the device-specific termination of an MPI job
2472
2473 Return Value:
2474 'MPI_SUCCESS' or a valid MPI error code. Normally, this routine will
return 'MPI_SUCCESS'. Only in extraordinary circumstances can this
2476 routine fail; for example, if some process stops responding during the
2477 finalize step. In this case, 'MPID_Finalize' should return an MPI
2478 error code indicating the reason that it failed.
2479
2480 Notes:
2481
2482 Module:
2483 MPID_CORE
2484
2485 Questions:
2486 Need to check the MPI-2 requirements on 'MPI_Finalize' with respect to
2487 things like which process must remain after 'MPID_Finalize' is called.
2488 @*/
int MPID_Finalize( void );
2490 /*@
2491 MPID_Abort - Abort at least the processes in the specified communicator.
2492
2493 Input Parameters:
2494 + comm - Communicator of processes to abort
2495 . mpi_errno - MPI error code containing the reason for the abort
2496 . exit_code - Exit code to return to the calling environment. See notes.
2497 - error_msg - Optional error message
2498
2499 Return value:
2500 'MPI_SUCCESS' or an MPI error code. Normally, this routine should not
2501 return, since the calling process must be a member of the communicator.
2502 However, under some circumstances, the 'MPID_Abort' might fail; in this
2503 case, returning an error indication is appropriate.
2504
2505 Notes:
2506
2507 In a fault-tolerant MPI implementation, this operation should abort `only`
2508 the processes in the specified communicator. Any communicator that shares
2509 processes with the aborted communicator becomes invalid. For more
2510 details, see (paper not yet written on fault-tolerant MPI).
2511
2512 In particular, if the communicator is 'MPI_COMM_SELF', only the calling
2513 process should be aborted.
2514
2515 The 'exit_code' is the exit code that this particular process will
2516 attempt to provide to the 'mpiexec' or other program invocation
2517 environment. See 'mpiexec' for a discussion of how exit codes from
2518 many processes may be combined.
2519
2520 If the error_msg field is non-NULL this string will be used as the message
with the abort output. Otherwise, the output message will be based on the
2522 error message associated with the mpi_errno.
2523
2524 An external agent that is aborting processes can invoke this with either
2525 'MPI_COMM_WORLD' or 'MPI_COMM_SELF'. For example, if the process manager
2526 wishes to abort a group of processes, it should cause 'MPID_Abort' to
2527 be invoked with 'MPI_COMM_SELF' on each process in the group.
2528
2529 Question:
2530 An alternative design is to provide an 'MPID_Group' instead of a
2531 communicator. This would allow a process manager to ask the ADI
2532 to kill an entire group of processes without needing a communicator.
2533 However, the implementation of 'MPID_Abort' will either do this by
2534 communicating with other processes or by requesting the process manager
2535 to kill the processes. That brings up this question: should
2536 'MPID_Abort' use 'PMI' to kill processes? Should it be required to
2537 notify the process manager? What about persistent resources (such
2538 as SYSV segments or forked processes)?
2539
2540 This suggests that for any persistent resource, an exit handler be
2541 defined. These would be executed by 'MPID_Abort' or 'MPID_Finalize'.
2542 See the implementation of 'MPI_Finalize' for an example of exit callbacks.
2543 In addition, code that registered persistent resources could use persistent
2544 storage (i.e., a file) to record that information, allowing cleanup
2545 utilities (such as 'mpiexec') to remove any resources left after the
2546 process exits.
2547
2548 'MPI_Finalize' requires that attributes on 'MPI_COMM_SELF' be deleted
2549 before anything else happens; this allows libraries to attach end-of-job
2550 actions to 'MPI_Finalize'. It is valuable to have a similar
2551 capability on 'MPI_Abort', with the caveat that 'MPI_Abort' may not
2552 guarantee that the run-on-abort routines were called. This provides a
2553 consistent way for the MPICH implementation to handle freeing any
2554 persistent resources. However, such callbacks must be limited since
2555 communication may not be possible once 'MPI_Abort' is called. Further,
2556 any callbacks must guarantee that they have finite termination.
2557
2558 One possible extension would be to allow `users` to add actions to be
2559 run when 'MPI_Abort' is called, perhaps through a special attribute value
applied to 'MPI_COMM_SELF'. Note that it is incorrect to call the delete
2561 functions for the normal attributes on 'MPI_COMM_SELF' because MPI
2562 only specifies that those are run on 'MPI_Finalize' (i.e., normal
2563 termination).
2564
2565 Module:
2566 MPID_CORE
2567 @*/
2568
/* FIXME: the 4th argument isn't part of the original design and isn't documented */

/* NOTE(review): the MPID_Abort prototype below is compiled out by '#if 0',
   yet HAS_MPID_ABORT_DECL is still defined a few lines further down.  If that
   flag is what lets mpiutil.h decide whether to supply its own declaration
   (as the neighbouring comment suggests), the two look out of sync - TODO
   confirm which header is meant to declare MPID_Abort. */
# if 0
int MPID_Abort( MPID_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
#endif
/* FIXME: Should we turn off this flag and only declare MPID_Abort in mpiutil.h? */
/* We want to also declare MPID_Abort in mpiutil.h if mpiimpl.h is not used */
/* Defined with an empty expansion, so it can only be tested with
   #ifdef / #ifndef / defined(). */
#define HAS_MPID_ABORT_DECL
2577
2578 int MPID_Open_port(MPID_Info *, char *);
2579 int MPID_Close_port(const char *);
2580
2581 /*@
2582 MPID_Comm_accept - MPID entry point for MPI_Comm_accept
2583
2584 Input Parameters:
2585 + port_name - port name
2586 . info - info
2587 . root - root
2588 - comm - communicator
2589
2590 Output Parameters:
. newcomm_ptr - new communicator
2592
2593 Return Value:
2594 'MPI_SUCCESS' or a valid MPI error code.
2595 @*/
2596 int MPID_Comm_accept(const char *, MPID_Info *, int, MPID_Comm *, MPID_Comm **);
2597
2598 /*@
2599 MPID_Comm_connect - MPID entry point for MPI_Comm_connect
2600
2601 Input Parameters:
2602 + port_name - port name
2603 . info - info
2604 . root - root
2605 - comm - communicator
2606
2607 Output Parameters:
2608 . newcomm_ptr - new intercommunicator
2609
2610 Return Value:
2611 'MPI_SUCCESS' or a valid MPI error code.
2612 @*/
2613 int MPID_Comm_connect(const char *, MPID_Info *, int, MPID_Comm *, MPID_Comm **);
2614
2615 int MPID_Comm_disconnect(MPID_Comm *);
2616
2617 int MPID_Comm_spawn_multiple(int, char *[], char **[], const int [], MPID_Info* [],
2618 int, MPID_Comm *, MPID_Comm **, int []);
2619
2620 /*@
2621 MPID_Comm_group_failed - MPID entry point for MPI_Comm_group_failed
2622
2623 Input Parameters:
2624 . comm - communicator
2625
Output Parameters:
2627 . failed_group_ptr - group of failed processes
2628
2629 Return Value:
2630 'MPI_SUCCESS' or a valid MPI error code.
2631 @*/
2632 int MPID_Comm_group_failed(MPID_Comm *comm, MPID_Group **failed_group_ptr);
2633
2634 /*@
2635 MPID_Comm_remote_group_failed - MPID entry point for MPI_Comm_remote_group_failed
2636
2637 Input Parameters:
2638 . comm - intercommunicator
2639
Output Parameters:
2641 . failed_group_ptr - group of failed processes in comm's remote group
2642
2643 Return Value:
2644 'MPI_SUCCESS' or a valid MPI error code.
2645 @*/
2646 int MPID_Comm_remote_group_failed(MPID_Comm *comm, MPID_Group **failed_group_ptr);
2647
2648 /*@
2649 MPID_Comm_reenable_anysource - MPID entry point for MPI_Comm_reenable_anysource
2650
2651 Input Parameters:
2652 . comm - communicator
2653
Output Parameters:
2655 . failed_group_ptr - group of failed processes
2656
2657 Return Value:
2658 'MPI_SUCCESS' or a valid MPI error code.
2659 @*/
2660 int MPID_Comm_reenable_anysource(MPID_Comm *comm, MPID_Group **failed_group_ptr);
2661
2662 /*@
2663 MPID_Send - MPID entry point for MPI_Send
2664
2665 Notes:
2666 The only difference between this and 'MPI_Send' is that the basic
2667 error checks (e.g., valid communicator, datatype, dest, and tag)
2668 have been made, the MPI opaque objects have been replaced by
2669 MPID objects, a context id offset is provided in addition to the
2670 communicator, and a request may be returned. The context offset is
2671 added to the context of the communicator
to get the context id used by the message.
2673 A request is returned only if the ADI implementation was unable to
2674 complete the send of the message. In that case, the usual 'MPI_Wait'
2675 logic should be used to complete the request. This approach is used to
2676 allow a simple implementation of the ADI. The ADI is free to always
2677 complete the message and never return a request.
2678
2679 Module:
2680 Communication
2681
2682 @*/
2683 int MPID_Send( const void *buf, int count, MPI_Datatype datatype,
2684 int dest, int tag, MPID_Comm *comm, int context_offset,
2685 MPID_Request **request );
2686
2687 /*@
2688 MPID_Rsend - MPID entry point for MPI_Rsend
2689
2690 Notes:
2691 The only difference between this and 'MPI_Rsend' is that the basic
2692 error checks (e.g., valid communicator, datatype, dest, and tag)
2693 have been made, the MPI opaque objects have been replaced by
2694 MPID objects, a context id offset is provided in addition to the
2695 communicator, and a request may be returned. The context offset is
2696 added to the context of the communicator
to get the context id used by the message.
2698 A request is returned only if the ADI implementation was unable to
2699 complete the send of the message. In that case, the usual 'MPI_Wait'
2700 logic should be used to complete the request. This approach is used to
2701 allow a simple implementation of the ADI. The ADI is free to always
2702 complete the message and never return a request.
2703
2704 Module:
2705 Communication
2706
2707 @*/
2708 int MPID_Rsend( const void *buf, int count, MPI_Datatype datatype,
2709 int dest, int tag, MPID_Comm *comm, int context_offset,
2710 MPID_Request **request );
2711
2712 /*@
2713 MPID_Ssend - MPID entry point for MPI_Ssend
2714
2715 Notes:
2716 The only difference between this and 'MPI_Ssend' is that the basic
2717 error checks (e.g., valid communicator, datatype, dest, and tag)
2718 have been made, the MPI opaque objects have been replaced by
2719 MPID objects, a context id offset is provided in addition to the
2720 communicator, and a request may be returned. The context offset is
2721 added to the context of the communicator
to get the context id used by the message.
2723 A request is returned only if the ADI implementation was unable to
2724 complete the send of the message. In that case, the usual 'MPI_Wait'
2725 logic should be used to complete the request. This approach is used to
2726 allow a simple implementation of the ADI. The ADI is free to always
2727 complete the message and never return a request.
2728
2729 Module:
2730 Communication
2731
2732 @*/
2733 int MPID_Ssend( const void *buf, int count, MPI_Datatype datatype,
2734 int dest, int tag, MPID_Comm *comm, int context_offset,
2735 MPID_Request **request );
2736
2737 /*@
2738 MPID_tBsend - Attempt a send and return if it would block
2739
2740 Notes:
2741 This has the semantics of 'MPI_Bsend', except that it returns the internal
2742 error code 'MPID_WOULD_BLOCK' if the message can''t be sent immediately
2743 (t is for "try").
2744
2745 The reason that this interface is chosen over a query to check whether
2746 a message `can` be sent is that the query approach is not
2747 thread-safe. Since the decision on whether a message can be sent
2748 without blocking depends (among other things) on the state of flow
2749 control managed by the device, this approach also gives the device
2750 the greatest freedom in implementing flow control. In particular,
2751 if another MPI process can change the flow control parameters, then
2752 even in a single-threaded implementation, it would not be safe to
2753 return, for example, a message size that could be sent with 'MPI_Bsend'.
2754
2755 This routine allows an MPI implementation to optimize 'MPI_Bsend'
2756 for the case when the message can be delivered without blocking the
2757 calling process. An ADI implementation is free to have this routine
2758 always return 'MPID_WOULD_BLOCK', but is encouraged not to.
2759
2760 To allow the MPI implementation to avoid trying this routine when it
2761 is not implemented by the ADI, the C preprocessor constant 'MPID_HAS_TBSEND'
2762 should be defined if this routine has a nontrivial implementation.
2763
2764 This is an optional routine. The MPI code for 'MPI_Bsend' will attempt
2765 to call this routine only if the device defines 'MPID_HAS_TBSEND'.
2766
2767 Module:
2768 Communication
2769 @*/
2770 int MPID_tBsend( const void *buf, int count, MPI_Datatype datatype,
2771 int dest, int tag, MPID_Comm *comm, int context_offset );
2772
2773 /*@
2774 MPID_Isend - MPID entry point for MPI_Isend
2775
2776 Notes:
2777 The only difference between this and 'MPI_Isend' is that the basic
2778 error checks (e.g., valid communicator, datatype, dest, and tag)
2779 have been made, the MPI opaque objects have been replaced by
2780 MPID objects, and a context id offset is provided in addition to the
2781 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2783
2784 Module:
2785 Communication
2786
2787 @*/
2788 int MPID_Isend( const void *buf, int count, MPI_Datatype datatype,
2789 int dest, int tag, MPID_Comm *comm, int context_offset,
2790 MPID_Request **request );
2791
2792 /*@
2793 MPID_Irsend - MPID entry point for MPI_Irsend
2794
2795 Notes:
2796 The only difference between this and 'MPI_Irsend' is that the basic
2797 error checks (e.g., valid communicator, datatype, dest, and tag)
2798 have been made, the MPI opaque objects have been replaced by
2799 MPID objects, and a context id offset is provided in addition to the
2800 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2802
2803 Module:
2804 Communication
2805
2806 @*/
2807 int MPID_Irsend( const void *buf, int count, MPI_Datatype datatype,
2808 int dest, int tag, MPID_Comm *comm, int context_offset,
2809 MPID_Request **request );
2810
2811 /*@
2812 MPID_Issend - MPID entry point for MPI_Issend
2813
2814 Notes:
2815 The only difference between this and 'MPI_Issend' is that the basic
2816 error checks (e.g., valid communicator, datatype, dest, and tag)
2817 have been made, the MPI opaque objects have been replaced by
2818 MPID objects, and a context id offset is provided in addition to the
2819 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2821
2822 Module:
2823 Communication
2824
2825 @*/
2826 int MPID_Issend( const void *buf, int count, MPI_Datatype datatype,
2827 int dest, int tag, MPID_Comm *comm, int context_offset,
2828 MPID_Request **request );
2829
2830 /*@
2831 MPID_Recv - MPID entry point for MPI_Recv
2832
2833 Notes:
2834 The only difference between this and 'MPI_Recv' is that the basic
2835 error checks (e.g., valid communicator, datatype, source, and tag)
2836 have been made, the MPI opaque objects have been replaced by
2837 MPID objects, a context id offset is provided in addition to the
2838 communicator, and a request may be returned. The context offset is added
to the context of the communicator to get the context id used by the message.
2840 As in 'MPID_Send', the request is returned only if the operation did not
2841 complete. Conversely, the status object is populated with valid information
2842 only if the operation completed.
2843
2844 Module:
2845 Communication
2846
2847 @*/
2848 int MPID_Recv( void *buf, int count, MPI_Datatype datatype,
2849 int source, int tag, MPID_Comm *comm, int context_offset,
2850 MPI_Status *status, MPID_Request **request );
2851
2852
2853 /*@
2854 MPID_Irecv - MPID entry point for MPI_Irecv
2855
2856 Notes:
2857 The only difference between this and 'MPI_Irecv' is that the basic
2858 error checks (e.g., valid communicator, datatype, source, and tag)
2859 have been made, the MPI opaque objects have been replaced by
2860 MPID objects, and a context id offset is provided in addition to the
2861 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2863
2864 Module:
2865 Communication
2866
2867 @*/
2868 int MPID_Irecv( void *buf, int count, MPI_Datatype datatype,
2869 int source, int tag, MPID_Comm *comm, int context_offset,
2870 MPID_Request **request );
2871
2872 /*@
2873 MPID_Send_init - MPID entry point for MPI_Send_init
2874
2875 Notes:
2876 The only difference between this and 'MPI_Send_init' is that the basic
2877 error checks (e.g., valid communicator, datatype, dest, and tag)
2878 have been made, the MPI opaque objects have been replaced by
2879 MPID objects, and a context id offset is provided in addition to the
2880 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2882
2883 Module:
2884 Communication
2885
2886 @*/
2887 int MPID_Send_init( const void *buf, int count, MPI_Datatype datatype,
2888 int dest, int tag, MPID_Comm *comm, int context_offset,
2889 MPID_Request **request );
2890
2891 int MPID_Bsend_init(const void *, int, MPI_Datatype, int, int, MPID_Comm *,
2892 int, MPID_Request **);
2893 /*@
2894 MPID_Rsend_init - MPID entry point for MPI_Rsend_init
2895
2896 Notes:
2897 The only difference between this and 'MPI_Rsend_init' is that the basic
2898 error checks (e.g., valid communicator, datatype, dest, and tag)
2899 have been made, the MPI opaque objects have been replaced by
2900 MPID objects, and a context id offset is provided in addition to the
2901 communicator. This offset is added to the context of the communicator
to get the context id used by the message.
2903
2904 Module:
2905 Communication
2906
2907 @*/
2908 int MPID_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
2909                     MPID_Comm *comm, int context_offset,
2910                     MPID_Request **request); /* context_offset is added to the communicator's context */
2911 /*@
2912 MPID_Ssend_init - MPID entry point for MPI_Ssend_init
2913
2914 Notes:
2915 The only difference between this and 'MPI_Ssend_init' is that the basic
2916 error checks (e.g., valid communicator, datatype, dest, and tag)
2917 have been made, the MPI opaque objects have been replaced by
2918 MPID objects, and a context id offset is provided in addition to the
2919 communicator. This offset is added to the context of the communicator
2920 to get the context id used by the message.
2921
2922 Module:
2923 Communication
2924
2925 @*/
2926 int MPID_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
2927                     MPID_Comm *comm, int context_offset,
2928                     MPID_Request **request); /* context_offset is added to the communicator's context */
2929
2930 /*@
2931 MPID_Recv_init - MPID entry point for MPI_Recv_init
2932
2933 Notes:
2934 The only difference between this and 'MPI_Recv_init' is that the basic
2935 error checks (e.g., valid communicator, datatype, source, and tag)
2936 have been made, the MPI opaque objects have been replaced by
2937 MPID objects, and a context id offset is provided in addition to the
2938 communicator. This offset is added to the context of the communicator
2939 to get the context id used by the message.
2940
2941 Module:
2942 Communication
2943
2944 @*/
2945 int MPID_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag,
2946                    MPID_Comm *comm, int context_offset,
2947                    MPID_Request **request); /* context_offset is added to the communicator's context */
2948
2949 /*@
2950 MPID_Startall - MPID entry point for MPI_Startall
2951
2952 Notes:
2953 The only difference between this and 'MPI_Startall' is that the basic
2954 error checks (e.g., count) have been made, and the MPI opaque objects
2955 have been replaced by pointers to MPID objects.
2956
2957 Rationale:
2958 This allows the device to schedule communication involving multiple requests,
2959 whereas an implementation built on just 'MPID_Start' would force the
2960 ADI to initiate the communication in the order encountered.
2961 @*/
2962 int MPID_Startall(int count, MPID_Request *requests[]); /* 'requests' is an array of pointers to MPID request objects; 'count' is presumably its length -- TODO confirm */
2963
2964 /*@
2965 MPID_Probe - Block until a matching request is found and return information
2966 about it
2967
2968 Input Parameters:
2969 + source - rank to match (or 'MPI_ANY_SOURCE')
2970 . tag - Tag to match (or 'MPI_ANY_TAG')
2971 . comm - communicator to match.
2972 - context_offset - context id offset of communicator to match
2973
2974 Output Parameter:
2975 . status - 'MPI_Status' set as defined by 'MPI_Probe'
2976
2977
2978 Return Value:
2979 Error code.
2980
2981 Notes:
2982 Note that the values returned in 'status' will be valid for a subsequent
2983 MPI receive operation only if no other thread attempts to receive the same
2984 message.
2985 (See the
2986 discussion of probe in Section 8.7.2 Clarifications of the MPI-2 standard.)
2987
2988 Providing the 'context_offset' is necessary at this level to support the
2989 way in which the MPICH implementation uses context ids in the implementation
2990 of other operations. The communicator is present to allow the device
2991 to use message-queues attached to particular communicators or connections
2992 between processes.
2993
2994 Module:
2995 Request
2996
2997 @*/
2998 int MPID_Probe(int source, int tag, MPID_Comm *comm, int context_offset, MPI_Status *status); /* blocking; 'status' is the only output */
2999 /*@
3000 MPID_Iprobe - Look for a matching request in the receive queue
3001 but do not remove or return it
3002
3003 Input Parameters:
3004 + source - rank to match (or 'MPI_ANY_SOURCE')
3005 . tag - Tag to match (or 'MPI_ANY_TAG')
3006 . comm - communicator to match.
3007 - context_offset - context id offset of communicator to match
3008
3009 Output Parameter:
3010 + flag - true if a matching request was found, false otherwise.
3011 - status - 'MPI_Status' set as defined by 'MPI_Iprobe' (only valid when return
3012 flag is true).
3013
3014 Return Value:
3015 Error Code.
3016
3017 Notes:
3018 Note that the values returned in 'status' will be valid for a subsequent
3019 MPI receive operation only if no other thread attempts to receive the same
3020 message.
3021 (See the
3022 discussion of probe in Section 8.7.2 (Clarifications) of the MPI-2 standard.)
3023
3024 Providing the 'context_offset' is necessary at this level to support the
3025 way in which the MPICH implementation uses context ids in the implementation
3026 of other operations. The communicator is present to allow the device
3027 to use message-queues attached to particular communicators or connections
3028 between processes.
3029
3030 Devices that rely solely on polling to make progress should call
3031 MPID_Progress_poke() (or some equivalent function) if a matching request
3032 could not be found. This ensures that progress continues to be made even if
3033 the application is calling MPI_Iprobe() from within a loop not containing
3034 calls to any other MPI functions.
3035
3036 Module:
3037 Request
3038
3039 @*/
3040 int MPID_Iprobe(int source, int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status); /* 'status' is only valid when '*flag' is true */
3041
3042 /*@
3043 MPID_Mprobe - Block until a matching request is found and return information
3044 about it, including a message handle for later reception.
3045
3046 Input Parameters:
3047 + source - rank to match (or 'MPI_ANY_SOURCE')
3048 . tag - Tag to match (or 'MPI_ANY_TAG')
3049 . comm - communicator to match.
3050 - context_offset - context id offset of communicator to match
3051
3052 Output Parameter:
3053 + message - 'MPID_Request' (logically a message) set as defined by 'MPI_Mprobe'
3054 - status - 'MPI_Status' set as defined by 'MPI_Mprobe'
3055
3056 Return Value:
3057 Error code.
3058
3059 Providing the 'context_offset' is necessary at this level to support the
3060 way in which the MPICH implementation uses context ids in the implementation
3061 of other operations. The communicator is present to allow the device
3062 to use message-queues attached to particular communicators or connections
3063 between processes.
3064
3065 Module:
3066 Request
3067
3068 @*/
3069 int MPID_Mprobe(int source, int tag, MPID_Comm *comm,
3070                 int context_offset, MPID_Request **message, MPI_Status *status); /* 'message' and 'status' are outputs */
3071
3072 /*@
3073 MPID_Improbe - Look for a matching request in the receive queue and return
3074 information about it, including a message handle for later reception.
3075
3076 Input Parameters:
3077 + source - rank to match (or 'MPI_ANY_SOURCE')
3078 . tag - Tag to match (or 'MPI_ANY_TAG')
3079 . comm - communicator to match.
3080 - context_offset - context id offset of communicator to match
3081
3082 Output Parameter:
3083 + flag - 'flag' set as defined by 'MPI_Improbe'
3084 . message - 'MPID_Request' (logically a message) set as defined by 'MPI_Improbe'
3085 - status - 'MPI_Status' set as defined by 'MPI_Improbe'
3086
3087 Return Value:
3088 Error code.
3089
3090 Providing the 'context_offset' is necessary at this level to support the
3091 way in which the MPICH implementation uses context ids in the implementation
3092 of other operations. The communicator is present to allow the device
3093 to use message-queues attached to particular communicators or connections
3094 between processes.
3095
3096 Module:
3097 Request
3098
3099 @*/
3100 int MPID_Improbe(int source, int tag, MPID_Comm *comm, int context_offset, int *flag,
3101                  MPID_Request **message, MPI_Status *status); /* 'flag', 'message' and 'status' are outputs */
3102
3103 /*@
3104 MPID_Imrecv - Begin receiving the message indicated by the given message
3105 handle and return a request object for later completion.
3106
3107 Input Parameters:
3108 + count - number of elements to receive
3109 . datatype - datatype of each recv buffer element
3110 - message - 'MPID_Request' (logically a message) set as defined by 'MPI_Mprobe'
3111
3112 Output Parameter:
3113 + buf - receive buffer
3114 - request - request object for completing the recv
3115
3116 Return Value:
3117 Error code.
3118
3119 Module:
3120 Request
3121
3122 NOTE: under most implementations the request object returned will
3123 probably be some modified version of the "message" object passed in.
3124
3125 @*/
3126 int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype, MPID_Request *message,
3127                 MPID_Request **request); /* 'request' receives the request object used to complete the receive */
3128
3129 /*@
3130 MPID_Mrecv - Receive the message indicated by the given message handle.
3131
3132 Input Parameters:
3133 + count - number of elements to receive
3134 . datatype - datatype of each recv buffer element
3135 - message - 'MPID_Request' (logically a message) set as defined by 'MPI_Mprobe'
3136
3137 Output Parameter:
3138 + buf - receive buffer
3139 - status - 'MPI_Status' set as defined by 'MPI_Mrecv'
3140
3141 Return Value:
3142 Error code.
3143
3144 Module:
3145 Request
3146
3147 NOTE: unlike 'MPID_Imrecv', this routine returns no request object; what
3148 happens to the "message" object passed in is not specified here (TODO confirm).
3149
3150 @*/
3151 int MPID_Mrecv(void *buf, int count, MPI_Datatype datatype, MPID_Request *message,
3152                MPI_Status *status); /* 'buf' and 'status' are outputs */
3153
3154 /*@
3155 MPID_Cancel_send - Cancel the indicated send request
3156
3157 Input Parameter:
3158 . request - Send request to cancel
3159
3160 Return Value:
3161 MPI error code.
3162
3163 Notes:
3164 Cancel is a tricky operation, particularly for sends. Read the
3165 discussion in the MPI-1 and MPI-2 documents carefully. This call
3166 only requests that the request be cancelled; a subsequent wait
3167 or test must first succeed (i.e., the request completion counter must be
3168 zeroed).
3169
3170 Module:
3171 Request
3172
3173 @*/
3174 int MPID_Cancel_send(MPID_Request *request); /* only requests cancellation; a later wait or test must still succeed */
3175 /*@
3176 MPID_Cancel_recv - Cancel the indicated recv request
3177
3178 Input Parameter:
3179 . request - Receive request to cancel
3180
3181 Return Value:
3182 MPI error code.
3183
3184 Notes:
3185 This cancels a pending receive request. In many cases, this is implemented
3186 by simply removing the request from a pending receive request queue.
3187 However, some ADI implementations may maintain these queues in special
3188 places, such as within a NIC (Network Interface Card).
3189 This call only requests that the request be cancelled; a subsequent wait
3190 or test must first succeed (i.e., the request completion counter must be
3191 zeroed).
3192
3193 Module:
3194 Request
3195
3196 @*/
3197 int MPID_Cancel_recv(MPID_Request *request); /* only requests cancellation; a later wait or test must still succeed */
3198
3199 /* MPI-2 RMA Routines */
3200
3201 int MPID_Win_create(void *base, MPI_Aint size, int disp_unit, MPID_Info *info, MPID_Comm *comm,
3202                     MPID_Win **win); /* parameter names follow MPID_Win_allocate and MPID_Win_attach */
3203 int MPID_Win_free(MPID_Win **win);
3204
3205 int MPID_Put(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, /* names mirror MPID_Rput */
3206              MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPID_Win *win);
3207 int MPID_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, /* names mirror MPID_Rget */
3208              MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPID_Win *win);
3209 int MPID_Accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, /* names mirror MPID_Raccumulate */
3210                     MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPID_Win *win);
3211
3212 int MPID_Win_fence(int assert, MPID_Win *win_ptr); /* parameter names follow MPID_Win_post / MPID_Win_start */
3213 int MPID_Win_post(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr);
3214 int MPID_Win_start(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr);
3215 int MPID_Win_test(MPID_Win *win_ptr, int *flag); /* 'flag' is the only non-const pointer argument, so presumably the output */
3216 int MPID_Win_wait(MPID_Win *win_ptr);
3217 int MPID_Win_complete(MPID_Win *win_ptr);
3218
3219 int MPID_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr); /* 'dest' is presumably the target rank, as in MPI_Win_lock -- TODO confirm */
3220 int MPID_Win_unlock(int dest, MPID_Win *win_ptr); /* takes the same 'dest' / 'win_ptr' pair as MPID_Win_lock */
3221
3222 /* MPI-3 RMA Routines */
3223
3224 int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPID_Info *info, MPID_Comm *comm,
3225                       void *baseptr, MPID_Win **win);
3226 int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info_ptr,
3227                              MPID_Comm *comm_ptr, void **base_ptr, MPID_Win **win_ptr); /* NOTE(review): 'void **base_ptr' here, 'void *baseptr' in the two neighbours */
3228 int MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size,
3229                           int *disp_unit, void *baseptr);
3230 int MPID_Win_create_dynamic(MPID_Info *info, MPID_Comm *comm, MPID_Win **win);
3231 int MPID_Win_attach(MPID_Win *win, void *base, MPI_Aint size);
3232 int MPID_Win_detach(MPID_Win *win, const void *base);
3233
3234 int MPID_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
3235                         void *result_addr, int result_count, MPI_Datatype result_datatype,
3236                         int target_rank, MPI_Aint target_disp, int target_count,
3237                         MPI_Datatype target_datatype, MPI_Op op, MPID_Win *win);
3238 int MPID_Fetch_and_op(const void *origin_addr, void *result_addr, MPI_Datatype datatype,
3239                       int target_rank, MPI_Aint target_disp,
3240                       MPI_Op op, MPID_Win *win); /* one 'datatype' covers origin, result and target */
3241 int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr,
3242                           MPI_Datatype datatype,
3243                           int target_rank, MPI_Aint target_disp, MPID_Win *win);
3244 int MPID_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
3245               int target_rank, MPI_Aint target_disp,
3246               int target_count, MPI_Datatype target_datatype,
3247               MPID_Win *win, MPID_Request **request); /* request-returning form of the put signature */
3248 int MPID_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
3249               int target_rank, MPI_Aint target_disp,
3250               int target_count, MPI_Datatype target_datatype,
3251               MPID_Win *win, MPID_Request **request);
3252 int MPID_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
3253                      int target_rank, MPI_Aint target_disp,
3254                      int target_count, MPI_Datatype target_datatype, MPI_Op op,
3255                      MPID_Win *win, MPID_Request **request);
3256 int MPID_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
3257                          void *result_addr, int result_count, MPI_Datatype result_datatype,
3258                          int target_rank, MPI_Aint target_disp,
3259                          int target_count, MPI_Datatype target_datatype, MPI_Op op,
3260                          MPID_Win *win, MPID_Request **request);
3261
3262 int MPID_Win_lock_all(int assert, MPID_Win *win); /* these seven belong to the "MPI-3 RMA Routines" group declared in this header */
3263 int MPID_Win_unlock_all(MPID_Win *win);
3264 int MPID_Win_flush(int rank, MPID_Win *win); /* per-rank form; MPID_Win_flush_all takes no rank */
3265 int MPID_Win_flush_all(MPID_Win *win);
3266 int MPID_Win_flush_local(int rank, MPID_Win *win); /* per-rank form; MPID_Win_flush_local_all takes no rank */
3267 int MPID_Win_flush_local_all(MPID_Win *win);
3268 int MPID_Win_sync(MPID_Win *win);
3269
3270
3271 /*@
3272 MPID_Progress_start - Begin a block of operations that check the completion
3273 counters in requests.
3274
3275 Input parameters:
3276 . state - pointer to a progress state variable
3277
3278 Notes:
3279 This routine informs the progress engine that a block of code follows that
3280 will examine the completion counter of some 'MPID_Request' objects and then
3281 call 'MPID_Progress_wait' zero or more times followed by a call to
3282 'MPID_Progress_end'.
3283
3284 The progress state variable must be specific to the thread calling it. If at
3285 all possible, the state should be declared as an auto variable and thus
3286 allocated on the stack of the current thread. Thread specific storage could
3287 be used instead, but doing such would incur additional (and typically
3288 unnecessary) overhead.
3289
3290 This routine is needed to properly implement blocking tests when
3291 multithreaded progress engines are used. In a single-threaded implementation
3292 of the ADI, this may be defined as an empty macro.
3293
3294 Module:
3295 Communication
3296 @*/
3297 void MPID_Progress_start(MPID_Progress_state * state); /* 'state' must be specific to the calling thread; may be an empty macro in a single-threaded ADI */
3298 /*@
3299 MPID_Progress_wait - Wait for some communication since 'MPID_Progress_start'
3300
3301 Input parameters:
3302 . state - pointer to the progress state initialized by MPID_Progress_start
3303
3304 Return value:
3305 An mpi error code.
3306
3307 Notes:
3308 This instructs the progress engine to wait until some communication event
3309 happens since 'MPID_Progress_start' was called. This call blocks the
3310 calling thread (only, not the process).
3311
3312 Module:
3313 Communication
3314 @*/
3315 int MPID_Progress_wait(MPID_Progress_state * state); /* blocks the calling thread only; returns an MPI error code */
3316 /*@
3317 MPID_Progress_end - End a block of operations begun with 'MPID_Progress_start'
3318
3319 Input parameters:
3320 . state - pointer to the progress state variable passed to
3321 'MPID_Progress_start'
3322
3323 Notes:
3324 This routine instructs the progress engine to end the block begun with
3325 'MPID_Progress_start'. The progress engine is not required to check for any
3326 pending communication.
3327
3328 The purpose of this call is to release any locks initiated by
3329 'MPID_Progress_start' or 'MPID_Progress_wait'. In a single threaded ADI
3330 implementation, this may be defined as an empty macro.
3331
3332 Module:
3333 Communication
3334 @*/
3335 void MPID_Progress_end(MPID_Progress_state * state); /* 'state' is the variable passed to MPID_Progress_start; may be an empty macro in a single-threaded ADI */
3336 /*@
3337 MPID_Progress_test - Check for communication
3338
3339 Return value:
3340 An mpi error code.
3341
3342 Notes:
3343 Unlike 'MPID_Progress_wait', this routine is nonblocking. Therefore, it
3344 does not require the use of 'MPID_Progress_start' and 'MPID_Progress_end'.
3345
3346 Module:
3347 Communication
3348 @*/
3349 int MPID_Progress_test(void); /* nonblocking; needs no MPID_Progress_start/_end bracket; returns an MPI error code */
3350 /*@
3351 MPID_Progress_poke - Allow a progress engine to check for pending
3352 communication
3353
3354 Return value:
3355 An mpi error code.
3356
3357 Notes:
3358 This routine provides a way to invoke the progress engine in a polling
3359 implementation of the ADI. This routine must be nonblocking.
3360
3361 A multithreaded implementation is free to define this as an empty macro.
3362
3363 Module:
3364 Communication
3365 @*/
3366 int MPID_Progress_poke(void); /* must be nonblocking; a multithreaded implementation may define it as an empty macro */
3367
3368 /*@
3369 MPID_Request_create - Create and return a bare request
3370
3371 Return value:
3372 A pointer to a new request object.
3373
3374 Notes:
3375 This routine is intended for use by 'MPI_Grequest_start' only. Note that
3376 once a request is created with this routine, any progress engine must assume
3377 that an outside function can complete a request with
3378 'MPID_Request_set_completed'.
3379
3380 The request object returned by this routine should be initialized such that
3381 ref_count is one and handle contains a valid handle referring to the object.
3382 @*/
3383 MPID_Request * MPID_Request_create(void); /* returned object has ref_count one and a valid handle */
3384 void MPID_Request_set_completed(MPID_Request *request); /* lets an outside function complete a request made by MPID_Request_create */
3385 /*@
3386 MPID_Request_release - Release a request
3387
3388 Input Parameter:
3389 . request - request to release
3390
3391 Notes:
3392 This routine is called to release a reference to request object. If
3393 the reference count of the request object has reached zero, the object will
3394 be deallocated.
3395
3396 Module:
3397 Request
3398 @*/
3399 void MPID_Request_release(MPID_Request *request); /* drops one reference; the object is deallocated once the count reaches zero */
3400
3401 typedef struct MPID_Grequest_class { /* reference-counted bundle of generalized-request callbacks */
3402 MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
3403 MPI_Grequest_query_function *query_fn; /* callback of the MPI query-function type */
3404 MPI_Grequest_free_function *free_fn; /* callback of the MPI free-function type */
3405 MPI_Grequest_cancel_function *cancel_fn; /* callback of the MPI cancel-function type */
3406 MPIX_Grequest_poll_function *poll_fn; /* callback of the MPIX (extension) poll-function type */
3407 MPIX_Grequest_wait_function *wait_fn; /* callback of the MPIX (extension) wait-function type */
3408 struct MPID_Grequest_class *next; /* link to another class object; presumably a singly linked list -- TODO confirm */
3409 } MPID_Grequest_class;
3410
3411
3412 /* types and other internal definitions that must be kept out of mpi.h */
3413
3414 /* forward declarations of struct tags that are fully defined later in this header */
3415 struct MPIR_T_pvar_info;
3416
3417 struct MPIR_T_enum { /* placeholder for an enum-type descriptor */
3418 /* TODO replace this struct's contents with a real implementation once we
3419 * actually have/support an enum type */
3420 int dummy; /* presumably present only so the struct is not empty */
3421 };
3422 struct MPIR_T_cvar_handle { /* control-variable handle: wraps one parameter pointer */
3423 struct MPIR_Param_t *p; /* the wrapped parameter; MPIR_Param_t is not defined in this part of the file */
3424 };
3425 struct MPIR_T_pvar_session { /* owns the list of pvar handles attached to one session */
3426 /* a utlist-managed list (see mpl_utlist.h) -- _must_ be initialized to NULL
3427 * at alloc time or the macros won't work */
3428 struct MPIR_T_pvar_handle *hlist; /* head of the list; handles link through their next/prev fields */
3429
3430 /* TODO does anything else need to go in here at this stage? */
3431 };
3432
3433
3434 typedef int MPIR_T_pvar_handle_creator_fn(
3435     void *obj_handle, struct MPIR_T_pvar_handle *handle,
3436     int *countp); /* function type; MPIR_T_pvar_info.create_fn points to one of these */
3437 enum MPIR_T_pvar_impl_kind { /* selects how a pvar's value is read and written */
3438     /* the generic read/write path suffices: just dereference the pointer */
3439     MPIR_T_PVAR_IMPL_SIMPLE = 0,
3440
3441     /* reads and writes go through callbacks that load/store the value */
3442     MPIR_T_PVAR_IMPL_CB = 1
3443 };
3444
3445 /* These are descriptors that lower level initialization code creates and feeds
3446 * into the overall MPIX_T_pvar_ system in order to permit the upper level code
3447 * to implement MPIX_T_pvar_{get_num,get_info,handle_alloc}. */
3448 struct MPIR_T_pvar_info { /* descriptor for one performance variable (pvar) */
3449 int idx; /* pvar index value for pvar_get_info and friends */
3450
3451 /* fields for get_info */
3452 char *name;
3453 enum MPIR_T_verbosity_t verbosity;
3454 enum MPIR_T_pvar_class_t varclass;
3455 MPI_Datatype dtype;
3456 struct MPIR_T_enum *etype; /* enum descriptor; MPIR_T_enum is currently only a placeholder */
3457 char *desc;
3458 enum MPIR_T_bind_t binding;
3459 int readonly; /* presumably a boolean flag -- TODO confirm */
3460 int continuous; /* presumably a boolean flag -- TODO confirm */
3461 int atomic; /* presumably a boolean flag -- TODO confirm */
3462
3463 /* fields for handle_alloc */
3464 enum MPIR_T_pvar_impl_kind impl_kind; /* SIMPLE (deref pointer) or CB (callbacks) */
3465 void *var_state; /* opaque to this header; meaning depends on the pvar's implementation */
3466 MPIR_T_pvar_handle_creator_fn *create_fn; /* handle-creation callback */
3467 };
3468
3469 struct MPIR_T_pvar_handle { /* one handle on a pvar, owned by a pvar session */
3470 /* for linked list of handles attached to the pvar_session */
3471 struct MPIR_T_pvar_handle *next;
3472 struct MPIR_T_pvar_handle *prev;
3473
3474 struct MPIR_T_pvar_info *info; /* descriptor of the pvar this handle refers to */
3475 struct MPIR_T_pvar_session *session; /* the session this handle is attached to */
3476 int count; /* presumably the element count reported at handle creation -- TODO confirm */
3477
3478 int bytes; /* for _IMPL_SIMPLE */
3479
3480 /* for _IMPL_CB types this vtable prevents us from having to duplicate
3481 * multiple sets of function pointers in each handle at the expense of an
3482 * extra pointer indirection */
3483 struct MPIR_T_pvar_hnd_vtable *vtable;
3484 void *handle_state; /* opaque per-handle state */
3485 int free_handle_state; /* boolean -- true iff the "handle_state" pointer
3486 * should be freed when this handle is freed */
3487 };
3488
3489 /* vtable structure for handle "objects". Implements all major handle operations. Every entry takes the owning session and the handle and returns int. */
3490 struct MPIR_T_pvar_hnd_vtable {
3491 int (*free)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle);
3492 int (*start)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle);
3493 int (*stop)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle);
3494 int (*read)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle, void *buf); /* 'buf' presumably receives the value */
3495 int (*write)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle, void *buf); /* NOTE(review): 'buf' is not const although this is the write path */
3496 int (*reset)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle);
3497 int (*readreset)(struct MPIR_T_pvar_session *session, struct MPIR_T_pvar_handle *handle, void *buf);
3498 };
3499
3500 /* Called by lower-level initialization code to add pvars to the global list.
3501 * Will cause the value returned by MPIX_T_pvar_get_num to be incremented and
3502 * sets up that new index to work with get_info, handle_alloc, etc. */
3503 int MPIR_T_pvar_add(const char *name, /* the arguments up to 'create_fn' have counterparts among the MPIR_T_pvar_info fields */
3504 enum MPIR_T_verbosity_t verbosity,
3505 enum MPIR_T_pvar_class_t varclass,
3506 MPI_Datatype dtype,
3507 struct MPIR_T_enum *enumtype, /* counterpart field is named 'etype' */
3508 const char *desc,
3509 enum MPIR_T_bind_t bind, /* counterpart field is named 'binding' */
3510 int readonly,
3511 int continuous,
3512 int atomic,
3513 enum MPIR_T_pvar_impl_kind impl_kind,
3514 void *var_state,
3515 MPIR_T_pvar_handle_creator_fn *create_fn,
3516 int *index_p); /* presumably receives the index assigned to the new pvar -- TODO confirm */
3517
3518 int MPIR_T_get_num_pvars(int *num); /* 'num' is presumably an output -- TODO confirm */
3519 int MPIR_T_get_pvar_info_by_idx(int idx, struct MPIR_T_pvar_info **info_p); /* 'info_p' is presumably set to the descriptor for 'idx' -- TODO confirm */
3520 int MPIR_T_finalize_pvars(void);
3521 void MPIU_Tool_strncpy(char *dst, const char *src, int *len); /* NOTE(review): 'len' is passed by pointer, so it may be in/out; semantics are not documented here */
3522
3523 /*TTopoOverview.tex
3524 *
3525 * The MPI collective and topology routines can benefit from information
3526 * about the topology of the underlying interconnect. Unfortunately, there
3527 * is no best form for the representation (the MPI-1 Forum tried to define
3528 * such a representation, but was unable to). One useful decomposition
3529 * that has been used in cluster environments is a hierarchical decomposition.
3530 *
3531 * The other obviously useful topology information would match the needs of
3532 * 'MPI_Cart_create'. However, it may be simpler for the device to
3533 * implement this routine directly.
3534 *
3535 * Other useful information could be the topology information that matches
3536 * the needs of the collective operation, such as spanning trees and rings.
3537 * These may be added to ADI3 later.
3538 *
3539 * Question: Should we define a cart create function? Dims create?
3540 *
3541 * Usage:
3542 * This routine has nothing to do with the choice of communication method
3543 * that an implementation of the ADI may make. It is intended only to
3544 * communicate information on the hierarchy of processes, if any, to
3545 * the implementation of the collective communication routines. This routine
3546 * may also be useful for the MPI Graph topology functions.
3547 *
3548 T*/
3549
3550 /*@
3551 MPID_Topo_cluster_info - Return information on the hierarchy of
3552 interconnections
3553
3554 Input Parameter:
3555 . comm - Communicator to study. May be 'NULL', in which case 'MPI_COMM_WORLD'
3556 is the effective communicator.
3557
3558 Output Parameters:
3559 + levels - The number of levels in the hierarchy.
3560 To simplify the use of this routine, the maximum value is
3561 'MPID_TOPO_CLUSTER_MAX_LEVELS' (typically 8 or less).
3562 . my_cluster - For each level, the id of the cluster that the calling process
3563 belongs to.
3564 - my_rank - For each level, the rank of the calling process in its cluster
3565
3566 Notes:
3567 This routine returns a description of the system in terms of nested
3568 clusters of processes. Levels are numbered from zero. At each level,
3569 each process may belong to no more than one cluster; if a process is in any
3570 cluster at level i, it must be in some cluster at level i-1.
3571
3572 The communicator argument allows this routine to be used in the dynamic
3573 process case (i.e., with communicators that are created after 'MPI_Init'
3574 and that involve processes that are not part of 'MPI_COMM_WORLD').
3575
3576 For non-hierarchical systems, this routine simply returns a single
3577 level containing all processes.
3578
3579 Sample Outputs:
3580 For a single, switch-connected cluster or a uniform-memory-access (UMA)
3581 symmetric multiprocessor (SMP), the return values could be
3582 .vb
3583 level my_cluster my_rank
3584 0 0 rank in comm_world
3585 .ve
3586 This is also a valid response for `any` device.
3587
3588 For a switch-connected cluster of 2 processor SMPs
3589 .vb
3590 level my_cluster my_rank
3591 0 0 rank in comm_world
3592 1 0 to p/2 0 or 1
3593 .ve
3594 where each process on the same SMP has the same value for
3595 'my_cluster[1]' and a different value for 'my_rank[1]'.
3596
3597 For two SMPs connected by a network,
3598 .vb
3599 level my_cluster my_rank
3600 0 0 rank in comm_world
3601 1 0 or 1 0 to # on SMP
3602 .ve
3603
3604 An example with more than 2 levels is a collection of clusters, each with
3605 SMP nodes.
3606
3607 Limitations:
3608 This approach does not provide a representations for topologies that
3609 are not hierarchical. For example, a mesh interconnect is a single-level
3610 cluster in this view.
3611
3612 Module:
3613 Topology
3614 @*/
3615 int MPID_Topo_cluster_info(MPID_Comm *comm, int *levels,
3616                            int my_cluster[], int my_rank[]); /* 'comm' may be NULL; both arrays are indexed by level */
3617
3618 /*@
3619 MPID_Get_processor_name - Return the name of the current processor
3620
3621 Input Parameter:
3622 . namelen - Length of name
3623
3624 Output Parameters:
3625 + name - A unique specifier for the actual (as opposed to virtual) node. This
3626 must be an array of size at least 'MPI_MAX_PROCESSOR_NAME'.
3627 - resultlen - Length (in characters) of the name. If this pointer is null,
3628 this value is not set.
3629
3630 Notes:
3631 The name returned should identify a particular piece of hardware;
3632 the exact format is implementation defined. This name may or may not
3633 be the same as might be returned by 'gethostname', 'uname', or 'sysinfo'.
3634
3635 This routine is essentially an MPID version of 'MPI_Get_processor_name' .
3636 It must be part of the device because not all environments support calls
3637 to return the processor name. The additional argument (input name
3638 length) is used to provide better error checking and to ensure that
3639 the input buffer is large enough (rather than assuming that it is
3640 'MPI_MAX_PROCESSOR_NAME' long).
3641 @*/
3642 int MPID_Get_processor_name( char *name, int namelen, int *resultlen); /* 'resultlen' may be NULL, in which case it is not set */
3643
3644 void MPID_Errhandler_free(MPID_Errhandler *errhan_ptr); /* NOTE(review): undocumented here; presumably frees the error handler object -- TODO confirm */
3645
3646 /*@
3647 MPID_Get_universe_size - Return the number of processes that the current
3648 process management environment can handle
3649
3650 Output Parameters:
3651 . universe_size - the universe size; MPIR_UNIVERSE_SIZE_NOT_AVAILABLE if the
3652 size cannot be determined
3653
3654 Return value:
3655 A MPI error code.
3656 @*/
3657 int MPID_Get_universe_size(int * universe_size); /* output is MPIR_UNIVERSE_SIZE_NOT_AVAILABLE when the size cannot be determined */
3658
3659 #define MPIR_UNIVERSE_SIZE_NOT_SET (-1) /* parenthesized so the sign cannot bind to neighbouring tokens at the use site */
3660 #define MPIR_UNIVERSE_SIZE_NOT_AVAILABLE (-2) /* reported by MPID_Get_universe_size when the size cannot be determined */
3661
3662 /*
3663 * FIXME: VCs should not be exposed to the top layer, which implies that these routines should not be exposed either. Instead,
3664 * the creation, duplication and destruction of communicator objects should be communicated to the device, allowing the device to
3665 * manage the underlying connections in a way that is appropriate (and efficient).
3666 */
3667
3668 /*@
3669 MPID_VCRT_Create - Create a virtual connection reference table (VCRT); presumably sized by 'size' and returned through 'vcrt_ptr' (TODO confirm)
3670 @*/
3671 int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr);
3672
3673 /*@
3674 MPID_VCRT_Add_ref - Add a reference to a VCRT
3675 @*/
3676 int MPID_VCRT_Add_ref(MPID_VCRT vcrt);
3677
3678 /*@
3679 MPID_VCRT_Release - Release a reference to a VCRT
3680
3681 Notes:
3682 The 'isDisconnect' argument allows this routine to handle the special
3683 case of 'MPI_Comm_disconnect', which needs to take special action
3684 if all references to a VC are removed.
3685 @*/
3686 int MPID_VCRT_Release(MPID_VCRT vcrt, int isDisconnect);
3687
3688 /*@
3689 MPID_VCRT_Get_ptr - NOTE(review): no description was given; presumably returns in 'vc_pptr' a pointer to the VCR array held by 'vcrt' (TODO confirm)
3690 @*/
3691 int MPID_VCRT_Get_ptr(MPID_VCRT vcrt, MPID_VCR **vc_pptr);
3692
3693 /*@
3694 MPID_VCR_Dup - Create a duplicate reference to a virtual connection
3695 @*/
3696 int MPID_VCR_Dup(MPID_VCR orig_vcr, MPID_VCR * new_vcr);
3697
3698 /*@
3699 MPID_VCR_Get_lpid - Get the local process id that corresponds to a
3700 virtual connection reference.
3701
3702 Notes:
3703 The local process ids are described elsewhere. Basically, they are
3704 a nonnegative number by which this process can refer to other processes
3705 to which it is connected. These are local process ids because different
3706 processes may use different ids to identify the same target process.
3707 @*/
3708 int MPID_VCR_Get_lpid(MPID_VCR vcr, int * lpid_ptr);
3709
3710 /* prototypes and declarations for the MPID_Sched interface for nonblocking
3711 * collectives */
3712 #include "mpir_nbc.h"
3713
3714 #include "mpiimplthreadpost.h"
3715
3716 /* Include definitions from the device which require items defined by this
3717 file (mpiimpl.h). */
3718 #include "mpidpost.h"
3719
3720 /* tunable parameter values */
3721 #include "mpich_param_vals.h"
3722
3723 /* Tags for point to point operations which implement collective and other
3724 internal operations */
3725 #define MPIR_BARRIER_TAG 1 /* values in this table run consecutively from 1 to 30 with no repeats */
3726 #define MPIR_BCAST_TAG 2
3727 #define MPIR_GATHER_TAG 3
3728 #define MPIR_GATHERV_TAG 4
3729 #define MPIR_SCATTER_TAG 5
3730 #define MPIR_SCATTERV_TAG 6
3731 #define MPIR_ALLGATHER_TAG 7
3732 #define MPIR_ALLGATHERV_TAG 8
3733 #define MPIR_ALLTOALL_TAG 9
3734 #define MPIR_ALLTOALLV_TAG 10
3735 #define MPIR_REDUCE_TAG 11
3736 #define MPIR_USER_REDUCE_TAG 12
3737 #define MPIR_USER_REDUCEA_TAG 13
3738 #define MPIR_ALLREDUCE_TAG 14
3739 #define MPIR_USER_ALLREDUCE_TAG 15
3740 #define MPIR_USER_ALLREDUCEA_TAG 16
3741 #define MPIR_REDUCE_SCATTER_TAG 17
3742 #define MPIR_USER_REDUCE_SCATTER_TAG 18
3743 #define MPIR_USER_REDUCE_SCATTERA_TAG 19
3744 #define MPIR_SCAN_TAG 20
3745 #define MPIR_USER_SCAN_TAG 21
3746 #define MPIR_USER_SCANA_TAG 22
3747 #define MPIR_LOCALCOPY_TAG 23
3748 #define MPIR_EXSCAN_TAG 24
3749 #define MPIR_ALLTOALLW_TAG 25
3750 #define MPIR_TOPO_A_TAG 26
3751 #define MPIR_TOPO_B_TAG 27
3752 #define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
3753 #define MPIR_ERROR_TAG 29
3754 #define MPIR_FIRST_NBC_TAG 30 /* presumably the first tag reserved for nonblocking collectives; later tags would start here -- TODO confirm */
3755
3756 /* These functions are used in the implementation of collective and
3757 other internal operations. They are wrappers around MPID send/recv
3758 functions. They do sends/receives by setting the context offset to
3759 MPID_CONTEXT_INTRA(INTER)_COLL. */
3760 int MPIC_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3761 MPI_Comm comm);
3762 int MPIC_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
3763 MPI_Comm comm, MPI_Status *status);
3764 int MPIC_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3765 MPI_Comm comm);
3766 int MPIC_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3767 int dest, int sendtag, void *recvbuf, int recvcount,
3768 MPI_Datatype recvtype, int source, int recvtag,
3769 MPI_Comm comm, MPI_Status *status);
3770 int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype type,
3771 int dest, int sendtag,
3772 int source, int recvtag,
3773 MPI_Comm comm, MPI_Status *status);
3774 int MPIR_Localcopy(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3775 void *recvbuf, int recvcount, MPI_Datatype recvtype);
3776 int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int
3777 source, int tag, MPI_Comm comm, MPI_Request *request);
3778 int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3779 MPI_Comm comm, MPI_Request *request);
3780 int MPIC_Wait(MPID_Request * request_ptr);
3781 int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
3782
/* FT versions of the MPIC_ functions */
3784 int MPIC_Send_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3785 MPI_Comm comm, int *errflag);
3786 int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
3787 MPI_Comm comm, MPI_Status *status, int *errflag);
3788 int MPIC_Ssend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3789 MPI_Comm comm, int *errflag);
3790 int MPIC_Sendrecv_ft(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3791 int dest, int sendtag, void *recvbuf, int recvcount,
3792 MPI_Datatype recvtype, int source, int recvtag,
3793 MPI_Comm comm, MPI_Status *status, int *errflag);
3794 int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
3795 int dest, int sendtag,
3796 int source, int recvtag,
3797 MPI_Comm comm, MPI_Status *status, int *errflag);
3798 int MPIC_Isend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
3799 MPI_Comm comm, MPI_Request *request, int *errflag);
3800 int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
3801 int tag, MPI_Comm comm, MPI_Request *request);
3802 int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag);
3803
3804
3805 void MPIR_MAXF ( void *, void *, int *, MPI_Datatype * ) ;
3806 void MPIR_MINF ( void *, void *, int *, MPI_Datatype * ) ;
3807 void MPIR_SUM ( void *, void *, int *, MPI_Datatype * ) ;
3808 void MPIR_PROD ( void *, void *, int *, MPI_Datatype * ) ;
3809 void MPIR_LAND ( void *, void *, int *, MPI_Datatype * ) ;
3810 void MPIR_BAND ( void *, void *, int *, MPI_Datatype * ) ;
3811 void MPIR_LOR ( void *, void *, int *, MPI_Datatype * ) ;
3812 void MPIR_BOR ( void *, void *, int *, MPI_Datatype * ) ;
3813 void MPIR_LXOR ( void *, void *, int *, MPI_Datatype * ) ;
3814 void MPIR_BXOR ( void *, void *, int *, MPI_Datatype * ) ;
3815 void MPIR_MAXLOC ( void *, void *, int *, MPI_Datatype * ) ;
3816 void MPIR_MINLOC ( void *, void *, int *, MPI_Datatype * ) ;
3817 void MPIR_REPLACE ( void *, void *, int *, MPI_Datatype * ) ;
3818 void MPIR_NO_OP ( void *, void *, int *, MPI_Datatype * ) ;
3819
/* One datatype-check function per reduction operation; all share the
 * MPIR_Op_check_dtype_fn shape (MPI_Datatype in, int out). */
int MPIR_MAXF_check_dtype(MPI_Datatype);
int MPIR_MINF_check_dtype(MPI_Datatype);
int MPIR_SUM_check_dtype(MPI_Datatype);
int MPIR_PROD_check_dtype(MPI_Datatype);
int MPIR_LAND_check_dtype(MPI_Datatype);
int MPIR_BAND_check_dtype(MPI_Datatype);
int MPIR_LOR_check_dtype(MPI_Datatype);
int MPIR_BOR_check_dtype(MPI_Datatype);
int MPIR_LXOR_check_dtype(MPI_Datatype);
int MPIR_BXOR_check_dtype(MPI_Datatype);
int MPIR_MAXLOC_check_dtype(MPI_Datatype);
int MPIR_MINLOC_check_dtype(MPI_Datatype);
int MPIR_REPLACE_check_dtype(MPI_Datatype);
int MPIR_NO_OP_check_dtype(MPI_Datatype);
3834
3835 #define MPIR_PREDEF_OP_COUNT 14
3836 extern MPI_User_function *MPIR_Op_table[];
3837
3838 typedef int (MPIR_Op_check_dtype_fn) ( MPI_Datatype );
3839 extern MPIR_Op_check_dtype_fn *MPIR_Op_check_dtype_table[];
3840
/* Look up the table entry for an op handle: the low four bits of the handle,
 * minus one, are used as the index.  No range check is performed. */
#define MPIR_OP_HDL_TO_FN(op) \
    (MPIR_Op_table[((op) & 0xf) - 1])
#define MPIR_OP_HDL_TO_DTYPE_FN(op) \
    (MPIR_Op_check_dtype_table[((op) & 0xf) - 1])
3843
/* Two-argument minimum/maximum, defined only if not already provided.
 * Each argument may be evaluated more than once, so avoid passing
 * expressions with side effects. */
#ifndef MPIR_MIN
#define MPIR_MIN(a, b) (((a) > (b)) ? (b) : (a))
#endif /* MPIR_MIN */

#ifndef MPIR_MAX
#define MPIR_MAX(a, b) (((b) > (a)) ? (b) : (a))
#endif /* MPIR_MAX */
3851
3852 int MPIR_Type_is_rma_atomic(MPI_Datatype type);
3853 int MPIR_Compare_equal(const void *a, const void *b, MPI_Datatype type);
3854
3855 int MPIR_Allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3856 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3857 MPID_Comm *comm_ptr, int *errflag );
3858 int MPIR_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3859 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3860 MPID_Comm *comm_ptr, int *errflag );
3861 int MPIR_Allgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3862 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3863 MPID_Comm *comm_ptr, int *errflag );
3864 int MPIR_Allgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3865 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3866 MPID_Comm *comm_ptr, int *errflag );
3867 int MPIR_Allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3868 void *recvbuf, const int *recvcounts, const int *displs,
3869 MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
3870 int MPIR_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3871 void *recvbuf, const int *recvcounts, const int *displs,
3872 MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
3873 int MPIR_Allgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3874 void *recvbuf, const int *recvcounts, const int *displs,
3875 MPI_Datatype recvtype, MPID_Comm *comm_pt, int *errflag );
3876 int MPIR_Allgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3877 void *recvbuf, const int *recvcounts, const int *displs,
3878 MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag );
3879 int MPIR_Allreduce_impl(const void *sendbuf, void *recvbuf, int count,
3880 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3881 int MPIR_Allreduce(const void *sendbuf, void *recvbuf, int count,
3882 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3883 int MPIR_Allreduce_intra(const void *sendbuf, void *recvbuf, int count,
3884 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3885 int MPIR_Allreduce_inter(const void *sendbuf, void *recvbuf, int count,
3886 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3887 int MPIR_Alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3888 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3889 MPID_Comm *comm_ptr, int *errflag);
3890 int MPIR_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3891 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3892 MPID_Comm *comm_ptr, int *errflag);
3893 int MPIR_Alltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3894 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3895 MPID_Comm *comm_ptr, int *errflag);
3896 int MPIR_Alltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
3897 void *recvbuf, int recvcount, MPI_Datatype recvtype,
3898 MPID_Comm *comm_ptr, int *errflag);
3899 int MPIR_Alltoallv_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
3900 MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
3901 const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
3902 int *errflag);
3903 int MPIR_Alltoallv(const void *sendbuf, const int *sendcnts, const int *sdispls,
3904 MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
3905 const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, int *errflag);
3906 int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
3907 MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
3908 const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
3909 int *errflag);
3910 int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
3911 MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
3912 const int *rdispls, MPI_Datatype recvtype,
3913 MPID_Comm *comm_ptr, int *errflag);
3914 int MPIR_Alltoallw_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
3915 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
3916 const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr,
3917 int *errflag);
3918 int MPIR_Alltoallw(const void *sendbuf, const int *sendcnts, const int *sdispls,
3919 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
3920 const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr,
3921 int *errflag);
3922 int MPIR_Alltoallw_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
3923 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
3924 const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr,
3925 int *errflag);
3926 int MPIR_Alltoallw_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
3927 const MPI_Datatype *sendtypes, void *recvbuf,
3928 const int *recvcnts, const int *rdispls, const MPI_Datatype *recvtypes,
3929 MPID_Comm *comm_ptr, int *errflag);
3930 int MPIR_Bcast_inter(void *buffer, int count, MPI_Datatype datatype,
3931 int root, MPID_Comm *comm_ptr, int *errflag);
3932 int MPIR_Bcast_intra (void *buffer, int count, MPI_Datatype datatype, int
3933 root, MPID_Comm *comm_ptr, int *errflag);
3934 int MPIR_Bcast (void *buffer, int count, MPI_Datatype datatype, int
3935 root, MPID_Comm *comm_ptr, int *errflag);
3936 int MPIR_Bcast_impl (void *buffer, int count, MPI_Datatype datatype, int
3937 root, MPID_Comm *comm_ptr, int *errflag);
3938 int MPIR_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3939 MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
3940 int MPIR_Exscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3941 MPI_Op op, MPID_Comm *comm_ptr, int *errflag );
3942 int MPIR_Gather_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3943 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3944 int root, MPID_Comm *comm_ptr, int *errflag);
3945 int MPIR_Gather (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3946 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3947 int root, MPID_Comm *comm_ptr, int *errflag);
3948 int MPIR_Gather_intra (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3949 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3950 int root, MPID_Comm *comm_ptr, int *errflag);
3951 int MPIR_Gather_inter (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3952 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3953 int root, MPID_Comm *comm_ptr, int *errflag );
3954 int MPIR_Gatherv (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3955 void *recvbuf, const int *recvcnts, const int *displs,
3956 MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag);
3957 int MPIR_Gatherv_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3958 void *recvbuf, const int *recvcnts, const int *displs,
3959 MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, int *errflag);
3960 int MPIR_Reduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcnts,
3961 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3962 int MPIR_Reduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcnts,
3963 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3964 int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts,
3965 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3966 int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts,
3967 MPI_Datatype datatype, MPI_Op op,
3968 MPID_Comm *comm_ptr, int *errflag);
3969 int MPIR_Reduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount,
3970 MPI_Datatype datatype, MPI_Op op, MPID_Comm
3971 *comm_ptr, int *errflag );
3972 int MPIR_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
3973 MPI_Datatype datatype, MPI_Op op, MPID_Comm
3974 *comm_ptr, int *errflag );
3975 int MPIR_Reduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount,
3976 MPI_Datatype datatype, MPI_Op op, MPID_Comm
3977 *comm_ptr, int *errflag );
3978 int MPIR_Reduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount,
3979 MPI_Datatype datatype, MPI_Op op, MPID_Comm
3980 *comm_ptr, int *errflag);
3981 int MPIR_Reduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3982 MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
3983 int MPIR_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3984 MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
3985 int MPIR_Reduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3986 MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag );
3987 int MPIR_Reduce_inter (const void *sendbuf, void *recvbuf, int count, MPI_Datatype
3988 datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, int *errflag);
3989 int MPIR_Scan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3990 MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3991 int MPIR_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
3992 MPI_Op op, MPID_Comm *comm_ptr, int *errflag);
3993 int MPIR_Scatter_impl(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3994 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3995 int root, MPID_Comm *comm_ptr, int *errflag );
3996 int MPIR_Scatter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
3997 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
3998 int root, MPID_Comm *comm_ptr, int *errflag );
3999 int MPIR_Scatter_intra(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
4000 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
4001 int root, MPID_Comm *comm_ptr, int *errflag );
4002 int MPIR_Scatter_inter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
4003 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
4004 int root, MPID_Comm *comm_ptr, int *errflag );
4005 int MPIR_Scatterv_impl (const void *sendbuf, const int *sendcnts, const int *displs,
4006 MPI_Datatype sendtype, void *recvbuf, int recvcnt,
4007 MPI_Datatype recvtype, int root, MPID_Comm
4008 *comm_ptr, int *errflag);
4009 int MPIR_Scatterv (const void *sendbuf, const int *sendcnts, const int *displs,
4010 MPI_Datatype sendtype, void *recvbuf, int recvcnt,
4011 MPI_Datatype recvtype, int root, MPID_Comm
4012 *comm_ptr, int *errflag);
4013 int MPIR_Barrier_impl( MPID_Comm *comm_ptr, int *errflag);
4014 int MPIR_Barrier( MPID_Comm *comm_ptr, int *errflag);
4015 int MPIR_Barrier_intra( MPID_Comm *comm_ptr, int *errflag);
4016 int MPIR_Barrier_inter( MPID_Comm *comm_ptr, int *errflag);
4017
4018 int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
4019
4020 int MPIR_Setup_intercomm_localcomm( MPID_Comm * );
4021
4022 int MPIR_Comm_create( MPID_Comm ** );
4023
4024 /* comm_create helper functions, used by both comm_create and comm_create_group */
4025 int MPIR_Comm_create_calculate_mapping(MPID_Group *group_ptr,
4026 MPID_Comm *comm_ptr,
4027 MPID_VCR **mapping_vcr_out,
4028 int **mapping_out);
4029 int MPIR_Comm_create_create_and_map_vcrt(int n,
4030 int *mapping,
4031 MPID_VCR *mapping_vcr,
4032 MPID_VCRT *out_vcrt,
4033 MPID_VCR **out_vcr);
4034
4035 int MPIR_Comm_commit( MPID_Comm * );
4036
4037 int MPIR_Comm_is_node_aware( MPID_Comm * );
4038
4039 int MPIR_Comm_is_node_consecutive( MPID_Comm *);
4040
4041 void MPIR_Free_err_dyncodes( void );
4042
4043 int MPIR_Comm_idup_impl(MPID_Comm *comm_ptr, MPID_Comm **newcomm, MPID_Request **reqp);
4044
4045 int MPIR_Allreduce_group(void *sendbuf, void *recvbuf, int count,
4046 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
4047 MPID_Group *group_ptr, int tag, int *errflag);
4048 int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
4049 MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
4050 MPID_Group *group_ptr, int tag, int *errflag);
4051
4052
4053 int MPIR_Barrier_group(MPID_Comm *comm_ptr, MPID_Group *group_ptr, int tag, int *errflag);
4054
4055
4056 /* topology impl functions */
4057 int MPIR_Dist_graph_neighbors_count_impl(MPID_Comm *comm_ptr, int *indegree, int *outdegree, int *weighted);
4058 int MPIR_Dist_graph_neighbors_impl(MPID_Comm *comm_ptr,
4059 int maxindegree, int sources[], int sourceweights[],
4060 int maxoutdegree, int destinations[], int destweights[]);
4061 int MPIR_Graph_neighbors_count_impl(MPID_Comm *comm_ptr, int rank, int *nneighbors);
4062 int MPIR_Graph_neighbors_impl(MPID_Comm *comm_ptr, int rank, int maxneighbors, int *neighbors);
4063 int MPIR_Cart_shift_impl(MPID_Comm *comm_ptr, int direction, int displ, int *source, int *dest);
4064
4065 /* begin impl functions for NBC */
4066 int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request);
4067 int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4068 int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4069 int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4070 int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4071 int MPIR_Iscatterv_impl(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4072 int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4073 int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4074 int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4075 int MPIR_Ialltoallv_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4076 int MPIR_Ialltoallw_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, MPI_Request *request);
4077 int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPI_Request *request);
4078 int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request);
4079 int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request);
4080 int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request);
4081 int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request);
4082 int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request);
4083 /* end impl functions for NBC */
4084
4085 /* begin impl functions for neighborhood collectives */
4086 int MPIR_Ineighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4087 int MPIR_Ineighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4088 int MPIR_Ineighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4089 int MPIR_Ineighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request);
4090 int MPIR_Ineighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPID_Comm *comm_ptr, MPI_Request *request);
4091 int MPIR_Neighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4092 int MPIR_Neighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4093 int MPIR_Neighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4094 int MPIR_Neighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4095 int MPIR_Neighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPID_Comm *comm_ptr);
4096 /* end impl functions for neighborhood collectives */
4097
4098 /* neighborhood collective default algorithms */
4099 int MPIR_Neighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4100 int MPIR_Neighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4101 int MPIR_Neighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4102 int MPIR_Neighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr);
4103 int MPIR_Neighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPID_Comm *comm_ptr);
4104 int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4105 int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4106 int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4107 int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4108 int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPID_Comm *comm_ptr, MPID_Sched_t s);
4109
4110 /* nonblocking collective default algorithms */
4111 int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4112 int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4113 int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4114 int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4115 int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPID_Comm *comm_ptr, int nbytes, MPID_Sched_t s);
4116 int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4117 int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4118 int MPIR_Ibarrier_intra(MPID_Comm *comm_ptr, MPID_Sched_t s);
4119 int MPIR_Ibarrier_inter(MPID_Comm *comm_ptr, MPID_Sched_t s);
4120 int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4121 int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4122 int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4123 int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4124 int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4125 int MPIR_Ialltoallv_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4126 int MPIR_Ialltoallv_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4127 int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4128 int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4129 int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4130 int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4131 int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4132 int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4133 int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4134 int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4135 int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4136 int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4137 int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4138 int MPIR_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4139 int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4140 int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4141 int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4142 int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4143 int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4144 int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
4145 int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4146 int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4147 int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4148 int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4149 int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4150 int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4151 int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4152 int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4153 int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4154 int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4155 int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4156 int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4157 int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4158 int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4159 int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4160 int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4161 int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4162 int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4163 int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4164 int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4165 int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4166 int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPID_Sched_t s);
4167 int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4168 int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4169 int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
4170 int MPIR_Ialltoallw_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, MPID_Sched_t s);
4171 int MPIR_Ialltoallw_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPID_Comm *comm_ptr, MPID_Sched_t s);
4172
4173 /* begin impl functions for MPI_T (MPIX_T_ right now) */
4174 int MPIR_T_init_thread_impl(int required, int *provided);
4175 int MPIR_T_finalize_impl(void);
4176 int MPIR_T_enum_get_info_impl(MPIX_T_enum enumtype, int num, char *name, int *name_len);
4177 int MPIR_T_enum_get_item_impl(MPIX_T_enum enumtype, int num, int *value, char *name, int *name_len);
4178 int MPIR_T_cvar_get_num_impl(int *num_cvar);
4179 int MPIR_T_cvar_get_info_impl(int cvar_index, char *name, int *name_len, int *verbosity, MPI_Datatype *datatype, MPIX_T_enum *enumtype, char *desc, int *desc_len, int *binding, int *scope);
4180 int MPIR_T_cvar_handle_alloc_impl(int cvar_index, void *obj_handle, MPIX_T_cvar_handle *handle, int *count);
4181 int MPIR_T_cvar_handle_free_impl(MPIX_T_cvar_handle *handle);
4182 int MPIR_T_cvar_read_impl(MPIX_T_cvar_handle handle, void *buf);
4183 int MPIR_T_cvar_write_impl(MPIX_T_cvar_handle handle, void *buf);
4184 int MPIR_T_pvar_get_num_impl(int *num_pvar);
4185 int MPIR_T_pvar_get_info_impl(int pvar_index, char *name, int *name_len, int *verbosity, int *var_class, MPI_Datatype *datatype, MPIX_T_enum *enumtype, char *desc, int *desc_len, int *binding, int *readonly, int *continuous, int *atomic);
4186 int MPIR_T_pvar_session_create_impl(MPIX_T_pvar_session *session);
4187 int MPIR_T_pvar_session_free_impl(MPIX_T_pvar_session *session);
4188 int MPIR_T_pvar_handle_alloc_impl(MPIX_T_pvar_session session, int pvar_index, void *obj_handle, MPIX_T_pvar_handle *handle, int *count);
4189 int MPIR_T_pvar_handle_free_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle *handle);
4190 int MPIR_T_pvar_start_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle);
4191 int MPIR_T_pvar_stop_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle);
4192 int MPIR_T_pvar_read_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle, void *buf);
4193 int MPIR_T_pvar_write_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle, void *buf);
4194 int MPIR_T_pvar_reset_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle);
4195 int MPIR_T_pvar_readreset_impl(MPIX_T_pvar_session session, MPIX_T_pvar_handle handle, void *buf);
4196 int MPIR_T_category_get_num_impl(int *num_cat);
4197 int MPIR_T_category_get_info_impl(int cat_index, char *name, int *name_len, char *desc, int *desc_len, int *num_controlvars, int *num_pvars, int *num_categories);
4198 int MPIR_T_category_get_cvars_impl(int cat_index, int len, int indices[]);
/* The category is selected by a single scalar index, exactly as in the
 * sibling MPIR_T_category_get_cvars_impl and
 * MPIR_T_category_get_categories_impl prototypes; only `indices` is an
 * array, with `len` giving its length. */
int MPIR_T_category_get_pvars_impl(int cat_index, int len, int indices[]);
4200 int MPIR_T_category_get_categories_impl(int cat_index, int len, int indices[]);
4201 int MPIR_T_category_changed_impl(int *stamp);
4202 /* end impl functions for MPI_T (MPIX_T_ right now) */
4203
4204 int MPIR_T_is_initialized(void);
4205
4206 /* random initializers */
4207 int MPIR_Group_init(void);
4208 int MPIR_Comm_init(MPID_Comm *);
4209
4210
4211 /* Collective functions cannot be called from multiple threads. These
4212 are stubs used in the collective communication calls to check for
4213 user error. Currently they are just being macroed out. */
/* Entry hook: accepts the communicator pointer and expands to nothing. */
4214 #define MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER(comm_ptr)
/* Exit hook matching the ENTER macro: also expands to nothing. */
4215 #define MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT(comm_ptr)
4216
4217 /* Miscellaneous */
4218 void MPIU_SetTimeout( int );
4219
4220 #if defined(HAVE_VSNPRINTF) && defined(NEEDS_VSNPRINTF_DECL) && !defined(vsnprintf)
4221 int vsnprintf(char *str, size_t size, const char *format, va_list ap);
4222 # endif
4223
4224 /* Routines for determining local and remote processes */
4225
4226 int MPIU_Find_local_and_external(struct MPID_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
4227 int *external_size_p, int *external_rank_p, int **external_ranks_p,
4228 int **intranode_table, int **internode_table_p);
4229 int MPIU_Get_internode_rank(MPID_Comm *comm_ptr, int r);
4230 int MPIU_Get_intranode_rank(MPID_Comm *comm_ptr, int r);
4231
4232 /* Trivial accessor macros */
4233
/* Expands to the `rank` field of the communicator object. */
#define MPIR_Comm_rank(comm_ptr) ((comm_ptr)->rank)
/* Expands to the `local_size` field of the communicator object. */
#define MPIR_Comm_size(comm_ptr) ((comm_ptr)->local_size)
/* Forwards to MPID_Datatype_get_extent_macro, passing the object that
 * extent_ptr points at as the second argument. */
#define MPIR_Type_extent_impl(datatype, extent_ptr) MPID_Datatype_get_extent_macro(datatype, *(extent_ptr))
/* Forwards to MPID_Datatype_get_size_macro, passing the object that size
 * points at as the second argument. */
#define MPIR_Type_size_impl(datatype, size) MPID_Datatype_get_size_macro(datatype, *(size))
/* Copies the `cancelled` field of *status into *flag.  The whole expansion
 * is parenthesized so that the assignment stays one operand when the macro
 * appears inside a larger expression (e.g. after `!` or next to another
 * operator); used as a plain statement it behaves exactly as before. */
#define MPIR_Test_cancelled_impl(status, flag) (*(flag) = (status)->cancelled)
4239
4240 /* MPIR_ functions. These are versions of MPI_ functions appropriate for calling within MPI */
4241 int MPIR_Cancel_impl(MPID_Request *request_ptr);
4242 struct MPIR_Topology;
4243 void MPIR_Cart_rank_impl(struct MPIR_Topology *cart_ptr, const int *coords, int *rank);
4244 int MPIR_Cart_create_impl(MPID_Comm *comm_ptr, int ndims, const int dims[],
4245 const int periods[], int reorder, MPI_Comm *comm_cart);
4246 int MPIR_Cart_map_impl(const MPID_Comm *comm_ptr, int ndims, const int dims[],
4247 const int periodic[], int *newrank);
4248 int MPIR_Close_port_impl(const char *port_name);
4249 int MPIR_Open_port_impl(MPID_Info *info_ptr, char *port_name);
4250 void MPIR_Info_get_impl(MPID_Info *info_ptr, const char *key, int valuelen, char *value, int *flag);
4251 void MPIR_Info_get_nkeys_impl(MPID_Info *info_ptr, int *nkeys);
4252 int MPIR_Info_get_nthkey_impl(MPID_Info *info, int n, char *key);
4253 void MPIR_Info_get_valuelen_impl(MPID_Info *info_ptr, const char *key, int *valuelen, int *flag);
4254 int MPIR_Comm_delete_attr_impl(MPID_Comm *comm_ptr, MPID_Keyval *keyval_ptr);
4255 int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
4256 MPI_Comm_delete_attr_function *comm_delete_attr_fn,
4257 int *comm_keyval, void *extra_state);
4258 int MPIR_Comm_accept_impl(const char * port_name, MPID_Info * info_ptr, int root,
4259 MPID_Comm * comm_ptr, MPID_Comm ** newcomm_ptr);
4260 int MPIR_Comm_connect_impl(const char * port_name, MPID_Info * info_ptr, int root,
4261 MPID_Comm * comm_ptr, MPID_Comm ** newcomm_ptr);
4262 int MPIR_Comm_create_errhandler_impl(MPI_Comm_errhandler_function *function,
4263 MPI_Errhandler *errhandler);
4264 int MPIR_Comm_dup_impl(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr);
4265 int MPIR_Comm_free_impl(MPID_Comm * comm_ptr);
4266 void MPIR_Comm_free_keyval_impl(int keyval);
4267 void MPIR_Comm_get_errhandler_impl(MPID_Comm *comm_ptr, MPID_Errhandler **errhandler_ptr);
4268 void MPIR_Comm_set_errhandler_impl(MPID_Comm *comm_ptr, MPID_Errhandler *errhandler_ptr);
4269 void MPIR_Comm_get_name_impl(MPID_Comm *comm, char *comm_name, int *resultlen);
4270 int MPIR_Intercomm_merge_impl(MPID_Comm *comm_ptr, int high, MPID_Comm **new_intracomm_ptr);
4271 int MPIR_Intercomm_create_impl(MPID_Comm *local_comm_ptr, int local_leader,
4272 MPID_Comm *peer_comm_ptr, int remote_leader, int tag,
4273 MPID_Comm **new_intercomm_ptr);
4274 int MPIR_Comm_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr);
4275 int MPIR_Comm_remote_group_impl(MPID_Comm *comm_ptr, MPID_Group **group_ptr);
4276 int MPIR_Comm_group_failed_impl(MPID_Comm *comm, MPID_Group **failed_group_ptr);
4277 int MPIR_Comm_remote_group_failed_impl(MPID_Comm *comm, MPID_Group **failed_group_ptr);
4278 int MPIR_Comm_split_impl(MPID_Comm *comm_ptr, int color, int key, MPID_Comm **newcomm_ptr);
4279 int MPIR_Comm_split_type_impl(MPID_Comm *comm_ptr, int split_type, int key, MPID_Info *info_ptr,
4280 MPID_Comm **newcomm_ptr);
4281 int MPIR_Group_compare_impl(MPID_Group *group_ptr1, MPID_Group *group_ptr2, int *result);
4282 int MPIR_Group_difference_impl(MPID_Group *group_ptr1, MPID_Group *group_ptr2, MPID_Group **new_group_ptr);
4283 int MPIR_Group_excl_impl(MPID_Group *group_ptr, int n, const int *ranks, MPID_Group **new_group_ptr);
4284 int MPIR_Group_free_impl(MPID_Group *group_ptr);
4285 int MPIR_Group_incl_impl(MPID_Group *group_ptr, int n, const int *ranks, MPID_Group **new_group_ptr);
4286 int MPIR_Group_intersection_impl(MPID_Group *group_ptr1, MPID_Group *group_ptr2, MPID_Group **new_group_ptr);
4287 int MPIR_Group_range_excl_impl(MPID_Group *group_ptr, int n, int ranges[][3], MPID_Group **new_group_ptr);
4288 int MPIR_Group_range_incl_impl(MPID_Group *group_ptr, int n, int ranges[][3], MPID_Group **new_group_ptr);
4289 int MPIR_Group_translate_ranks_impl(MPID_Group *group_ptr1, int n, const int *ranks1,
4290 MPID_Group *group_ptr2, int *ranks2);
4291 int MPIR_Group_union_impl(MPID_Group *group_ptr1, MPID_Group *group_ptr2, MPID_Group **new_group_ptr);
4292 void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *count);
4293 void MPIR_Grequest_complete_impl(MPID_Request *request_ptr);
4294 int MPIR_Grequest_start_impl(MPI_Grequest_query_function *query_fn,
4295 MPI_Grequest_free_function *free_fn,
4296 MPI_Grequest_cancel_function *cancel_fn,
4297 void *extra_state, MPID_Request **request_ptr);
4298 int MPIR_Graph_map_impl(const MPID_Comm *comm_ptr, int nnodes,
4299 const int indx[], const int edges[], int *newrank);
4300 int MPIR_Type_commit_impl(MPI_Datatype *datatype);
4301 int MPIR_Type_create_struct_impl(int count,
4302 const int array_of_blocklengths[],
4303 const MPI_Aint array_of_displacements[],
4304 const MPI_Datatype array_of_types[],
4305 MPI_Datatype *newtype);
4306 int MPIR_Type_create_indexed_block_impl(int count,
4307 int blocklength,
4308 const int array_of_displacements[],
4309 MPI_Datatype oldtype,
4310 MPI_Datatype *newtype);
4311 int MPIR_Type_create_hindexed_block_impl(int count, int blocklength,
4312 const MPI_Aint array_of_displacements[],
4313 MPI_Datatype oldtype, MPI_Datatype *newtype);
4314 int MPIR_Type_contiguous_impl(int count,
4315 MPI_Datatype old_type,
4316 MPI_Datatype *new_type_p);
4317 void MPIR_Type_get_extent_impl(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent);
4318 void MPIR_Type_get_true_extent_impl(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent);
4319 void MPIR_Type_get_envelope_impl(MPI_Datatype datatype, int *num_integers, int *num_addresses,
4320 int *num_datatypes, int *combiner);
4321 int MPIR_Type_hvector_impl(int count, int blocklen, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
4322 int MPIR_Type_indexed_impl(int count, const int blocklens[], const int indices[],
4323 MPI_Datatype old_type, MPI_Datatype *newtype);
4324 void MPIR_Type_free_impl(MPI_Datatype *datatype);
4325 int MPIR_Type_vector_impl(int count, int blocklength, int stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
4326 int MPIR_Type_struct_impl(int count, const int blocklens[], const MPI_Aint indices[], const MPI_Datatype old_types[], MPI_Datatype *newtype);
4327 int MPIR_Pack_impl(const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, int outcount, int *position);
4328 void MPIR_Pack_size_impl(int incount, MPI_Datatype datatype, int *size);
4329 int MPIR_Unpack_impl(const void *inbuf, int insize, int *position,
4330 void *outbuf, int outcount, MPI_Datatype datatype);
4331 void MPIR_Type_lb_impl(MPI_Datatype datatype, MPI_Aint *displacement);
4332 int MPIR_Ibsend_impl(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
4333 MPID_Comm *comm_ptr, MPI_Request *request);
4334 int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status);
4335 int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status);
4336 int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
4337 MPI_Status array_of_statuses[]);
4338 int MPIR_Comm_set_attr_impl(MPID_Comm *comm_ptr, int comm_keyval, void *attribute_val,
4339 MPIR_AttrType attrType);
4340
4341
4342 /* The "fastpath" version of MPIR_Request_complete. It only handles
4343 * MPID_REQUEST_SEND and MPID_REQUEST_RECV kinds, and it does not attempt to
4344 * deal with status structures under the assumption that bleeding fast code will
4345 * pass either MPI_STATUS_IGNORE or MPI_STATUSES_IGNORE as appropriate. This
4346 * routine (or some variation of it) is an unfortunately necessary stunt to
4347 * get high message rates on key benchmarks for high-end systems.
4348 */
4349 #undef FUNCNAME
4350 #define FUNCNAME MPIR_Request_complete_fastpath
4351 #undef FCNAME
4352 #define FCNAME MPIU_QUOTE(FUNCNAME)
/* Complete a send or receive request without touching any status object.
 *
 * request     - caller's request handle; set to MPI_REQUEST_NULL on return
 * request_ptr - the request object behind that handle; its kind must be
 *               MPID_REQUEST_SEND or MPID_REQUEST_RECV (asserted)
 *
 * Returns the MPI_ERROR value stored in the request's status.
 */
static inline int MPIR_Request_complete_fastpath(MPI_Request *request, MPID_Request *request_ptr)
{
    int completion_err;

    /* this shortcut is only valid for plain send and receive requests */
    MPIU_Assert(request_ptr->kind == MPID_REQUEST_SEND || request_ptr->kind == MPID_REQUEST_RECV);

    /* sends get one extra step, the SENDQ hook; from there on both kinds
     * are completed the same way.
     * FIXME: are Ibsend requests added to the send queue? */
    if (MPID_REQUEST_SEND == request_ptr->kind) {
        MPIR_SENDQ_FORGET(request_ptr);
    }

    /* read the error code out of the request before releasing it, then
     * null out the caller's handle */
    completion_err = request_ptr->status.MPI_ERROR;
    MPID_Request_release(request_ptr);
    *request = MPI_REQUEST_NULL;

    /* plain return instead of the usual fn_exit/fn_fail labels: fewer jumps
     * and less compiler confusion */
    return completion_err;
}
4373
4374 /* avoid conflicts in source files with old-style "char FCNAME[]" vars */
4375 #undef FUNCNAME
4376 #undef FCNAME
4377
4378 #endif /* MPIIMPL_H_INCLUDED */
4379