1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #ifndef MPIDPRE_H_INCLUDED
7 #define MPIDPRE_H_INCLUDED
8 
9 #ifdef HAVE_SYS_TYPES_H
10 #include <sys/types.h>
11 #endif
12 
13 #ifdef HAVE_LIBHCOLL
14 #include "hcoll/api/hcoll_dte.h"
15 #endif
16 
17 #include "mpid_thread.h"
18 #include "mpid_sched.h"
19 #include "netmodpre.h"
20 #ifndef MPIDI_CH4_DIRECT_NETMOD
21 #include "shmpre.h"
22 #endif
23 #include "uthash.h"
24 #include "ch4_csel_container.h"
25 #include "ch4i_workq_types.h"
26 
/* Currently, workq is a configure-time only option and guarded by macro
 * MPIDI_CH4_USE_WORK_QUEUES. If we want to enable runtime option, we will
 * need to switch everywhere from "#ifdef MPIDI_CH4_USE_WORK_QUEUES" into
 * runtime "if - else".
 *
 * The block below defines MPIDI_CH4_MT_MODEL, the multi-threading model
 * selector: it is a fixed constant for the MT_DIRECT and MT_HANDOFF builds
 * and reads MPIDI_global.settings.mt_model for the MT_RUNTIME build. The
 * MPIDI_CH4_USE_MT_* macros are checked in the order listed and the first
 * one defined wins; if none is defined the build is aborted with #error.
 */
#ifdef MPIDI_CH4_USE_MT_DIRECT
#define MPIDI_CH4_MT_MODEL MPIDI_CH4_MT_DIRECT
#elif defined MPIDI_CH4_USE_MT_HANDOFF
#define MPIDI_CH4_MT_MODEL MPIDI_CH4_MT_HANDOFF
#elif defined MPIDI_CH4_USE_MT_RUNTIME
#define MPIDI_CH4_MT_MODEL MPIDI_global.settings.mt_model
#else
#error "Unknown MT model or MT model not defined"
#endif
41 
/* Device-private datatype data. MPID_DEV_DATATYPE_DECL declares one
 * instance of it as a member named `dev` (presumably inside the datatype
 * object defined by the upper layer -- confirm). */
typedef struct {
#ifdef HAVE_LIBHCOLL
    hcoll_datatype_t hcoll_datatype;    /* only present when built with HCOLL */
#endif
    union {
        /* members are provided by the netmod via MPIDI_NM_DT_DECL */
    MPIDI_NM_DT_DECL} netmod;
} MPIDI_Devdt_t;
#define MPID_DEV_DATATYPE_DECL   MPIDI_Devdt_t   dev;
50 
/* State carried across progress calls. Both arrays are sized by
 * MPIDI_CH4_MAX_VCIS; `vci_count` tells how many entries of `vci` are in use.
 * NOTE(review): the meaning of `flag`, `progress_made` and `progress_counts`
 * is not visible in this header -- confirm against the progress engine. */
typedef struct {
    int flag;
    int progress_made;
    int vci_count;              /* number of vcis that need progress */
    int progress_counts[MPIDI_CH4_MAX_VCIS];
    uint8_t vci[MPIDI_CH4_MAX_VCIS];    /* list of vcis that need progress */
} MPID_Progress_state;
58 
/* Kind of point-to-point operation recorded for a persistent request
 * (stored in the `p_type` member of MPIDI_prequest_t). Values are spelled
 * out explicitly; they match the implicit numbering 0..3. */
typedef enum {
    MPIDI_PTYPE_RECV = 0,
    MPIDI_PTYPE_SEND = 1,
    MPIDI_PTYPE_BSEND = 2,
    MPIDI_PTYPE_SSEND = 3
} MPIDI_ptype;
65 
/* Request state bit flags, one bit each (bits 0..8).
 * MPIDIG_REQ_IN_PROGRESS is tested against the `status` word of
 * MPIDIG_req_ext_t by MPIDIG_REQUEST_IN_PROGRESS().
 * NOTE(review): the remaining flags presumably live in the same word -- confirm. */
#define MPIDIG_REQ_BUSY            (0x1 << 0)
#define MPIDIG_REQ_PEER_SSEND      (0x1 << 1)
#define MPIDIG_REQ_UNEXPECTED      (0x1 << 2)
#define MPIDIG_REQ_UNEXP_DQUED     (0x1 << 3)
#define MPIDIG_REQ_UNEXP_CLAIMED   (0x1 << 4)
#define MPIDIG_REQ_RCV_NON_CONTIG  (0x1 << 5)
#define MPIDIG_REQ_MATCHED         (0x1 << 6)
#define MPIDIG_REQ_RTS             (0x1 << 7)
#define MPIDIG_REQ_IN_PROGRESS     (0x1 << 8)
75 
/* KVS key string and the maximum KVS value length.
 * NOTE(review): the key presumably publishes the parent's root port name
 * for spawned processes, and the length is presumably in bytes -- confirm
 * against the users of these macros. */
#define MPIDI_PARENT_PORT_KVSKEY "PARENT_ROOT_PORT_NAME"
#define MPIDI_MAX_KVS_VALUE_LEN  4096
78 
/* Send-side request fields; this is the `sreq` member of the anonymous
 * union in MPIDIG_req_ext_t. */
typedef struct MPIDIG_sreq_t {
    /* persistent send fields */
    const void *src_buf;
    MPI_Count count;
    MPI_Datatype datatype;
    int rank;
    MPIR_Context_id_t context_id;
} MPIDIG_sreq_t;
87 
/* Receive-side request fields; this is the `rreq` member of the anonymous
 * union in MPIDIG_req_ext_t. It is also the element type of the
 * posted/unexpected lists in MPIDIG_comm_t (presumably chained through
 * `prev`/`next` -- confirm). */
typedef struct MPIDIG_rreq_t {
    /* mrecv fields */
    void *mrcv_buffer;
    uint64_t mrcv_count;
    MPI_Datatype mrcv_datatype;

    uint64_t ignore;
    MPIR_Request *peer_req_ptr;
    MPIR_Request *match_req;
    MPIR_Request *request;

    /* list linkage */
    struct MPIDIG_rreq_t *prev, *next;
} MPIDIG_rreq_t;
101 
/* Put-request fields; this is the `preq` member of the anonymous union in
 * MPIDIG_req_ext_t. Holds the window, origin/target buffer descriptions and
 * datatype bookkeeping for the operation.
 * NOTE(review): which side (origin or target) fills each field is not
 * visible in this header. */
typedef struct MPIDIG_put_req_t {
    MPIR_Win *win_ptr;
    MPIR_Request *preq_ptr;
    void *flattened_dt;
    MPIR_Datatype *dt;
    void *origin_addr;
    int origin_count;
    MPI_Datatype origin_datatype;
    void *target_addr;
    MPI_Datatype target_datatype;
} MPIDIG_put_req_t;
113 
/* Get-request fields; this is the `greq` member of the anonymous union in
 * MPIDIG_req_ext_t.
 * NOTE(review): which side (origin or target) fills each field is not
 * visible in this header. */
typedef struct MPIDIG_get_req_t {
    MPIR_Win *win_ptr;
    MPIR_Request *greq_ptr;
    void *addr;
    MPI_Datatype datatype;
    int count;
    void *flattened_dt;
    MPIR_Datatype *dt;
    MPI_Datatype target_datatype;
} MPIDIG_get_req_t;
124 
/* Compare-and-swap request fields; this is the `creq` member of the
 * anonymous union in MPIDIG_req_ext_t. */
typedef struct MPIDIG_cswap_req_t {
    MPIR_Win *win_ptr;
    MPIR_Request *creq_ptr;
    void *addr;
    MPI_Datatype datatype;
    void *data;
    void *result_addr;
} MPIDIG_cswap_req_t;
133 
/* Accumulate-request fields; this is the `areq` member of the anonymous
 * union in MPIDIG_req_ext_t. It carries origin, target and result buffer
 * descriptions plus the reduction op.
 * NOTE(review): the result_* fields are presumably only meaningful for
 * get-accumulate style operations -- confirm. */
typedef struct MPIDIG_acc_req_t {
    MPIR_Win *win_ptr;
    MPIR_Request *req_ptr;
    MPI_Datatype origin_datatype;
    MPI_Datatype target_datatype;
    int origin_count;
    int target_count;
    void *target_addr;
    void *flattened_dt;
    void *data;
    size_t data_sz;
    MPI_Op op;
    void *result_addr;
    int result_count;
    void *origin_addr;
    MPI_Datatype result_datatype;
} MPIDIG_acc_req_t;
151 
/* Completion callback type: takes the request and returns an int. A pointer
 * of this type is stored as `target_cmpl_cb` in MPIDIG_req_ext_t.
 * NOTE(review): the return value is presumably an MPI error code -- confirm. */
typedef int (*MPIDIG_req_cmpl_cb) (MPIR_Request * req);
153 
/* Selector used for asynchronous payload transfer: tells which description
 * of the receive buffer is in effect. Values are spelled out explicitly and
 * match the implicit numbering 0..2. */
typedef enum {
    /* use the datatype info in MPIDIG_req_t */
    MPIDIG_RECV_DATATYPE = 0,
    /* set and use the contig recv-buffer info */
    MPIDIG_RECV_CONTIG = 1,
    /* set and use the iov recv-buffer info */
    MPIDIG_RECV_IOV = 2
} MPIDIG_recv_type;
160 
/* Receive-side state for asynchronous payload transfer. `recv_type` selects
 * which buffer description is used (see the per-field comments).
 * Note that the struct tag (MPIDIG_req_async) and the typedef name
 * (MPIDIG_rreq_async_t) differ. */
typedef struct MPIDIG_req_async {
    MPIDIG_recv_type recv_type;
    MPI_Aint in_data_sz;
    MPI_Aint offset;
    struct iovec *iov_ptr;      /* used with MPIDIG_RECV_IOV */
    int iov_num;                /* used with MPIDIG_RECV_IOV */
    struct iovec iov_one;       /* used with MPIDIG_RECV_CONTIG */
} MPIDIG_rreq_async_t;
169 
/* Send-side state for asynchronous payload transfer (embedded as
 * `send_async` in MPIDIG_req_ext_t).
 * NOTE(review): `seg_issued` / `seg_completed` presumably count payload
 * segments and `data_sz_left` the bytes still to send -- confirm. */
typedef struct MPIDIG_sreq_async {
    MPI_Datatype datatype;
    MPI_Aint data_sz_left;
    MPI_Aint offset;
    int seg_issued;
    int seg_completed;
} MPIDIG_sreq_async_t;
177 
/* Extended per-request state, reached through the `req` pointer of
 * MPIDIG_req_t. */
typedef struct MPIDIG_req_ext_t {
    /* operation-specific fields; the members share storage */
    union {
        MPIDIG_sreq_t sreq;
        MPIDIG_rreq_t rreq;
        MPIDIG_put_req_t preq;
        MPIDIG_get_req_t greq;
        MPIDIG_cswap_req_t creq;
        MPIDIG_acc_req_t areq;
    };

    MPIDIG_rreq_async_t recv_async;
    MPIDIG_sreq_async_t send_async;
    struct iovec *iov;
    MPIDIG_req_cmpl_cb target_cmpl_cb;
    uint64_t seq_no;
    MPIR_Request *request;
    uint64_t status;            /* tested against MPIDIG_REQ_IN_PROGRESS by
                                 * MPIDIG_REQUEST_IN_PROGRESS() */
    struct MPIDIG_req_ext_t *next, *prev;

} MPIDIG_req_ext_t;
198 
/* Active-message (MPIDIG) view of a request; this is the `am` member of
 * the `ch4` union in MPIDI_Devreq_t and is accessed with MPIDIG_REQUEST(). */
typedef struct MPIDIG_req_t {
    union {
        /* members are provided by the netmod via MPIDI_NM_REQUEST_AM_DECL */
    MPIDI_NM_REQUEST_AM_DECL} netmod_am;
#ifndef MPIDI_CH4_DIRECT_NETMOD
    struct {
        /* members are provided by the shmmod via MPIDI_SHM_REQUEST_AM_DECL */
    MPIDI_SHM_REQUEST_AM_DECL} shm_am;
#endif
    MPIDIG_req_ext_t *req;      /* extended state; may be NULL in work-queue
                                 * builds (see MPIDIG_REQUEST_IN_PROGRESS) */
    void *buffer;
    MPI_Aint count;
    int rank;
    int tag;
    MPIR_Context_id_t context_id;
    MPI_Datatype datatype;
} MPIDIG_req_t;
214 
/* Structure to capture arguments for pt2pt persistent communications.
 * It is the `preq` member of the `ch4` union in MPIDI_Devreq_t and is
 * accessed with MPIDI_PREQUEST(). */
typedef struct MPIDI_prequest {
    MPIDI_ptype p_type;         /* persistent request type */
    void *buffer;
    MPI_Aint count;
    int rank;
    int tag;
    MPIR_Context_id_t context_id;
    MPI_Datatype datatype;
} MPIDI_prequest_t;
225 
/* Device-private part of a request; MPID_DEV_REQUEST_DECL declares it as
 * the member `dev`. The `ch4` union overlays the different views of the
 * same storage (AM, persistent, netmod, shm, work-queue command). */
typedef struct {
#ifndef MPIDI_CH4_DIRECT_NETMOD
    int is_local;
    /* Anysource handling. Netmod and shm specific requests are cross
     * referenced. This must be present all of the time to avoid lots of extra
     * ifdefs in the code. */
    struct MPIR_Request *anysource_partner_request;
#endif

    union {
        /* The first fields are used by the MPIDIG apis */
        MPIDIG_req_t am;

        /* Used by pt2pt persistent communication */
        MPIDI_prequest_t preq;

        /* Used by the netmod direct apis */
        union {
        MPIDI_NM_REQUEST_DECL} netmod;

#ifndef MPIDI_CH4_DIRECT_NETMOD
        /* Used by the shmmod direct apis */
        union {
        MPIDI_SHM_REQUEST_DECL} shm;
#endif

#ifdef MPIDI_CH4_USE_WORK_QUEUES
        /* Present only in work-queue builds */
        MPIDI_workq_elemt_t command;
#endif
    } ch4;
} MPIDI_Devreq_t;
/* Byte offset of dev.ch4.netmod inside struct MPIR_Request, i.e. the size
 * of everything that precedes the netmod-specific request area. */
#define MPIDI_REQUEST_HDR_SIZE              offsetof(struct MPIR_Request, dev.ch4.netmod)
/* Field accessors (each expands to an lvalue):
 *   MPIDI_REQUEST   -> member of (req)->dev
 *   MPIDIG_REQUEST  -> member of (req)->dev.ch4.am
 *   MPIDI_PREQUEST  -> member of (req)->dev.ch4.preq */
#define MPIDI_REQUEST(req,field)       (((req)->dev).field)
#define MPIDIG_REQUEST(req,field)       (((req)->dev.ch4.am).field)
#define MPIDI_PREQUEST(req,field)       (((req)->dev.ch4.preq).field)
260 
/* Non-zero when the MPIDIG_REQ_IN_PROGRESS bit is set in the request's
 * extended status word. The work-queue variant additionally checks that
 * the extension pointer is non-NULL before dereferencing it; the other
 * variant dereferences it unconditionally. Note that `r` is evaluated more
 * than once in the work-queue variant. */
#ifdef MPIDI_CH4_USE_WORK_QUEUES
/* `(r)->dev.ch4.am.req` might not be allocated right after SHM_mpi_recv when
 * the operations are enqueued with the handoff model. */
#define MPIDIG_REQUEST_IN_PROGRESS(r)   ((r)->dev.ch4.am.req && ((r)->dev.ch4.am.req->status & MPIDIG_REQ_IN_PROGRESS))
#else
#define MPIDIG_REQUEST_IN_PROGRESS(r)   ((r)->dev.ch4.am.req->status & MPIDIG_REQ_IN_PROGRESS)
#endif /* #ifdef MPIDI_CH4_USE_WORK_QUEUES */
268 
/* Locality / anysource-partner helpers.
 *
 * When shared memory is built in (MPIDI_CH4_DIRECT_NETMOD undefined):
 *   MPIDI_REQUEST_ANYSOURCE_PARTNER(req) is an lvalue for the partner
 *   request pointer stored in (req)->dev;
 *   MPIDI_REQUEST_SET_LOCAL(req, is_local_, partner_) stores both the
 *   locality flag and the partner pointer. `req` is evaluated twice.
 *
 * In a direct-netmod build the fields do not exist: the accessor expands to
 * NULL and the setter is a no-op that does not evaluate its arguments.
 *
 * Macro parameters are parenthesized in the expansion so that any argument
 * expression is substituted as a single operand. */
#ifndef MPIDI_CH4_DIRECT_NETMOD
#define MPIDI_REQUEST_ANYSOURCE_PARTNER(req)  (((req)->dev).anysource_partner_request)
#define MPIDI_REQUEST_SET_LOCAL(req, is_local_, partner_) \
    do { \
        (req)->dev.is_local = (is_local_); \
        (req)->dev.anysource_partner_request = (partner_); \
    } while (0)
#else
#define MPIDI_REQUEST_ANYSOURCE_PARTNER(req)  NULL
#define MPIDI_REQUEST_SET_LOCAL(req, is_local_, partner_)  do { } while (0)
#endif
280 
/* Forward declarations of the device request hooks (create / free /
 * destroy). Each takes the request and returns nothing; the definitions are
 * not in this header.
 * NOTE(review): MPL_STATIC_INLINE_PREFIX presumably expands to a
 * static-inline qualifier -- confirm in MPL. */
MPL_STATIC_INLINE_PREFIX void MPID_Request_create_hook(struct MPIR_Request *req);
MPL_STATIC_INLINE_PREFIX void MPID_Request_free_hook(struct MPIR_Request *req);
MPL_STATIC_INLINE_PREFIX void MPID_Request_destroy_hook(struct MPIR_Request *req);
284 
/* One entry of the `shared_table` array referenced from MPIDIG_win_t:
 * displacement unit, size and base address of a shared window segment.
 * NOTE(review): presumably one entry per process sharing the window --
 * confirm against the window creation code. */
typedef struct MPIDIG_win_shared_info {
    uint32_t disp_unit;
    size_t size;
    void *shm_base_addr;
} MPIDIG_win_shared_info_t;
290 
/* Bit flags for accumulate ordering, one bit per ordering kind
 * (RAR, RAW, WAR, WAW). They can be OR-ed together; presumably stored in
 * the `accumulate_ordering` field of MPIDIG_win_info_args_t -- confirm. */
#define MPIDIG_ACCU_ORDER_RAR (1 << 0)
#define MPIDIG_ACCU_ORDER_RAW (1 << 1)
#define MPIDIG_ACCU_ORDER_WAR (1 << 2)
#define MPIDIG_ACCU_ORDER_WAW (1 << 3)
295 
/* Value type of the `accumulate_ops` field in MPIDIG_win_info_args_t.
 * Values are spelled out explicitly and match the implicit numbering. */
typedef enum {
    MPIDIG_ACCU_SAME_OP = 0,
    MPIDIG_ACCU_SAME_OP_NO_OP = 1
} MPIDIG_win_info_accumulate_ops;
300 
/* Exclusive upper bound of the op index used as bit position in
 * `which_accumulate_ops` (see MPIDIG_win_info_args_t).
 * NOTE(review): the trailing comment mentions "+ cswap" but the expansion is
 * exactly MPIR_OP_N_BUILTIN; confirm that this constant already accounts
 * for cswap. */
#define MPIDIG_ACCU_NUM_OP (MPIR_OP_N_BUILTIN)  /* builtin reduce op + cswap */
302 
/* Window hints kept per window (member `info_args` of MPIDIG_win_t).
 * NOTE(review): the int fields presumably mirror the MPI window info keys
 * of the same names -- confirm against the info parsing code. */
typedef struct MPIDIG_win_info_args_t {
    int no_locks;
    int same_size;
    int same_disp_unit;
    int accumulate_ordering;
    int alloc_shared_noncontig;
    MPIDIG_win_info_accumulate_ops accumulate_ops;

    /* hints to tradeoff atomicity support */
    uint32_t which_accumulate_ops;      /* Arbitrary combination of {1<<max|1<<min|1<<sum|...}
                                         * with bit shift defined by op index (0<=index<MPIDIG_ACCU_NUM_OP).
                                         * any_op and none are two special values.
                                         * any_op by default. */
    bool accumulate_noncontig_dtype;    /* true by default. */
    MPI_Aint accumulate_max_bytes;      /* Non-negative integer, -1 (unlimited) by default.
                                         * TODO: can be set to win_size.*/
    bool disable_shm_accumulate;        /* false by default. */
    bool coll_attach;           /* false by default. Valid only for dynamic window */

    /* alloc_shm: MPICH specific hint (same in CH3).
     * If true, MPICH will try to use shared memory routines for the window.
     * Default is true for allocate-based windows, and false for other
     * windows. Note that this hint can be also used in create-based windows,
     * and it means the user window buffer is allocated over shared memory,
     * thus RMA operation can use shm routines. */
    int alloc_shm;
} MPIDIG_win_info_args_t;
330 
/* Node of a singly linked list of lock entries; the list head and tail are
 * kept in MPIDIG_win_lock_recvd_t.
 * NOTE(review): `mtype` and `type` presumably hold the message kind and the
 * lock type of the request -- confirm. */
struct MPIDIG_win_lock {
    struct MPIDIG_win_lock *next;
    int rank;
    uint16_t mtype;
    uint16_t type;
};
337 
/* Lock bookkeeping embedded as `lock_recvd` in MPIDIG_win_sync_t: a list of
 * MPIDIG_win_lock nodes (head/tail) plus the state of the current lock. */
typedef struct MPIDIG_win_lock_recvd {
    struct MPIDIG_win_lock *head;
    struct MPIDIG_win_lock *tail;
    int type;                   /* current lock's type */
    unsigned count;             /* count of granted locks (not received) */
} MPIDIG_win_lock_recvd_t;
344 
/* Per-target lock flag, embedded as `lock` in MPIDIG_win_target_sync_t. */
typedef struct MPIDIG_win_target_sync_lock {
    /* NOTE: use volatile to avoid compiler optimization which keeps reading
     * register value when no dependency or function pointer is found in fully
     * inlined code.*/
    volatile unsigned locked;   /* locked == 0 or 1 */
} MPIDIG_win_target_sync_lock_t;
351 
/* Window-wide lock epoch counter, embedded as `lock` in MPIDIG_win_sync_t. */
typedef struct MPIDIG_win_sync_lock {
    unsigned count;             /* count of lock epochs on the window */
} MPIDIG_win_sync_lock_t;
355 
/* Lock-all state, embedded as `lockall` in MPIDIG_win_sync_t. */
typedef struct MPIDIG_win_sync_lockall {
    /* NOTE: use volatile to avoid compiler optimization which keeps reading
     * register value when no dependency or function pointer is found in fully
     * inlined code.*/
    volatile unsigned allLocked;        /* 0 <= allLocked < size */
} MPIDIG_win_sync_lockall_t;
362 
/* Group plus counter used for the `sc` (access epoch) and `pw` (exposure
 * epoch) members of MPIDIG_win_sync_t.
 * NOTE(review): the names suggest start/complete and post/wait -- confirm. */
typedef struct MPIDIG_win_sync_pscw {
    struct MPIR_Group *group;
    /* NOTE: use volatile to avoid compiler optimization which keeps reading
     * register value when no dependency or function pointer is found in fully
     * inlined code.*/
    volatile unsigned count;
} MPIDIG_win_sync_pscw_t;
370 
/* Per-target synchronization state, embedded as `sync` in
 * MPIDIG_win_target_t. */
typedef struct MPIDIG_win_target_sync {
    int access_epoch_type;      /* NONE, LOCK. */
    MPIDIG_win_target_sync_lock_t lock;
    uint32_t assert_mode;       /* bit-vector OR of zero or more of the following integer constant:
                                 * MPI_MODE_NOCHECK, MPI_MODE_NOSTORE, MPI_MODE_NOPUT, MPI_MODE_NOPRECEDE, MPI_MODE_NOSUCCEED. */
} MPIDIG_win_target_sync_t;
377 
/* Window-wide synchronization state, embedded as `sync` in MPIDIG_win_t.
 * Tracks the current access and exposure epoch types together with the
 * per-epoch bookkeeping structures. */
typedef struct MPIDIG_win_sync {
    int access_epoch_type;      /* NONE, FENCE, LOCKALL, START,
                                 * LOCK (refer to target_sync). */
    int exposure_epoch_type;    /* NONE, FENCE, POST. */
    uint32_t assert_mode;       /* bit-vector OR of zero or more of the following integer constant:
                                 * MPI_MODE_NOCHECK, MPI_MODE_NOSTORE, MPI_MODE_NOPUT, MPI_MODE_NOPRECEDE, MPI_MODE_NOSUCCEED. */

    /* access epochs */
    /* TODO: Can we put access epochs in union,
     * since no concurrent epochs is allowed ? */
    MPIDIG_win_sync_pscw_t sc;
    MPIDIG_win_sync_lockall_t lockall;
    MPIDIG_win_sync_lock_t lock;

    /* exposure epochs */
    MPIDIG_win_sync_pscw_t pw;
    MPIDIG_win_lock_recvd_t lock_recvd;
} MPIDIG_win_sync_t;
396 
/* Per-target record referenced from the `targets` member of MPIDIG_win_t.
 * It embeds a uthash handle, so records are presumably kept in a uthash
 * table keyed by `rank` -- confirm against the lookup code. */
typedef struct MPIDIG_win_target {
    MPIR_cc_t local_cmpl_cnts;  /* increase at OP issuing, decrease at local completion */
    MPIR_cc_t remote_cmpl_cnts; /* increase at OP issuing, decrease at remote completion */
    MPIR_cc_t remote_acc_cmpl_cnts;     /* for acc only, increase at OP issuing, decrease at remote completion */
    MPIDIG_win_target_sync_t sync;
    int rank;
    UT_hash_handle hash_handle;
} MPIDIG_win_target_t;
405 
/* Active-message (MPIDIG) part of a window; this is the `am` member of
 * MPIDI_Devwin_t and is accessed with MPIDIG_WIN(). */
typedef struct MPIDIG_win_t {
    uint64_t win_id;
    void *mmap_addr;
    int64_t mmap_sz;

    /* per-window OP completion for fence */
    MPIR_cc_t local_cmpl_cnts;  /* increase at OP issuing, decrease at local completion */
    MPIR_cc_t remote_cmpl_cnts; /* increase at OP issuing, decrease at remote completion */
    MPIR_cc_t remote_acc_cmpl_cnts;     /* for acc only, increase at OP issuing, decrease at remote completion */

    MPIDIG_win_sync_t sync;
    MPIDIG_win_info_args_t info_args;
    MPIDIG_win_shared_info_t *shared_table;

    /* per-target structure for sync and OP completion. */
    MPIDIG_win_target_t *targets;
} MPIDIG_win_t;
423 
/* Window attribute bits used on the fast path. Each named attribute owns
 * one bit (bits 0..5). MPIDI_WINATTR_LAST_BIT is left implicit and therefore
 * evaluates to 33 (previous enumerator + 1); it is not itself a single-bit
 * mask. */
typedef enum {
    MPIDI_WINATTR_DIRECT_INTRA_COMM = 1 << 0,

    /* shm optimized flag (0 or 1), set at shmmod win initialization time.
     * Equal to 1 if the window has a shared memory region associated with it
     * and the shmmod supports load/store based RMA operations over the window
     * (e.g., may rely on support of interprocess mutex). */
    MPIDI_WINATTR_SHM_ALLOCATED = 1 << 1,

    /* shortcut of disable_shm_accumulate in MPIDIG_win_info_args_t. */
    MPIDI_WINATTR_ACCU_NO_SHM = 1 << 2,

    MPIDI_WINATTR_ACCU_SAME_OP_NO_OP = 1 << 3,

    /* whether a netmod may reach the window. Set by netmod at win init.
     * Each netmod decides the definition of "reachable" at win_init based on
     * its internal optimization. */
    MPIDI_WINATTR_NM_REACHABLE = 1 << 4,

    /* whether the memory region is registered dynamically. Valid only for
     * dynamic window. Set by netmod. */
    MPIDI_WINATTR_NM_DYNAMIC_MR = 1 << 5,

    MPIDI_WINATTR_LAST_BIT
} MPIDI_winattr_bit_t;

/* bit-vector of zero or multiple integer attributes defined in MPIDI_winattr_bit_t. */
typedef unsigned MPIDI_winattr_t;
441 
/* Device-private part of a window; MPID_DEV_WIN_DECL declares it as the
 * member `dev`. Combines the attribute bit-vector, the MPIDIG state and the
 * netmod/shmmod specific areas. */
typedef struct {
    MPIDI_winattr_t winattr;    /* attributes for performance optimization at fast path. */
    MPIDIG_win_t am;
    union {
        /* members are provided by the netmod via MPIDI_NM_WIN_DECL */
    MPIDI_NM_WIN_DECL} netmod;
#ifndef MPIDI_CH4_DIRECT_NETMOD
    struct {
        /* multiple shmmods may co-exist. */
    MPIDI_SHM_WIN_DECL} shm;
#endif
} MPIDI_Devwin_t;
453 
/* Window field accessors (each expands to a parenthesized lvalue):
 *   MPIDIG_WIN(win, field) -> member of (win)->dev.am
 *   MPIDI_WIN(win, field)  -> member of (win)->dev
 * Both expansions are fully parenthesized, matching MPIDI_REQUEST() and
 * friends, so the result is always substituted as a single operand. */
#define MPIDIG_WIN(win,field)        (((win)->dev.am).field)
#define MPIDI_WIN(win,field)         (((win)->dev).field)
456 
/* Type of the `is_local` member of MPIDI_av_entry_t (present when
 * MPIDI_BUILD_CH4_LOCALITY_INFO is defined). */
typedef unsigned MPIDI_locality_t;
458 
/* Active-message (MPIDIG) part of a communicator; this is the `am` member
 * of MPIDI_Devcomm_t and is accessed with MPIDIG_COMM(). */
typedef struct MPIDIG_comm_t {
    MPIDIG_rreq_t *posted_list;
    MPIDIG_rreq_t *unexp_list;
    uint32_t window_instance;
#ifdef HAVE_DEBUGGER_SUPPORT
    /* only present with debugger support; presumably point at the list
     * heads above so a debugger can walk the queues -- confirm */
    MPIDIG_rreq_t **posted_head_ptr;
    MPIDIG_rreq_t **unexp_head_ptr;
#endif
} MPIDIG_comm_t;
468 
/* Block-strided rank translation: ranks are grouped into blocks of
 * `blocksize` consecutive ids; for every complete block that precedes
 * `rank`, the result is advanced by the gap (stride - blocksize), then
 * `rank` and `offset` are added. All arguments are evaluated with integer
 * arithmetic; `rank` and `blocksize` appear twice in the expansion. */
#define MPIDI_CALC_STRIDE(rank, stride, blocksize, offset)              \
    ((((rank) / (blocksize)) * ((stride) - (blocksize))) + (rank) + (offset))

/* Plain strided rank translation: rank * stride + offset. */
#define MPIDI_CALC_STRIDE_SIMPLE(rank, stride, offset)                  \
    (((rank) * (stride)) + (offset))
474 
/* Representation used by MPIDI_rank_map_t (its `mode` member). Values are
 * spelled out explicitly and match the implicit numbering 0..11. */
typedef enum {
    MPIDI_RANK_MAP_DIRECT = 0,
    MPIDI_RANK_MAP_DIRECT_INTRA = 1,
    MPIDI_RANK_MAP_OFFSET = 2,
    MPIDI_RANK_MAP_OFFSET_INTRA = 3,
    MPIDI_RANK_MAP_STRIDE = 4,
    MPIDI_RANK_MAP_STRIDE_INTRA = 5,
    MPIDI_RANK_MAP_STRIDE_BLOCK = 6,
    MPIDI_RANK_MAP_STRIDE_BLOCK_INTRA = 7,
    MPIDI_RANK_MAP_LUT = 8,
    MPIDI_RANK_MAP_LUT_INTRA = 9,
    MPIDI_RANK_MAP_MLUT = 10,
    MPIDI_RANK_MAP_NONE = 11
} MPIDI_rank_map_mode;
489 
/* MPIDI_lpid_t: an index into one address table (see MPIDIU_get_av, which
 * indexes `table[(lpid)]`). */
typedef int MPIDI_lpid_t;
/* MPIDI_gpid_t: an (avtid, lpid) pair, i.e. the address table id together
 * with the index inside that table. */
typedef struct {
    int avtid;
    int lpid;
} MPIDI_gpid_t;
495 
/* Lookup table of lpids: an object header followed by a flexible array of
 * MPIDI_lpid_t. Referenced from the `irreg.lut` member of MPIDI_rank_map_t.
 * NOTE(review): MPIR_OBJECT_HEADER presumably supplies handle/refcount
 * fields -- confirm. */
typedef struct {
    MPIR_OBJECT_HEADER;
    MPIDI_lpid_t lpid[];
} MPIDI_rank_map_lut_t;
500 
/* Lookup table of (avtid, lpid) pairs: an object header followed by a
 * flexible array of MPIDI_gpid_t. Referenced from the `irreg.mlut` member of
 * MPIDI_rank_map_t. */
typedef struct {
    MPIR_OBJECT_HEADER;
    MPIDI_gpid_t gpid[];
} MPIDI_rank_map_mlut_t;
505 
/* Rank map of a communicator (used for both `map` and `local_map` in
 * MPIDI_Devcomm_t). `mode` selects the representation.
 * NOTE(review): presumably `reg` serves the OFFSET/STRIDE modes and `irreg`
 * the LUT/MLUT modes, with `lpid`/`gpid` pointing into the table object `t`
 * -- confirm against the rank translation code. */
typedef struct {
    MPIDI_rank_map_mode mode;
    int avtid;
    int size;

    /* parameters of the regular (computed) representations */
    union {
        int offset;
        struct {
            int offset;
            int stride;
            int blocksize;
        } stride;
    } reg;

    /* tables of the irregular (lookup) representations */
    union {
        struct {
            MPIDI_rank_map_lut_t *t;
            MPIDI_lpid_t *lpid;
        } lut;
        struct {
            MPIDI_rank_map_mlut_t *t;
            MPIDI_gpid_t *gpid;
        } mlut;
    } irreg;
} MPIDI_rank_map_t;
531 
/* Device-private part of a communicator; MPID_DEV_COMM_DECL declares it as
 * the member `dev`. Unlike the request, `ch4` is a struct here, so all
 * members coexist. */
typedef struct MPIDI_Devcomm_t {
    struct {
        /* The first fields are used by the AM(MPIDIG) apis */
        MPIDIG_comm_t am;

        /* Used by the netmod direct apis */
        union {
        MPIDI_NM_COMM_DECL} netmod;

#ifndef MPIDI_CH4_DIRECT_NETMOD
        /* Used by the shmmod direct apis */
        union {
        MPIDI_SHM_COMM_DECL} shm;
#endif

        MPIDI_rank_map_t map;
        MPIDI_rank_map_t local_map;
        void *csel_comm;        /* collective selection handle */
    } ch4;
} MPIDI_Devcomm_t;
/* Communicator field accessors (each expands to a parenthesized lvalue):
 *   MPIDIG_COMM(comm, field) -> member of (comm)->dev.ch4.am
 *   MPIDI_COMM(comm, field)  -> member of (comm)->dev.ch4
 * Both expansions are fully parenthesized, matching MPIDI_REQUEST() and
 * MPIDIG_WIN(), so the result is always substituted as a single operand. */
#define MPIDIG_COMM(comm,field) (((comm)->dev.ch4.am).field)
#define MPIDI_COMM(comm,field) (((comm)->dev.ch4).field)
553 
/* Device-private part of a reduction op; MPID_DEV_OP_DECL declares it as
 * the member `dev`. Only the netmod contributes fields. */
typedef struct {
    union {
        /* members are provided by the netmod via MPIDI_NM_OP_DECL */
    MPIDI_NM_OP_DECL} netmod;
} MPIDI_Devop_t;
558 
/* Member declarations that embed the device-private structures under the
 * name `dev` (the accessor macros in this header rely on that name).
 * NOTE(review): presumably expanded by the upper layer inside the
 * request, window, communicator and op objects -- confirm. */
#define MPID_DEV_REQUEST_DECL    MPIDI_Devreq_t  dev;
#define MPID_DEV_WIN_DECL        MPIDI_Devwin_t  dev;
#define MPID_DEV_COMM_DECL       MPIDI_Devcomm_t dev;
#define MPID_DEV_OP_DECL         MPIDI_Devop_t   dev;
563 
/* One entry of an address table (element type of MPIDI_av_table_t::table):
 * netmod-specific address data plus an optional locality value. */
typedef struct MPIDI_av_entry {
    union {
        /* members are provided by the netmod via MPIDI_NM_ADDR_DECL */
    MPIDI_NM_ADDR_DECL} netmod;
#ifdef MPIDI_BUILD_CH4_LOCALITY_INFO
    MPIDI_locality_t is_local;
#endif
} MPIDI_av_entry_t;
571 
/* Address table: an object header, an entry count and a flexible array of
 * entries.
 * NOTE(review): `size` is presumably the number of elements in `table` --
 * confirm against the allocation code. */
typedef struct {
    MPIR_OBJECT_HEADER;
    int size;
    MPIDI_av_entry_t table[];
} MPIDI_av_table_t;
577 
/* Global address tables, defined elsewhere. MPIDI_av_table is indexed by
 * avtid (see MPIDIU_get_av_table).
 * NOTE(review): MPIDI_av_table0 is presumably a shortcut to the table with
 * avtid 0 -- confirm. */
extern MPIDI_av_table_t **MPIDI_av_table;
extern MPIDI_av_table_t *MPIDI_av_table0;
580 
/* Address table lookups. No bounds or NULL checks are performed.
 *   MPIDIU_get_av_table(avtid) -> pointer to the table with id `avtid`
 *   MPIDIU_get_av(avtid, lpid) -> the entry (an lvalue, not a pointer) at
 *                                 index `lpid` of that table */
#define MPIDIU_get_av_table(avtid) (MPIDI_av_table[(avtid)])
#define MPIDIU_get_av(avtid, lpid) (MPIDI_av_table[(avtid)]->table[(lpid)])

/* Node map of the table with id `avtid`, taken from MPIDI_global.node_map. */
#define MPIDIU_get_node_map(avtid)   (MPIDI_global.node_map[(avtid)])
585 
/* NOTE(review): presumably tells the upper layer that this device provides
 * communicator hooks -- confirm where this macro is tested. */
#define HAVE_DEV_COMM_HOOK
587 
/*
 * Operations converting (avtid, lpid) to/from a packed "lupid".
 *
 * Layout of a lupid when int is 32 bits wide (the masks below are written
 * as 32-bit constants):
 *   bit  31      "new_avt_mark"
 *   bits 30..24  avtid (MPIDIU_AVTID_BITS bits)
 *   bits 23..0   lpid  (MPIDIU_LPID_BITS bits)
 *
 * 1 bit is reserved for "new_avt_mark". It is normally cleared before
 * accessing the avtid and lpid. In addition, the avtid mask has that bit
 * set to 0, so MPIDIU_LUPID_GET_AVTID returns the plain avtid even when the
 * mark is still set (previously the mask was ~MPIDIU_LPID_MASK, which
 * included the mark bit and leaked it into the extracted avtid).
 *
 * MPIDIU_LUPID_SET_NEW_AVT_MARK / MPIDIU_LUPID_CLEAR_NEW_AVT_MARK modify
 * their argument in place, so it must be an lvalue.
 */
#define MPIDIU_AVTID_BITS                    (7)
#define MPIDIU_LPID_BITS                     (8 * sizeof(int) - (MPIDIU_AVTID_BITS + 1))
#define MPIDIU_LPID_MASK                     (0xFFFFFFFFU >> (MPIDIU_AVTID_BITS + 1))
#define MPIDIU_NEW_AVT_MARK                  (0x80000000U)
#define MPIDIU_AVTID_MASK                    (~(MPIDIU_LPID_MASK | MPIDIU_NEW_AVT_MARK))
#define MPIDIU_LUPID_CREATE(avtid, lpid)      (((avtid) << MPIDIU_LPID_BITS) | (lpid))
#define MPIDIU_LUPID_GET_AVTID(lupid)          ((((lupid) & MPIDIU_AVTID_MASK) >> MPIDIU_LPID_BITS))
#define MPIDIU_LUPID_GET_LPID(lupid)           (((lupid) & MPIDIU_LPID_MASK))
#define MPIDIU_LUPID_SET_NEW_AVT_MARK(lupid)   ((lupid) |= MPIDIU_NEW_AVT_MARK)
#define MPIDIU_LUPID_CLEAR_NEW_AVT_MARK(lupid) ((lupid) &= (~MPIDIU_NEW_AVT_MARK))
#define MPIDIU_LUPID_IS_NEW_AVT(lupid)         ((lupid) & MPIDIU_NEW_AVT_MARK)
604 
/* Mask selecting the most significant bit of a 32-bit value.
 * NOTE(review): presumably marks ids that belong to dynamic processes; it
 * has the same value as MPIDIU_NEW_AVT_MARK -- confirm the intended use. */
#define MPIDI_DYNPROC_MASK                 (0x80000000U)
606 
/* True when both the `map` and the `local_map` of `comm` use avtid 0.
 * `comm` is evaluated twice.
 * NOTE(review): per the name, avtid 0 presumably means that no dynamic
 * process is involved -- confirm. */
#define MPID_INTERCOMM_NO_DYNPROC(comm) \
    (MPIDI_COMM((comm),map).avtid == 0 && MPIDI_COMM((comm),local_map).avtid == 0)
609 
/* Declared here, defined elsewhere.
 * NOTE(review): the return value is presumably an MPI error code -- confirm. */
int MPIDI_check_for_failed_procs(void);

#ifdef HAVE_SIGNAL
/* Signal handler prototype, only declared when HAVE_SIGNAL is defined.
 * NOTE(review): per its name it is presumably installed for SIGUSR1 -- confirm. */
void MPIDI_sigusr1_handler(int sig);
#endif
615 
616 #include "mpidu_pre.h"
617 
618 #endif /* MPIDPRE_H_INCLUDED */
619