1 /*
2  * Copyright (c) 2013-2018 Intel, Inc. All rights reserved
3  *
4  * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
5  * $COPYRIGHT$
6  *
7  * Additional copyrights may follow
8  *
9  * $HEADER$
10  */
11 
12 #ifndef MTL_OFI_TYPES_H_HAS_BEEN_INCLUDED
13 #define MTL_OFI_TYPES_H_HAS_BEEN_INCLUDED
14 
15 #include "mtl_ofi.h"
16 
17 BEGIN_C_DECLS
18 
19 /**
20  * MTL Module Interface
21  */
22 
23 typedef struct mca_mtl_ofi_context_t {
24     /* Transmit and receive contexts */
25     struct fid_ep *tx_ep;
26     struct fid_ep *rx_ep;
27 
28     /* Completion queue */
29     struct fid_cq *cq;
30 
31     /* Thread locking */
32     opal_mutex_t context_lock;
33 } mca_mtl_ofi_context_t;
34 
35 typedef struct mca_mtl_ofi_module_t {
36     mca_mtl_base_module_t base;
37 
38     /** Fabric Domain handle */
39     struct fid_fabric *fabric;
40 
41     /** Access Domain handle */
42     struct fid_domain *domain;
43 
44     /** Address vector handle */
45     struct fid_av *av;
46 
47     /* Multi-threaded Application flag */
48     bool mpi_thread_multiple;
49 
50     /* Scalable Endpoint attributes */
51     struct fid_ep *sep;                 /* Endpoint object */
52     mca_mtl_ofi_context_t *ofi_ctxt;    /* OFI contexts */
53     int threshold_comm_context_id;      /* Set threshold communicator ID */
54     int *comm_to_context;               /* Map communicator ID to context */
55     int rx_ctx_bits;                    /* Bits used for RX context */
56     int total_ctxts_used;               /* Total number of contexts used */
57     int enable_sep;                     /* MCA to enable/disable SEP feature */
58     int thread_grouping;                /* MCA for thread grouping feature */
59     int num_ofi_contexts;               /* MCA for number of contexts to use */
60 
61     /** Endpoint name length */
62     size_t epnamelen;
63 
64     /** "Any source" address */
65     fi_addr_t any_addr;
66 
67     /** OFI provider name */
68     char *provider_name;
69 
70     /** Maximum inject size */
71     size_t max_inject_size;
72 
73     /** Largest message that can be sent in a single send. */
74     size_t max_msg_size;
75 
76     /** Maximum number of CQ events to read in OFI Progress */
77     int ofi_progress_event_count;
78 
79     /** Use FI_REMOTE_CQ_DATA*/
80     bool fi_cq_data;
81 
82     /** Info used to create the OFI tag **/
83     unsigned long long source_rank_tag_mask;
84     int num_bits_source_rank;
85     unsigned long long source_rank_mask;
86     unsigned long long mpi_tag_mask;
87     int num_bits_mpi_tag;
88     int num_peers;
89 
90     /** Synchronous protocol tag bits */
91     unsigned long long sync_send;
92     unsigned long long sync_send_ack;
93     unsigned long long sync_proto_mask;
94 
95     /** Optimized function Symbol Tables **/
96     struct ompi_mtl_ofi_symtable sym_table;
97 
98 } mca_mtl_ofi_module_t;
99 
100 extern mca_mtl_ofi_module_t ompi_mtl_ofi;
101 
102 typedef struct mca_mtl_ofi_component_t {
103     /** Base MTL component */
104     mca_mtl_base_component_2_0_0_t super;
105 } mca_mtl_ofi_component_t;
106 
/** Endpoint type selector: regular endpoint (0) or scalable endpoint (1). */
typedef enum {
    OFI_REGULAR_EP = 0,
    OFI_SCALABLE_EP = 1
} mca_mtl_ofi_ep_type;
111 
/*
 * Upper limit on the number of events read from a CQ.
 * 100 was found to be optimal from empirical data. To read fewer events
 * from the CQ, use the MCA variable instead of changing this value.
 */
#define MTL_OFI_MAX_PROG_EVENT_COUNT    (100)
119 
120 /*OFI TAG:
121  * Define 3 different OFI tag distributions:
122  * 1) Support FI_REMOTE_CQ_DATA: No need for source rank in the tag
123  * 2) ofi_tag_1: fallback when no FI_REMOTE_CQ_DATA is supported
124  * 3) ofi_tag_2: Alternative tag when no FI_REMOTE_CQ_DATA is supported
125  *    with more bits for the communicator ID.
126  * More details of the tags are in the README file (mtl_ofi_tag_mode).
127 */
128 
#define MTL_OFI_MINIMUM_CID_BITS        (8)

/* Tag layout when FI_REMOTE_CQ_DATA is supported: the source rank travels in
 * the CQ data (4 bytes is the minimum), so the tag only carries
 *
 *  01234567 01234567 01234567 012345  67  01234567 01234567 01234567 01234567
 *                                   |    |
 *           context_id              |prot|          message tag
 *
 * i.e. 30 bits of context id, 2 protocol bits and 32 bits of message tag.
 * The masks below are derived from those field widths.
 */
#define MTL_OFI_PROTO_BIT_COUNT         (2)
#define MTL_OFI_HIGHEST_TAG_BIT         (1ULL << 63)

#define MTL_OFI_CID_BIT_COUNT_DATA      (30)
#define MTL_OFI_TAG_BIT_COUNT_DATA      (32)

/* Low 32 bits: message tag */
#define MTL_OFI_TAG_MASK_DATA           ((1ULL << MTL_OFI_TAG_BIT_COUNT_DATA) - 1ULL)
/* Two bits directly above the message tag: protocol */
#define MTL_OFI_PROTO_MASK_DATA         (0x3ULL << MTL_OFI_TAG_BIT_COUNT_DATA)
#define MTL_OFI_SYNC_SEND_DATA          (0x1ULL << MTL_OFI_TAG_BIT_COUNT_DATA)
#define MTL_OFI_SYNC_SEND_ACK_DATA      (0x2ULL << MTL_OFI_TAG_BIT_COUNT_DATA)
/* Everything above tag + protocol: context id */
#define MTL_OFI_CID_MASK_DATA                                       \
    (((1ULL << MTL_OFI_CID_BIT_COUNT_DATA) - 1ULL)                  \
     << (MTL_OFI_TAG_BIT_COUNT_DATA + MTL_OFI_PROTO_BIT_COUNT))
146 
147 /* Send tag with CQ_DATA */
148 __opal_attribute_always_inline__ static inline uint64_t
mtl_ofi_create_send_tag_CQD(int comm_id,int tag)149 mtl_ofi_create_send_tag_CQD(int comm_id, int tag)
150 {
151     uint64_t  match_bits = comm_id;
152     match_bits = (match_bits << (MTL_OFI_TAG_BIT_COUNT_DATA
153                                 + MTL_OFI_PROTO_BIT_COUNT));
154     match_bits |= (tag & MTL_OFI_TAG_MASK_DATA);
155     return match_bits;
156 }
157 
158 /* Receive tag with CQ_DATA */
159 __opal_attribute_always_inline__ static inline void
mtl_ofi_create_recv_tag_CQD(uint64_t * match_bits,uint64_t * mask_bits,int comm_id,int tag)160 mtl_ofi_create_recv_tag_CQD(uint64_t *match_bits, uint64_t *mask_bits,
161                             int comm_id, int tag)
162 {
163     *mask_bits  = ompi_mtl_ofi.sync_proto_mask;
164     *match_bits = (uint64_t) comm_id;
165     *match_bits = (*match_bits << (MTL_OFI_PROTO_BIT_COUNT
166                                 +  MTL_OFI_TAG_BIT_COUNT_DATA));
167     if (MPI_ANY_TAG == tag) {
168         /* Special negative tags are used for collective operations.
169          * MPI_ANY_TAG should not match these special tags.
170          * See ompi/mca/coll/base/coll_tags.h
171          */
172         *mask_bits  |= (ompi_mtl_ofi.mpi_tag_mask>>1);
173     } else {
174         *match_bits |= (ompi_mtl_ofi.mpi_tag_mask & tag);
175     }
176 }
177 
/*
 * ofi_tag_1: fallback when no FI_REMOTE_CQ_DATA is supported
 *
 *  01234567 0123 4567 01234567 012345   67   01234567 01234567 01234567 01234567
 *               |                     |    |
 *    Comm id    |     source          |prot|           message tag
 *
 * 12 bits of communicator id, 18 bits of source rank, 2 protocol bits and
 * 32 bits of message tag. The masks below are derived from those widths.
 */

#define MTL_OFI_CID_BIT_COUNT_1         (12)
#define MTL_OFI_SOURCE_BIT_COUNT_1      (18)
#define MTL_OFI_TAG_BIT_COUNT_1         (32)

/* Source rank before it is shifted into place */
#define MTL_OFI_SOURCE_MASK_1           ((1ULL << MTL_OFI_SOURCE_BIT_COUNT_1) - 1ULL)
/* Source rank in its tag position: above the message tag and 2 protocol bits */
#define MTL_OFI_SOURCE_TAG_MASK_1       (MTL_OFI_SOURCE_MASK_1 << (MTL_OFI_TAG_BIT_COUNT_1 + 2))
#define MTL_OFI_TAG_MASK_1              ((1ULL << MTL_OFI_TAG_BIT_COUNT_1) - 1ULL)
#define MTL_OFI_PROTO_MASK_1            (0x3ULL << MTL_OFI_TAG_BIT_COUNT_1)
#define MTL_OFI_SYNC_SEND_1             (0x1ULL << MTL_OFI_TAG_BIT_COUNT_1)
#define MTL_OFI_SYNC_SEND_ACK_1         (0x2ULL << MTL_OFI_TAG_BIT_COUNT_1)
195 
/*
 * ofi_tag_2: Alternative tag when no FI_REMOTE_CQ_DATA is supported
 *
 *  01234567 01234567 01234567 01234567 01234567 01  23   4567 01234567 01234567
 *                            |                    |    |
 *                Comm id     |     source         |prot|     message tag
 *
 * 24 bits of communicator id, 18 bits of source rank, 2 protocol bits and
 * 20 bits of message tag. The masks below are derived from those widths.
 */

#define MTL_OFI_CID_BIT_COUNT_2         (24)
#define MTL_OFI_SOURCE_BIT_COUNT_2      (18)
#define MTL_OFI_TAG_BIT_COUNT_2         (20)

/* Source rank before it is shifted into place */
#define MTL_OFI_SOURCE_MASK_2           ((1ULL << MTL_OFI_SOURCE_BIT_COUNT_2) - 1ULL)
/* Source rank in its tag position: above the message tag and 2 protocol bits */
#define MTL_OFI_SOURCE_TAG_MASK_2       (MTL_OFI_SOURCE_MASK_2 << (MTL_OFI_TAG_BIT_COUNT_2 + 2))
#define MTL_OFI_TAG_MASK_2              ((1ULL << MTL_OFI_TAG_BIT_COUNT_2) - 1ULL)
#define MTL_OFI_PROTO_MASK_2            (0x3ULL << MTL_OFI_TAG_BIT_COUNT_2)
#define MTL_OFI_SYNC_SEND_2             (0x1ULL << MTL_OFI_TAG_BIT_COUNT_2)
#define MTL_OFI_SYNC_SEND_ACK_2         (0x2ULL << MTL_OFI_TAG_BIT_COUNT_2)
213 
214 /* Send tag */
215 __opal_attribute_always_inline__ static inline uint64_t
mtl_ofi_create_send_tag(int comm_id,int source,int tag)216 mtl_ofi_create_send_tag(int comm_id, int source, int tag)
217 {
218     uint64_t  match_bits = comm_id;
219     match_bits = (match_bits << ompi_mtl_ofi.num_bits_source_rank);
220     match_bits |= (uint64_t)(source & ompi_mtl_ofi.source_rank_mask);
221     match_bits = (match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
222                                  + MTL_OFI_PROTO_BIT_COUNT));
223     match_bits |= (tag & ompi_mtl_ofi.mpi_tag_mask);
224     return match_bits;
225 }
226 
227 /* Receive tag*/
228 __opal_attribute_always_inline__ static inline void
mtl_ofi_create_recv_tag(uint64_t * match_bits,uint64_t * mask_bits,int comm_id,int source,int tag)229 mtl_ofi_create_recv_tag(uint64_t *match_bits, uint64_t *mask_bits,
230                             int comm_id, int source, int tag)
231 {
232     *mask_bits  = ompi_mtl_ofi.sync_proto_mask;
233     *match_bits = comm_id;
234     *match_bits = (*match_bits << ompi_mtl_ofi.num_bits_source_rank);
235 
236     if (MPI_ANY_SOURCE == source) {
237         *match_bits = (*match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
238                                     + MTL_OFI_PROTO_BIT_COUNT));
239         *mask_bits |= ompi_mtl_ofi.source_rank_tag_mask;
240     } else {
241         *match_bits |= (uint64_t)(source & ompi_mtl_ofi.source_rank_mask);
242         *match_bits = (*match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
243                                  + MTL_OFI_PROTO_BIT_COUNT));
244     }
245 
246     if (MPI_ANY_TAG == tag) {
247         /* Special negative tags are used for collective operations.
248          * MPI_ANY_TAG should not match these special tags.
249          * See ompi/mca/coll/base/coll_tags.h
250          */
251           *mask_bits  |= (ompi_mtl_ofi.mpi_tag_mask>>1);
252     } else {
253         *match_bits |= (ompi_mtl_ofi.mpi_tag_mask & tag);
254     }
255 }
256 
/*
 * Helpers for the synchronous-send protocol bits and the MPI tag field of a
 * set of match bits. All of them read the field definitions from
 * ompi_mtl_ofi.
 *
 * The macro argument is parenthesized in every expansion so that a compound
 * argument such as (a | b) is combined as a whole: '&' binds tighter than
 * '|', so an unparenthesized "mask & a | b" would evaluate as
 * "(mask & a) | b".
 */

/* OR the sync-send protocol bit(s) into match_bits (an lvalue). */
#define MTL_OFI_SET_SYNC_SEND(match_bits)          \
    ((match_bits) |= ompi_mtl_ofi.sync_send)

/* Non-zero when the protocol field of match_bits equals sync_send. */
#define MTL_OFI_IS_SYNC_SEND(match_bits)           \
    (ompi_mtl_ofi.sync_send == (ompi_mtl_ofi.sync_proto_mask & (match_bits)))

/* Non-zero when the protocol field of match_bits equals sync_send_ack. */
#define MTL_OFI_IS_SYNC_SEND_ACK(match_bits)       \
    (ompi_mtl_ofi.sync_send_ack == (ompi_mtl_ofi.sync_proto_mask & (match_bits)))

/* Extract the MPI tag field of match_bits as an int. */
#define MTL_OFI_GET_TAG(match_bits)                \
    ((int)((match_bits) & ompi_mtl_ofi.mpi_tag_mask))
268 
269 __opal_attribute_always_inline__ static inline int
mtl_ofi_get_source(struct fi_cq_tagged_entry * wc)270 mtl_ofi_get_source(struct fi_cq_tagged_entry *wc)
271 {
272     int src;
273     if (ompi_mtl_ofi.fi_cq_data) {
274         src = (int) wc->data;
275     }
276     else {
277         src = (int)((wc->tag >> (MTL_OFI_PROTO_BIT_COUNT +
278                     ompi_mtl_ofi.num_bits_mpi_tag)) & ompi_mtl_ofi.source_rank_mask);
279     }
280 
281     return src;
282 }
283 END_C_DECLS
284 
285 #endif /* MTL_OFI_TYPES_H_HAS_BEEN_INCLUDED */
286