/*
 * Copyright (c) 2016-2018 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

33 #ifndef _OFI_SHM_H_
34 #define _OFI_SHM_H_
35
36 #include "config.h"
37
38 #include <stdint.h>
39 #include <stddef.h>
40
41 #include <ofi_atom.h>
42 #include <ofi_proto.h>
43 #include <ofi_mem.h>
44 #include <ofi_rbuf.h>
45
46 #include <rdma/providers/fi_prov.h>
47
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51
52
/*
 * Version number of the shared memory region.
 * NOTE(review): presumably the value written to smr_region.version;
 * confirm against the code that creates the region.
 */
#define SMR_VERSION	1

/*
 * Build-dependent flag bits. Each macro is always defined and expands to
 * zero when the corresponding feature is compiled out, so it can be OR'ed
 * into a flags word unconditionally.
 */
#ifdef HAVE_ATOMICS
#define SMR_FLAG_ATOMIC	(1 << 0)
#else
#define SMR_FLAG_ATOMIC	(0 << 0)
#endif

#if ENABLE_DEBUG
#define SMR_FLAG_DEBUG	(1 << 1)
#else
#define SMR_FLAG_DEBUG	(0 << 1)
#endif


/*
 * Size in bytes of a message command (header plus inline data);
 * a multiple of the 64-byte cache line.
 */
#define SMR_CMD_SIZE		128	/* align with 64-byte cache line */

/*
 * SMR op_src: Specifies data source location.
 * Carried in the op_src field of struct smr_msg_hdr.
 */
enum {
	smr_src_inline,	/* command data */
	smr_src_inject,	/* inject buffers */
	smr_src_iov,	/* reference iovec via CMA */
	smr_src_mmap,	/* mmap-based fallback protocol */
	smr_src_sar,	/* segmentation fallback protocol */
};

/* Operation flag bits (see the op_flags field of struct smr_msg_hdr) */
#define SMR_REMOTE_CQ_DATA	(1 << 0)
#define SMR_RMA_REQ		(1 << 1)
#define SMR_TX_COMPLETION	(1 << 2)
#define SMR_RX_COMPLETION	(1 << 3)
#define SMR_MULTI_RECV		(1 << 4)

/*
 * CMA capability.
 * NOTE(review): presumably the values held by smr_region.cma_cap; confirm
 * against smr_cma_check().
 */
enum {
	SMR_CMA_CAP_NA,
	SMR_CMA_CAP_ON,
	SMR_CMA_CAP_OFF,
};

92 /*
93 * Unique smr_op_hdr for smr message protocol:
94 * addr - local fi_addr of peer sending msg (for shm lookup)
95 * op - type of op (ex. ofi_op_msg, defined in ofi_proto.h)
96 * op_src - msg src (ex. smr_src_inline, defined above)
97 * op_flags - operation flags (ex. SMR_REMOTE_CQ_DATA, defined above)
98 * src_data - src of additional op data (inject offset / resp offset)
99 * data - remote CQ data
100 */
101 struct smr_msg_hdr {
102 uint64_t msg_id;
103 fi_addr_t addr;
104 uint32_t op;
105 uint16_t op_src;
106 uint16_t op_flags;
107
108 uint64_t size;
109 uint64_t src_data;
110 uint64_t data;
111 union {
112 uint64_t tag;
113 struct {
114 uint8_t datatype;
115 uint8_t atomic_op;
116 };
117 };
118 };
119
120 #define SMR_MSG_DATA_LEN (SMR_CMD_SIZE - sizeof(struct smr_msg_hdr))
121 #define SMR_COMP_DATA_LEN (SMR_MSG_DATA_LEN / 2)
122 union smr_cmd_data {
123 uint8_t msg[SMR_MSG_DATA_LEN];
124 struct {
125 size_t iov_count;
126 struct iovec iov[(SMR_MSG_DATA_LEN - sizeof(size_t)) /
127 sizeof(struct iovec)];
128 };
129 struct {
130 uint8_t buf[SMR_COMP_DATA_LEN];
131 uint8_t comp[SMR_COMP_DATA_LEN];
132 };
133 struct {
134 uint64_t sar;
135 };
136 };
137
138 struct smr_cmd_msg {
139 struct smr_msg_hdr hdr;
140 union smr_cmd_data data;
141 };
142
143 #define SMR_RMA_DATA_LEN (128 - sizeof(uint64_t))
144 struct smr_cmd_rma {
145 uint64_t rma_count;
146 union {
147 struct fi_rma_iov rma_iov[SMR_RMA_DATA_LEN /
148 sizeof(struct fi_rma_iov)];
149 struct fi_rma_ioc rma_ioc[SMR_RMA_DATA_LEN /
150 sizeof(struct fi_rma_ioc)];
151 };
152 };
153
154 struct smr_cmd {
155 union {
156 struct smr_cmd_msg msg;
157 struct smr_cmd_rma rma;
158 };
159 };
160
/* Size in bytes of one inject buffer */
#define SMR_INJECT_SIZE		4096
/* Half an inject buffer, used when it is split into buf / comp */
#define SMR_COMP_INJECT_SIZE	(SMR_INJECT_SIZE / 2)
/* Size in bytes of the data portion of one SAR buffer */
#define SMR_SAR_SIZE		16384

165 struct smr_addr {
166 char name[NAME_MAX];
167 fi_addr_t addr;
168 };
169
170 struct smr_peer_data {
171 struct smr_addr addr;
172 uint64_t sar_status;
173 };
174
175 extern struct dlist_entry ep_name_list;
176 extern pthread_mutex_t ep_list_lock;
177
178 struct smr_region;
179
180 struct smr_ep_name {
181 char name[NAME_MAX];
182 struct smr_region *region;
183 struct dlist_entry entry;
184 };
185
186 struct smr_peer {
187 struct smr_addr peer;
188 struct smr_region *region;
189 };
190
191 #define SMR_MAX_PEERS 256
192
193 struct smr_map {
194 fastlock_t lock;
195 struct smr_peer peers[SMR_MAX_PEERS];
196 };
197
198 struct smr_region {
199 uint8_t version;
200 uint8_t resv;
201 uint16_t flags;
202 int pid;
203 uint8_t cma_cap;
204 void *base_addr;
205 fastlock_t lock; /* lock for shm access
206 Must hold smr->lock before tx/rx cq locks
207 in order to progress or post recv */
208 struct smr_map *map;
209
210 size_t total_size;
211 size_t cmd_cnt; /* Doubles as a tracker for number of cmds AND
212 number of inject buffers available for use,
213 to ensure 1:1 ratio of cmds to inject bufs.
214 Might not always be paired consistently with
215 cmd alloc/free depending on protocol
216 (Ex. unexpected messages, RMA requests) */
217 size_t sar_cnt;
218
219 /* offsets from start of smr_region */
220 size_t cmd_queue_offset;
221 size_t resp_queue_offset;
222 size_t inject_pool_offset;
223 size_t sar_pool_offset;
224 size_t peer_data_offset;
225 size_t name_offset;
226 };
227
/* Element type of the response queue: a message id and a status word */
struct smr_resp {
	uint64_t	msg_id;
	uint64_t	status;
};

233 struct smr_inject_buf {
234 union {
235 uint8_t data[SMR_INJECT_SIZE];
236 struct {
237 uint8_t buf[SMR_COMP_INJECT_SIZE];
238 uint8_t comp[SMR_COMP_INJECT_SIZE];
239 };
240 };
241 };
242
/* SAR buffer states */
enum {
	SMR_SAR_FREE = 0,	/* buffer can be used */
	SMR_SAR_READY,		/* buffer has data in it */
};

248 struct smr_sar_buf {
249 uint64_t status;
250 uint8_t buf[SMR_SAR_SIZE];
251 };
252
253 struct smr_sar_msg {
254 struct smr_sar_buf sar[2];
255 };
256
257 OFI_DECLARE_CIRQUE(struct smr_cmd, smr_cmd_queue);
258 OFI_DECLARE_CIRQUE(struct smr_resp, smr_resp_queue);
259 DECLARE_SMR_FREESTACK(struct smr_inject_buf, smr_inject_pool);
260 DECLARE_SMR_FREESTACK(struct smr_sar_msg, smr_sar_pool);
261
smr_peer_region(struct smr_region * smr,int i)262 static inline struct smr_region *smr_peer_region(struct smr_region *smr, int i)
263 {
264 return smr->map->peers[i].region;
265 }
smr_cmd_queue(struct smr_region * smr)266 static inline struct smr_cmd_queue *smr_cmd_queue(struct smr_region *smr)
267 {
268 return (struct smr_cmd_queue *) ((char *) smr + smr->cmd_queue_offset);
269 }
smr_resp_queue(struct smr_region * smr)270 static inline struct smr_resp_queue *smr_resp_queue(struct smr_region *smr)
271 {
272 return (struct smr_resp_queue *) ((char *) smr + smr->resp_queue_offset);
273 }
smr_inject_pool(struct smr_region * smr)274 static inline struct smr_inject_pool *smr_inject_pool(struct smr_region *smr)
275 {
276 return (struct smr_inject_pool *) ((char *) smr + smr->inject_pool_offset);
277 }
smr_peer_data(struct smr_region * smr)278 static inline struct smr_peer_data *smr_peer_data(struct smr_region *smr)
279 {
280 return (struct smr_peer_data *) ((char *) smr + smr->peer_data_offset);
281 }
smr_sar_pool(struct smr_region * smr)282 static inline struct smr_sar_pool *smr_sar_pool(struct smr_region *smr)
283 {
284 return (struct smr_sar_pool *) ((char *) smr + smr->sar_pool_offset);
285 }
smr_name(struct smr_region * smr)286 static inline const char *smr_name(struct smr_region *smr)
287 {
288 return (const char *) smr + smr->name_offset;
289 }
290
smr_set_map(struct smr_region * smr,struct smr_map * map)291 static inline void smr_set_map(struct smr_region *smr, struct smr_map *map)
292 {
293 smr->map = map;
294 }
295
/*
 * Attributes passed to smr_create(): a name plus rx and tx counts.
 * NOTE(review): presumably the region name and the receive/transmit
 * queue depths used to size the region - confirm in smr_create().
 */
struct smr_attr {
	const char	*name;
	size_t		rx_count;
	size_t		tx_count;
};

/*
 * Functions implemented outside this header. The notes below are limited
 * to what the prototypes show; see the definitions for the full contracts.
 */

/*
 * Takes the tx/rx counts and six size_t output pointers whose names
 * mirror the *_offset members of struct smr_region; returns a size_t
 * (presumably the total region size - confirm at the definition).
 */
size_t smr_calculate_size_offsets(size_t tx_count, size_t rx_count,
				  size_t *cmd_offset, size_t *resp_offset,
				  size_t *inject_offset, size_t *sar_offset,
				  size_t *peer_offset, size_t *name_offset);
void	smr_cma_check(struct smr_region *region, struct smr_region *peer_region);
void	smr_cleanup(void);

/* Peer map management; the int-returning calls report a status code */
int	smr_map_create(const struct fi_provider *prov, int peer_count,
		       struct smr_map **map);
int	smr_map_to_region(const struct fi_provider *prov,
			  struct smr_peer *peer_buf);
void	smr_map_to_endpoint(struct smr_region *region, int index);
void	smr_unmap_from_endpoint(struct smr_region *region, int index);
void	smr_exchange_all_peers(struct smr_region *region);
int	smr_map_add(const struct fi_provider *prov,
		    struct smr_map *map, const char *name, int id);
void	smr_map_del(struct smr_map *map, int id);
void	smr_map_free(struct smr_map *map);

struct smr_region *smr_map_get(struct smr_map *map, int id);

/* Region creation (result returned through *smr) and release */
int	smr_create(const struct fi_provider *prov, struct smr_map *map,
		   const struct smr_attr *attr, struct smr_region **smr);
void	smr_free(struct smr_region *smr);

326 #ifdef __cplusplus
327 }
328 #endif
329
330 #endif /* _OFI_SHM_H_ */
331