1 /*
2  * Copyright (c) 2016-2018 Intel Corporation. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #ifndef _OFI_SHM_H_
34 #define _OFI_SHM_H_
35 
36 #include "config.h"
37 
38 #include <stdint.h>
39 #include <stddef.h>
40 
41 #include <ofi_atom.h>
42 #include <ofi_proto.h>
43 #include <ofi_mem.h>
44 #include <ofi_rbuf.h>
45 
46 #include <rdma/providers/fi_prov.h>
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 
/* Version number of the shared memory region (SMR) definitions. */
#define SMR_VERSION	1

/*
 * Build-time capability flags. Each flag owns a fixed bit position and
 * expands to zero when the matching feature is not compiled in.
 */
#if defined(HAVE_ATOMICS)
#define SMR_FLAG_ATOMIC	(1 << 0)
#else
#define SMR_FLAG_ATOMIC	(0 << 0)
#endif

#if ENABLE_DEBUG
#define SMR_FLAG_DEBUG	(1 << 1)
#else
#define SMR_FLAG_DEBUG	(0 << 1)
#endif
66 
67 
/* Size in bytes of one command; a multiple of the 64-byte cache line. */
#define SMR_CMD_SIZE		128
69 
/* SMR op_src: identifies where the data for an operation is located */
enum {
	smr_src_inline	= 0,	/* carried in the command itself */
	smr_src_inject	= 1,	/* held in an inject buffer */
	smr_src_iov	= 2,	/* iovec referenced via CMA */
	smr_src_mmap	= 3,	/* mmap-based fallback protocol */
	smr_src_sar	= 4,	/* segmentation fallback protocol */
};
78 
/*
 * Operation flags, one bit each (see the 16-bit op_flags field of
 * struct smr_msg_hdr).
 */
#define SMR_REMOTE_CQ_DATA	(1 << 0)
#define SMR_RMA_REQ		(1 << 1)
#define SMR_TX_COMPLETION	(1 << 2)
#define SMR_RX_COMPLETION	(1 << 3)
#define SMR_MULTI_RECV		(1 << 4)
84 
/*
 * CMA capability states.
 * NOTE(review): NA presumably means "not determined yet" -- confirm.
 */
enum {
	SMR_CMA_CAP_NA	= 0,
	SMR_CMA_CAP_ON	= 1,
	SMR_CMA_CAP_OFF	= 2,
};
91 
92 /*
93  * Unique smr_op_hdr for smr message protocol:
94  * 	addr - local fi_addr of peer sending msg (for shm lookup)
95  * 	op - type of op (ex. ofi_op_msg, defined in ofi_proto.h)
96  * 	op_src - msg src (ex. smr_src_inline, defined above)
97  * 	op_flags - operation flags (ex. SMR_REMOTE_CQ_DATA, defined above)
98  * 	src_data - src of additional op data (inject offset / resp offset)
99  * 	data - remote CQ data
100  */
101 struct smr_msg_hdr {
102 	uint64_t		msg_id;
103 	fi_addr_t		addr;
104 	uint32_t		op;
105 	uint16_t		op_src;
106 	uint16_t		op_flags;
107 
108 	uint64_t		size;
109 	uint64_t		src_data;
110 	uint64_t		data;
111 	union {
112 		uint64_t	tag;
113 		struct {
114 			uint8_t	datatype;
115 			uint8_t	atomic_op;
116 		};
117 	};
118 };
119 
120 #define SMR_MSG_DATA_LEN	(SMR_CMD_SIZE - sizeof(struct smr_msg_hdr))
121 #define SMR_COMP_DATA_LEN	(SMR_MSG_DATA_LEN / 2)
122 union smr_cmd_data {
123 	uint8_t			msg[SMR_MSG_DATA_LEN];
124 	struct {
125 		size_t		iov_count;
126 		struct iovec	iov[(SMR_MSG_DATA_LEN - sizeof(size_t)) /
127 				    sizeof(struct iovec)];
128 	};
129 	struct {
130 		uint8_t		buf[SMR_COMP_DATA_LEN];
131 		uint8_t		comp[SMR_COMP_DATA_LEN];
132 	};
133 	struct {
134 		uint64_t	sar;
135 	};
136 };
137 
138 struct smr_cmd_msg {
139 	struct smr_msg_hdr	hdr;
140 	union smr_cmd_data	data;
141 };
142 
143 #define SMR_RMA_DATA_LEN	(128 - sizeof(uint64_t))
144 struct smr_cmd_rma {
145 	uint64_t		rma_count;
146 	union {
147 		struct fi_rma_iov	rma_iov[SMR_RMA_DATA_LEN /
148 						sizeof(struct fi_rma_iov)];
149 		struct fi_rma_ioc	rma_ioc[SMR_RMA_DATA_LEN /
150 						sizeof(struct fi_rma_ioc)];
151 	};
152 };
153 
154 struct smr_cmd {
155 	union {
156 		struct smr_cmd_msg	msg;
157 		struct smr_cmd_rma	rma;
158 	};
159 };
160 
/* Buffer sizes, in bytes. */
#define SMR_INJECT_SIZE		4096			/* one inject buffer */
#define SMR_COMP_INJECT_SIZE	(SMR_INJECT_SIZE / 2)	/* half an inject buffer */
#define SMR_SAR_SIZE		16384			/* one SAR buffer */
164 
165 struct smr_addr {
166 	char		name[NAME_MAX];
167 	fi_addr_t	addr;
168 };
169 
170 struct smr_peer_data {
171 	struct smr_addr		addr;
172 	uint64_t		sar_status;
173 };
174 
/*
 * Globals defined outside this header.
 * NOTE(review): ep_name_list presumably links struct smr_ep_name entries
 * and is guarded by ep_list_lock -- confirm against the definitions.
 */
extern struct dlist_entry	ep_name_list;
extern pthread_mutex_t		ep_list_lock;
177 
178 struct smr_region;
179 
180 struct smr_ep_name {
181 	char name[NAME_MAX];
182 	struct smr_region *region;
183 	struct dlist_entry entry;
184 };
185 
186 struct smr_peer {
187 	struct smr_addr		peer;
188 	struct smr_region	*region;
189 };
190 
191 #define SMR_MAX_PEERS	256
192 
193 struct smr_map {
194 	fastlock_t	lock;
195 	struct smr_peer	peers[SMR_MAX_PEERS];
196 };
197 
198 struct smr_region {
199 	uint8_t		version;
200 	uint8_t		resv;
201 	uint16_t	flags;
202 	int		pid;
203 	uint8_t		cma_cap;
204 	void		*base_addr;
205 	fastlock_t	lock; /* lock for shm access
206 				 Must hold smr->lock before tx/rx cq locks
207 				 in order to progress or post recv */
208 	struct smr_map	*map;
209 
210 	size_t		total_size;
211 	size_t		cmd_cnt; /* Doubles as a tracker for number of cmds AND
212 				    number of inject buffers available for use,
213 				    to ensure 1:1 ratio of cmds to inject bufs.
214 				    Might not always be paired consistently with
215 				    cmd alloc/free depending on protocol
216 				    (Ex. unexpected messages, RMA requests) */
217 	size_t		sar_cnt;
218 
219 	/* offsets from start of smr_region */
220 	size_t		cmd_queue_offset;
221 	size_t		resp_queue_offset;
222 	size_t		inject_pool_offset;
223 	size_t		sar_pool_offset;
224 	size_t		peer_data_offset;
225 	size_t		name_offset;
226 };
227 
/* Response queue entry: a message id and a status value. */
struct smr_resp {
	uint64_t msg_id;
	uint64_t status;
};
232 
233 struct smr_inject_buf {
234 	union {
235 		uint8_t		data[SMR_INJECT_SIZE];
236 		struct {
237 			uint8_t	buf[SMR_COMP_INJECT_SIZE];
238 			uint8_t comp[SMR_COMP_INJECT_SIZE];
239 		};
240 	};
241 };
242 
/* SAR buffer states */
enum {
	SMR_SAR_FREE	= 0,	/* buffer can be used */
	SMR_SAR_READY	= 1,	/* buffer has data in it */
};
247 
248 struct smr_sar_buf {
249 	uint64_t	status;
250 	uint8_t		buf[SMR_SAR_SIZE];
251 };
252 
253 struct smr_sar_msg {
254 	struct smr_sar_buf	sar[2];
255 };
256 
257 OFI_DECLARE_CIRQUE(struct smr_cmd, smr_cmd_queue);
258 OFI_DECLARE_CIRQUE(struct smr_resp, smr_resp_queue);
259 DECLARE_SMR_FREESTACK(struct smr_inject_buf, smr_inject_pool);
260 DECLARE_SMR_FREESTACK(struct smr_sar_msg, smr_sar_pool);
261 
smr_peer_region(struct smr_region * smr,int i)262 static inline struct smr_region *smr_peer_region(struct smr_region *smr, int i)
263 {
264 	return smr->map->peers[i].region;
265 }
smr_cmd_queue(struct smr_region * smr)266 static inline struct smr_cmd_queue *smr_cmd_queue(struct smr_region *smr)
267 {
268 	return (struct smr_cmd_queue *) ((char *) smr + smr->cmd_queue_offset);
269 }
smr_resp_queue(struct smr_region * smr)270 static inline struct smr_resp_queue *smr_resp_queue(struct smr_region *smr)
271 {
272 	return (struct smr_resp_queue *) ((char *) smr + smr->resp_queue_offset);
273 }
smr_inject_pool(struct smr_region * smr)274 static inline struct smr_inject_pool *smr_inject_pool(struct smr_region *smr)
275 {
276 	return (struct smr_inject_pool *) ((char *) smr + smr->inject_pool_offset);
277 }
smr_peer_data(struct smr_region * smr)278 static inline struct smr_peer_data *smr_peer_data(struct smr_region *smr)
279 {
280 	return (struct smr_peer_data *) ((char *) smr + smr->peer_data_offset);
281 }
smr_sar_pool(struct smr_region * smr)282 static inline struct smr_sar_pool *smr_sar_pool(struct smr_region *smr)
283 {
284 	return (struct smr_sar_pool *) ((char *) smr + smr->sar_pool_offset);
285 }
smr_name(struct smr_region * smr)286 static inline const char *smr_name(struct smr_region *smr)
287 {
288 	return (const char *) smr + smr->name_offset;
289 }
290 
smr_set_map(struct smr_region * smr,struct smr_map * map)291 static inline void smr_set_map(struct smr_region *smr, struct smr_map *map)
292 {
293 	smr->map = map;
294 }
295 
/*
 * Attributes taken by smr_create().
 * NOTE(review): rx_count / tx_count presumably size the region's queues
 * and pools -- confirm against smr_create().
 */
struct smr_attr {
	const char *name;
	size_t rx_count;
	size_t tx_count;
};
301 
/*
 * Region layout computation.
 * NOTE(review): the pointer arguments are presumably outputs matching the
 * *_offset fields of struct smr_region, and the return value presumably the
 * total size -- confirm against the definition.
 */
size_t smr_calculate_size_offsets(size_t tx_count, size_t rx_count,
				  size_t *cmd_offset, size_t *resp_offset,
				  size_t *inject_offset, size_t *sar_offset,
				  size_t *peer_offset, size_t *name_offset);

/* CMA check between a region and a peer region. */
void smr_cma_check(struct smr_region *region, struct smr_region *peer_region);

void smr_cleanup(void);

/* Peer map management. */
int smr_map_create(const struct fi_provider *prov, int peer_count,
		   struct smr_map **map);
int smr_map_to_region(const struct fi_provider *prov,
		      struct smr_peer *peer_buf);
void smr_map_to_endpoint(struct smr_region *region, int index);
void smr_unmap_from_endpoint(struct smr_region *region, int index);
void smr_exchange_all_peers(struct smr_region *region);
int smr_map_add(const struct fi_provider *prov,
		struct smr_map *map, const char *name, int id);
void smr_map_del(struct smr_map *map, int id);
void smr_map_free(struct smr_map *map);

struct smr_region *smr_map_get(struct smr_map *map, int id);

/* Region creation and release. */
int smr_create(const struct fi_provider *prov, struct smr_map *map,
	       const struct smr_attr *attr, struct smr_region **smr);
void smr_free(struct smr_region *smr);
325 
326 #ifdef __cplusplus
327 }
328 #endif
329 
330 #endif /* _OFI_SHM_H_ */
331