/*
 * Copyright (c) 2015-2018 Intel Corporation, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif /* HAVE_CONFIG_H */

#include <sys/types.h>
#include <sys/statvfs.h>
#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stddef.h>

#include <rdma/fabric.h>
#include <rdma/fi_atomic.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_errno.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_tagged.h>
#include <rdma/fi_trigger.h>
#include <rdma/providers/fi_prov.h>

#include <ofi.h>
#include <ofi_enosys.h>
#include <ofi_shm.h>
#include <ofi_rbuf.h>
#include <ofi_list.h>
#include <ofi_signal.h>
#include <ofi_util.h>
#include <ofi_atomic.h>

#ifndef _SMR_H_
#define _SMR_H_

/*
 * Provider-wide settings. A single instance named `smr_env` is declared
 * extern in this header.
 */
struct smr_env {
	/* NOTE(review): presumably the size cutoff used to select the SAR
	 * protocol -- confirm where the value is read. */
	size_t sar_threshold;
};

/* Provider-wide objects; only declared here, the definitions live elsewhere. */
extern struct smr_env smr_env;
extern struct fi_provider smr_prov;
extern struct fi_info smr_info;
extern struct util_prov smr_util_prov;

/*
 * Fabric open entry point.
 * NOTE(review): presumably stores the new fabric in *fabric and returns 0 or
 * a negative fi_errno value -- confirm against the implementation.
 */
int smr_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
		void *context);

79 struct smr_av {
80 	struct util_av		util_av;
81 	struct smr_map		*smr_map;
82 	size_t			used;
83 };
84 
/*
 * Object-open entry points (domain, event queue, address vector) and the
 * atomic capability query. Each takes the parent fid and returns an int.
 * NOTE(review): return convention presumably 0 / negative fi_errno -- confirm.
 */
int smr_domain_open(struct fid_fabric *fabric, struct fi_info *info,
		struct fid_domain **dom, void *context);

int smr_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
		struct fid_eq **eq, void *context);

int smr_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
		struct fid_av **av, void *context);

int smr_query_atomic(struct fid_domain *domain, enum fi_datatype datatype,
		enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags);

/* Number of iovec slots stored in the rx, tx and SAR entries declared below. */
#define SMR_IOV_LIMIT		4

99 struct smr_rx_entry {
100 	struct dlist_entry	entry;
101 	void			*context;
102 	fi_addr_t		addr;
103 	uint64_t		tag;
104 	uint64_t		ignore;
105 	struct iovec		iov[SMR_IOV_LIMIT];
106 	uint32_t		iov_count;
107 	uint16_t		flags;
108 	uint64_t		err;
109 };
110 
111 struct smr_tx_entry {
112 	struct smr_cmd	cmd;
113 	fi_addr_t	addr;
114 	void		*context;
115 	struct iovec	iov[SMR_IOV_LIMIT];
116 	uint32_t	iov_count;
117 	size_t		bytes_done;
118 	int		next;
119 	void		*map_ptr;
120 	struct smr_ep_name *map_name;
121 };
122 
123 struct smr_sar_entry {
124 	struct dlist_entry	entry;
125 	struct smr_cmd		cmd;
126 	struct smr_rx_entry	rx_entry;
127 	size_t			bytes_done;
128 	int			next;
129 	struct iovec		iov[SMR_IOV_LIMIT];
130 	size_t			iov_count;
131 };
132 
133 struct smr_ep;
134 typedef int (*smr_rx_comp_func)(struct smr_ep *ep, void *context, uint32_t op,
135 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
136 		uint64_t tag, uint64_t data, uint64_t err);
137 typedef int (*smr_tx_comp_func)(struct smr_ep *ep, void *context, uint32_t op,
138 		uint16_t flags, uint64_t err);
139 
140 
141 struct smr_match_attr {
142 	fi_addr_t	addr;
143 	uint64_t	tag;
144 	uint64_t	ignore;
145 };
146 
/*
 * Address match test.
 *
 * Returns 1 when either address is FI_ADDR_UNSPEC (wildcard) or when both
 * addresses are equal, 0 otherwise.
 */
static inline int smr_match_addr(fi_addr_t addr, fi_addr_t match_addr)
{
	/* An unspecified address on either side matches anything. */
	if (addr == FI_ADDR_UNSPEC || match_addr == FI_ADDR_UNSPEC)
		return 1;

	return addr == match_addr;
}

/*
 * Tag match test.
 *
 * Bits set in `ignore` are excluded from the comparison; returns 1 when
 * `tag` and `match_tag` agree on every remaining bit, 0 otherwise.
 */
static inline int smr_match_tag(uint64_t tag, uint64_t ignore, uint64_t match_tag)
{
	/* Forcing the ignored bits to 1 on both sides removes them from the test. */
	return ((tag | ignore) == (match_tag | ignore));
}

158 struct smr_unexp_msg {
159 	struct dlist_entry entry;
160 	struct smr_cmd cmd;
161 };
162 
163 DECLARE_FREESTACK(struct smr_rx_entry, smr_recv_fs);
164 DECLARE_FREESTACK(struct smr_unexp_msg, smr_unexp_fs);
165 DECLARE_FREESTACK(struct smr_tx_entry, smr_pend_fs);
166 DECLARE_FREESTACK(struct smr_sar_entry, smr_sar_fs);
167 
168 struct smr_queue {
169 	struct dlist_entry list;
170 	dlist_func_t *match_func;
171 };
172 
173 struct smr_fabric {
174 	struct util_fabric	util_fabric;
175 	int			dom_idx;
176 };
177 
178 struct smr_domain {
179 	struct util_domain	util_domain;
180 	int			dom_idx;
181 	int			ep_idx;
182 	int			fast_rma;
183 };
184 
/* Address-string prefixes; both end in "://", which smr_no_prefix() strips. */
#define SMR_PREFIX	"fi_shm://"
#define SMR_PREFIX_NS	"fi_ns://"

/*
 * Skip a leading "<scheme>://" in an address string.
 *
 * Returns a pointer just past the first "://" found in `addr`, or `addr`
 * itself when the string contains no "://". The result points into the
 * caller's string; nothing is copied or allocated.
 */
static inline const char *smr_no_prefix(const char *addr)
{
	const char *sep = strstr(addr, "://");

	/* 3 == strlen("://") */
	return sep ? sep + 3 : addr;
}

/* Ordering bits that, when requested, rule out the fast RMA path below. */
#define SMR_RMA_ORDER (OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | FI_ORDER_RAS |	\
		       OFI_ORDER_WAR_SET | OFI_ORDER_WAW_SET | FI_ORDER_WAS |	\
		       FI_ORDER_SAR | FI_ORDER_SAW)

/*
 * Non-zero when `mode` has FI_MR_VIRT_ADDR set and `order` contains none of
 * the SMR_RMA_ORDER bits. Arguments are parenthesized so that compound
 * expressions (e.g. `a | b`) are evaluated as a whole.
 */
#define smr_fast_rma_enabled(mode, order) (((mode) & FI_MR_VIRT_ADDR) && \
			!((order) & SMR_RMA_ORDER))

/*
 * Byte distance from `base` to `addr`, i.e. the inverse of smr_get_ptr().
 *
 * Both pointers are expected to refer to the same mapping with
 * addr >= base; the difference is returned as an unsigned 64-bit offset.
 */
static inline uint64_t smr_get_offset(void *base, void *addr)
{
	const char *start = (char *) base;
	const char *target = (char *) addr;

	return (uintptr_t) (target - start);
}

/*
 * Pointer located `offset` bytes past `base`, i.e. the inverse of
 * smr_get_offset().
 */
static inline void *smr_get_ptr(void *base, uint64_t offset)
{
	char *start = (char *) base;

	return start + (uintptr_t) offset;
}

211 struct smr_ep {
212 	struct util_ep		util_ep;
213 	smr_rx_comp_func	rx_comp;
214 	smr_tx_comp_func	tx_comp;
215 	size_t			tx_size;
216 	size_t			rx_size;
217 	size_t			min_multi_recv_size;
218 	const char		*name;
219 	uint64_t		msg_id;
220 	struct smr_region	*region;
221 	struct smr_recv_fs	*recv_fs; /* protected by rx_cq lock */
222 	struct smr_queue	recv_queue;
223 	struct smr_queue	trecv_queue;
224 	struct smr_unexp_fs	*unexp_fs;
225 	struct smr_pend_fs	*pend_fs;
226 	struct smr_sar_fs	*sar_fs;
227 	struct smr_queue	unexp_msg_queue;
228 	struct smr_queue	unexp_tagged_queue;
229 	struct dlist_entry	sar_list;
230 };
231 
/* Accessors for the rx/tx operation flags kept in the embedded util_ep. */
#define smr_ep_rx_flags(smr_ep) ((smr_ep)->util_ep.rx_op_flags)
#define smr_ep_tx_flags(smr_ep) ((smr_ep)->util_ep.tx_op_flags)

/*
 * Build the name "<ep_name>_<msg_id>" in `shm_name`.
 *
 * At most NAME_MAX - 1 bytes (terminator included) are written, so
 * `shm_name` must provide at least that much room. The return value is that
 * of snprintf(): the length the full name would have, which is
 * >= NAME_MAX - 1 when the output was truncated, or negative on error.
 *
 * `msg_id` is printed with PRIu64 so the conversion matches the uint64_t
 * argument on every ABI (the previous "%ld" did not).
 */
static inline int smr_mmap_name(char *shm_name, const char *ep_name,
				uint64_t msg_id)
{
	return snprintf(shm_name, NAME_MAX - 1, "%s_%" PRIu64,
			ep_name, msg_id);
}

/*
 * Open entry points for endpoints, completion queues and counters, plus the
 * peer check used by the data path.
 * NOTE(review): return convention presumably 0 / negative fi_errno -- confirm.
 */
int smr_endpoint(struct fid_domain *domain, struct fi_info *info,
		  struct fid_ep **ep, void *context);

int smr_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
		struct fid_cq **cq_fid, void *context);
int smr_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
		  struct fid_cntr **cntr_fid, void *context);

int smr_verify_peer(struct smr_ep *ep, int peer_id);

252 void smr_format_pend_resp(struct smr_tx_entry *pend, struct smr_cmd *cmd,
253 			  void *context, const struct iovec *iov,
254 			  uint32_t iov_count, fi_addr_t id,
255 			  struct smr_resp *resp);
256 void smr_generic_format(struct smr_cmd *cmd, fi_addr_t peer_id, uint32_t op,
257 			uint64_t tag, uint64_t data, uint64_t op_flags);
258 void smr_format_inline(struct smr_cmd *cmd, const struct iovec *iov,
259 		       size_t count);
260 void smr_format_inject(struct smr_cmd *cmd, const struct iovec *iov,
261 		       size_t count, struct smr_region *smr,
262 		       struct smr_inject_buf *tx_buf);
263 void smr_format_iov(struct smr_cmd *cmd, const struct iovec *iov, size_t count,
264 		    size_t total_len, struct smr_region *smr,
265 		    struct smr_resp *resp);
266 int smr_format_mmap(struct smr_ep *ep, struct smr_cmd *cmd,
267 		    const struct iovec *iov, size_t count, size_t total_len,
268 		    struct smr_tx_entry *pend, struct smr_resp *resp);
269 void smr_format_sar(struct smr_cmd *cmd, const struct iovec *iov, size_t count,
270 		    size_t total_len, struct smr_region *smr,
271 		    struct smr_region *peer_smr, struct smr_sar_msg *sar_msg,
272 		    struct smr_tx_entry *pending, struct smr_resp *resp);
273 size_t smr_copy_to_sar(struct smr_sar_msg *sar_msg, struct smr_resp *resp,
274 		       struct smr_cmd *cmd, const struct iovec *iov, size_t count,
275 		       size_t *bytes_done, int *next);
276 size_t smr_copy_from_sar(struct smr_sar_msg *sar_msg, struct smr_resp *resp,
277 			 struct smr_cmd *cmd, const struct iovec *iov, size_t count,
278 			 size_t *bytes_done, int *next);
279 
280 int smr_complete_tx(struct smr_ep *ep, void *context, uint32_t op,
281 		uint16_t flags, uint64_t err);
282 int smr_tx_comp(struct smr_ep *ep, void *context, uint32_t op,
283 		uint16_t flags, uint64_t err);
284 int smr_tx_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
285 		uint16_t flags, uint64_t err);
286 int smr_complete_rx(struct smr_ep *ep, void *context, uint32_t op,
287 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
288 		uint64_t tag, uint64_t data, uint64_t err);
289 int smr_rx_comp(struct smr_ep *ep, void *context, uint32_t op,
290 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
291 		uint64_t tag, uint64_t data, uint64_t err);
292 int smr_rx_src_comp(struct smr_ep *ep, void *context, uint32_t op,
293 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
294 		uint64_t tag, uint64_t data, uint64_t err);
295 int smr_rx_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
296 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
297 		uint64_t tag, uint64_t data, uint64_t err);
298 int smr_rx_src_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
299 		uint16_t flags, size_t len, void *buf, fi_addr_t addr,
300 		uint64_t tag, uint64_t data, uint64_t err);
301 
/* Maps an operation code and its op flags to a 64-bit CQ flag word. */
uint64_t smr_rx_cq_flags(uint32_t op, uint16_t op_flags);

/* Progress function taking the generic util_ep embedded in struct smr_ep. */
void smr_ep_progress(struct util_ep *util_ep);

/*
 * NOTE(review): presumably tries to satisfy the receive `entry` from
 * `unexp_queue` -- confirm against the implementation.
 */
int smr_progress_unexp_queue(struct smr_ep *ep, struct smr_rx_entry *entry,
			     struct smr_queue *unexp_queue);

309 #endif
310