1 /*
2 * Copyright (c) 2015-2018 Intel Corporation, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #if HAVE_CONFIG_H
34 # include <config.h>
35 #endif /* HAVE_CONFIG_H */
36
37 #include <sys/types.h>
38 #include <sys/statvfs.h>
39 #include <pthread.h>
40 #include <stdint.h>
41 #include <stddef.h>
42
43 #include <rdma/fabric.h>
44 #include <rdma/fi_atomic.h>
45 #include <rdma/fi_cm.h>
46 #include <rdma/fi_domain.h>
47 #include <rdma/fi_endpoint.h>
48 #include <rdma/fi_eq.h>
49 #include <rdma/fi_errno.h>
50 #include <rdma/fi_rma.h>
51 #include <rdma/fi_tagged.h>
52 #include <rdma/fi_trigger.h>
53 #include <rdma/providers/fi_prov.h>
54
55 #include <ofi.h>
56 #include <ofi_enosys.h>
57 #include <ofi_shm.h>
58 #include <ofi_rbuf.h>
59 #include <ofi_list.h>
60 #include <ofi_signal.h>
61 #include <ofi_util.h>
62 #include <ofi_atomic.h>
63
64 #ifndef _SMR_H_
65 #define _SMR_H_
66
/*
 * Provider-wide settings. One instance is exported from this header as
 * the global `smr_env`.
 */
struct smr_env {
	/* NOTE(review): presumably the size cutoff for the SAR path -- confirm */
	size_t		sar_threshold;
};
70
/* Provider globals; declared here, defined outside this header. */
extern struct smr_env		smr_env;
extern struct fi_provider	smr_prov;
extern struct fi_info		smr_info;
extern struct util_prov		smr_util_prov;
75
/*
 * Open a fabric object; the result is handed back through `fabric`.
 * NOTE(review): return convention (0 / negative fi_errno) is not visible
 * in this header -- confirm against the implementation.
 */
int smr_fabric(struct fi_fabric_attr *attr,
	       struct fid_fabric **fabric,
	       void *context);
78
79 struct smr_av {
80 struct util_av util_av;
81 struct smr_map *smr_map;
82 size_t used;
83 };
84
/*
 * Object-open entry points. Each one returns the newly created object
 * through its pointer-to-pointer argument.
 * NOTE(review): return convention (0 / negative fi_errno) is not visible
 * in this header -- confirm against the implementations.
 */
int smr_domain_open(struct fid_fabric *fabric,
		    struct fi_info *info,
		    struct fid_domain **dom,
		    void *context);

int smr_eq_open(struct fid_fabric *fabric,
		struct fi_eq_attr *attr,
		struct fid_eq **eq,
		void *context);

int smr_av_open(struct fid_domain *domain,
		struct fi_av_attr *attr,
		struct fid_av **av,
		void *context);

/* Atomic capability query; the answer is written into `attr`. */
int smr_query_atomic(struct fid_domain *domain,
		     enum fi_datatype datatype,
		     enum fi_op op,
		     struct fi_atomic_attr *attr,
		     uint64_t flags);
96
/* Capacity of every iov[] array embedded in the entry structs of this header. */
#define SMR_IOV_LIMIT		4
98
99 struct smr_rx_entry {
100 struct dlist_entry entry;
101 void *context;
102 fi_addr_t addr;
103 uint64_t tag;
104 uint64_t ignore;
105 struct iovec iov[SMR_IOV_LIMIT];
106 uint32_t iov_count;
107 uint16_t flags;
108 uint64_t err;
109 };
110
111 struct smr_tx_entry {
112 struct smr_cmd cmd;
113 fi_addr_t addr;
114 void *context;
115 struct iovec iov[SMR_IOV_LIMIT];
116 uint32_t iov_count;
117 size_t bytes_done;
118 int next;
119 void *map_ptr;
120 struct smr_ep_name *map_name;
121 };
122
123 struct smr_sar_entry {
124 struct dlist_entry entry;
125 struct smr_cmd cmd;
126 struct smr_rx_entry rx_entry;
127 size_t bytes_done;
128 int next;
129 struct iovec iov[SMR_IOV_LIMIT];
130 size_t iov_count;
131 };
132
133 struct smr_ep;
134 typedef int (*smr_rx_comp_func)(struct smr_ep *ep, void *context, uint32_t op,
135 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
136 uint64_t tag, uint64_t data, uint64_t err);
137 typedef int (*smr_tx_comp_func)(struct smr_ep *ep, void *context, uint32_t op,
138 uint16_t flags, uint64_t err);
139
140
141 struct smr_match_attr {
142 fi_addr_t addr;
143 uint64_t tag;
144 uint64_t ignore;
145 };
146
/*
 * Address comparison with wildcard support.
 *
 * Returns 1 when either argument is FI_ADDR_UNSPEC or when both addresses
 * are equal, and 0 otherwise.
 */
static inline int smr_match_addr(fi_addr_t addr, fi_addr_t match_addr)
{
	/* FI_ADDR_UNSPEC on either side matches anything. */
	if (addr == FI_ADDR_UNSPEC || match_addr == FI_ADDR_UNSPEC)
		return 1;

	return addr == match_addr;
}
152
/*
 * Tag comparison under an ignore mask.
 *
 * Returns 1 when `tag` and `match_tag` agree on every bit that is clear
 * in `ignore`, and 0 otherwise. Bits set in `ignore` never affect the
 * result.
 */
static inline int smr_match_tag(uint64_t tag, uint64_t ignore, uint64_t match_tag)
{
	uint64_t differing = tag ^ match_tag;

	return (differing & ~ignore) == 0;
}
157
158 struct smr_unexp_msg {
159 struct dlist_entry entry;
160 struct smr_cmd cmd;
161 };
162
163 DECLARE_FREESTACK(struct smr_rx_entry, smr_recv_fs);
164 DECLARE_FREESTACK(struct smr_unexp_msg, smr_unexp_fs);
165 DECLARE_FREESTACK(struct smr_tx_entry, smr_pend_fs);
166 DECLARE_FREESTACK(struct smr_sar_entry, smr_sar_fs);
167
168 struct smr_queue {
169 struct dlist_entry list;
170 dlist_func_t *match_func;
171 };
172
173 struct smr_fabric {
174 struct util_fabric util_fabric;
175 int dom_idx;
176 };
177
178 struct smr_domain {
179 struct util_domain util_domain;
180 int dom_idx;
181 int ep_idx;
182 int fast_rma;
183 };
184
/* Scheme prefixes for address strings; both end in "://". */
#define SMR_PREFIX		"fi_shm://"
#define SMR_PREFIX_NS		"fi_ns://"
187
/*
 * Skip a leading "<scheme>://" part of an address string.
 *
 * Returns a pointer to the character just past the first "://" found in
 * `addr`, or `addr` itself when the string contains no "://". The result
 * always points into the caller's string; nothing is copied.
 */
static inline const char *smr_no_prefix(const char *addr)
{
	const char *sep = strstr(addr, "://");

	if (!sep)
		return addr;

	return sep + 3;
}
194
/* Every ordering bit that, when requested, rules out the fast RMA path. */
#define SMR_RMA_ORDER (OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | FI_ORDER_RAS |	\
		       OFI_ORDER_WAR_SET | OFI_ORDER_WAW_SET | FI_ORDER_WAS |	\
		       FI_ORDER_SAR | FI_ORDER_SAW)

/*
 * Evaluates to non-zero when `mode` has FI_MR_VIRT_ADDR set and `order`
 * has none of the SMR_RMA_ORDER bits set.
 *
 * Both arguments are parenthesized so that compound expressions such as
 * `a | b` are masked as a whole instead of being split by the higher
 * precedence of `&`.
 */
#define smr_fast_rma_enabled(mode, order) ((((mode) & FI_MR_VIRT_ADDR)) && \
					   !((order) & SMR_RMA_ORDER))
200
/*
 * Byte distance from `base` to `addr`, returned as an unsigned 64-bit
 * value. Inverse of smr_get_ptr().
 */
static inline uint64_t smr_get_offset(void *base, void *addr)
{
	char *start = base;
	char *target = addr;

	return (uintptr_t) (target - start);
}
205
/*
 * Address located `offset` bytes past `base`. Inverse of
 * smr_get_offset().
 */
static inline void *smr_get_ptr(void *base, uint64_t offset)
{
	char *start = base;

	return &start[(uintptr_t) offset];
}
210
211 struct smr_ep {
212 struct util_ep util_ep;
213 smr_rx_comp_func rx_comp;
214 smr_tx_comp_func tx_comp;
215 size_t tx_size;
216 size_t rx_size;
217 size_t min_multi_recv_size;
218 const char *name;
219 uint64_t msg_id;
220 struct smr_region *region;
221 struct smr_recv_fs *recv_fs; /* protected by rx_cq lock */
222 struct smr_queue recv_queue;
223 struct smr_queue trecv_queue;
224 struct smr_unexp_fs *unexp_fs;
225 struct smr_pend_fs *pend_fs;
226 struct smr_sar_fs *sar_fs;
227 struct smr_queue unexp_msg_queue;
228 struct smr_queue unexp_tagged_queue;
229 struct dlist_entry sar_list;
230 };
231
/*
 * Accessors for the default op flags kept in the embedded util_ep.
 * `smr_ep` is a pointer to struct smr_ep; each macro expands to the
 * member itself.
 */
#define smr_ep_rx_flags(smr_ep)	((smr_ep)->util_ep.rx_op_flags)
#define smr_ep_tx_flags(smr_ep)	((smr_ep)->util_ep.tx_op_flags)
234
/*
 * Build the name "<ep_name>_<msg_id>" in `shm_name`.
 *
 * shm_name: destination buffer; at most NAME_MAX - 1 bytes are written,
 *           including the terminating NUL.
 * ep_name:  NUL-terminated endpoint name used as the name's prefix.
 * msg_id:   message id appended in decimal.
 *
 * Returns snprintf()'s result: the length the full name would have, which
 * is >= NAME_MAX - 1 when the output was truncated.
 *
 * The id is printed through an explicit unsigned long long conversion so
 * that the conversion specifier matches the argument type on every ABI.
 * "%ld" with a uint64_t argument is a mismatch, and reads the wrong
 * argument width where long is 32 bits. Output is unchanged for ids up
 * to INT64_MAX.
 */
static inline int smr_mmap_name(char *shm_name, const char *ep_name,
				uint64_t msg_id)
{
	return snprintf(shm_name, NAME_MAX - 1, "%s_%llu",
			ep_name, (unsigned long long) msg_id);
}
241
/*
 * Endpoint, completion queue and counter open entry points. Each one
 * returns the newly created object through its pointer-to-pointer
 * argument.
 * NOTE(review): return convention (0 / negative fi_errno) is not visible
 * in this header -- confirm against the implementations.
 */
int smr_endpoint(struct fid_domain *domain,
		 struct fi_info *info,
		 struct fid_ep **ep,
		 void *context);

int smr_cq_open(struct fid_domain *domain,
		struct fi_cq_attr *attr,
		struct fid_cq **cq_fid,
		void *context);

int smr_cntr_open(struct fid_domain *domain,
		  struct fi_cntr_attr *attr,
		  struct fid_cntr **cntr_fid,
		  void *context);

/* NOTE(review): meaning of the return value is not visible here -- confirm */
int smr_verify_peer(struct smr_ep *ep, int peer_id);
251
252 void smr_format_pend_resp(struct smr_tx_entry *pend, struct smr_cmd *cmd,
253 void *context, const struct iovec *iov,
254 uint32_t iov_count, fi_addr_t id,
255 struct smr_resp *resp);
256 void smr_generic_format(struct smr_cmd *cmd, fi_addr_t peer_id, uint32_t op,
257 uint64_t tag, uint64_t data, uint64_t op_flags);
258 void smr_format_inline(struct smr_cmd *cmd, const struct iovec *iov,
259 size_t count);
260 void smr_format_inject(struct smr_cmd *cmd, const struct iovec *iov,
261 size_t count, struct smr_region *smr,
262 struct smr_inject_buf *tx_buf);
263 void smr_format_iov(struct smr_cmd *cmd, const struct iovec *iov, size_t count,
264 size_t total_len, struct smr_region *smr,
265 struct smr_resp *resp);
266 int smr_format_mmap(struct smr_ep *ep, struct smr_cmd *cmd,
267 const struct iovec *iov, size_t count, size_t total_len,
268 struct smr_tx_entry *pend, struct smr_resp *resp);
269 void smr_format_sar(struct smr_cmd *cmd, const struct iovec *iov, size_t count,
270 size_t total_len, struct smr_region *smr,
271 struct smr_region *peer_smr, struct smr_sar_msg *sar_msg,
272 struct smr_tx_entry *pending, struct smr_resp *resp);
273 size_t smr_copy_to_sar(struct smr_sar_msg *sar_msg, struct smr_resp *resp,
274 struct smr_cmd *cmd, const struct iovec *iov, size_t count,
275 size_t *bytes_done, int *next);
276 size_t smr_copy_from_sar(struct smr_sar_msg *sar_msg, struct smr_resp *resp,
277 struct smr_cmd *cmd, const struct iovec *iov, size_t count,
278 size_t *bytes_done, int *next);
279
280 int smr_complete_tx(struct smr_ep *ep, void *context, uint32_t op,
281 uint16_t flags, uint64_t err);
282 int smr_tx_comp(struct smr_ep *ep, void *context, uint32_t op,
283 uint16_t flags, uint64_t err);
284 int smr_tx_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
285 uint16_t flags, uint64_t err);
286 int smr_complete_rx(struct smr_ep *ep, void *context, uint32_t op,
287 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
288 uint64_t tag, uint64_t data, uint64_t err);
289 int smr_rx_comp(struct smr_ep *ep, void *context, uint32_t op,
290 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
291 uint64_t tag, uint64_t data, uint64_t err);
292 int smr_rx_src_comp(struct smr_ep *ep, void *context, uint32_t op,
293 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
294 uint64_t tag, uint64_t data, uint64_t err);
295 int smr_rx_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
296 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
297 uint64_t tag, uint64_t data, uint64_t err);
298 int smr_rx_src_comp_signal(struct smr_ep *ep, void *context, uint32_t op,
299 uint16_t flags, size_t len, void *buf, fi_addr_t addr,
300 uint64_t tag, uint64_t data, uint64_t err);
301
/* Derive a 64-bit flag word from an operation code and its 16-bit op flags. */
uint64_t smr_rx_cq_flags(uint32_t op, uint16_t op_flags);

/* Progress function operating on the generic util_ep. */
void smr_ep_progress(struct util_ep *util_ep);

/*
 * NOTE(review): presumably checks `entry` against the messages held on
 * `unexp_queue` -- confirm against the implementation.
 */
int smr_progress_unexp_queue(struct smr_ep *ep,
			     struct smr_rx_entry *entry,
			     struct smr_queue *unexp_queue);
308
309 #endif
310