1 /*
2 * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include <ofi_util.h>
38 #include <ofi_coll.h>
39 #include "rxm.h"
40
rxm_cntr_open(struct fid_domain * domain,struct fi_cntr_attr * attr,struct fid_cntr ** cntr_fid,void * context)41 int rxm_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
42 struct fid_cntr **cntr_fid, void *context)
43 {
44 int ret;
45 struct util_cntr *cntr;
46
47 cntr = calloc(1, sizeof(*cntr));
48 if (!cntr)
49 return -FI_ENOMEM;
50
51 ret = ofi_cntr_init(&rxm_prov, domain, attr, cntr,
52 &ofi_cntr_progress, context);
53 if (ret)
54 goto free;
55
56 *cntr_fid = &cntr->cntr_fid;
57 return FI_SUCCESS;
58
59 free:
60 free(cntr);
61 return ret;
62 }
63
/* Domain operations exported by the rxm provider; operations rxm does not
 * support are mapped to the fi_no_* stubs. */
static struct fi_ops_domain rxm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = rxm_av_open,
	.cq_open = rxm_cq_open,
	.endpoint = rxm_endpoint,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = rxm_cntr_open,
	.poll_open = fi_poll_create,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
	.query_atomic = rxm_ep_query_atomic,
	.query_collective = ofi_query_collective,
};
77
rxm_mr_remove_map_entry(struct rxm_mr * mr)78 static void rxm_mr_remove_map_entry(struct rxm_mr *mr)
79 {
80 fastlock_acquire(&mr->domain->util_domain.lock);
81 (void) ofi_mr_map_remove(&mr->domain->util_domain.mr_map,
82 mr->mr_fid.key);
83 fastlock_release(&mr->domain->util_domain.lock);
84 }
85
rxm_mr_add_map_entry(struct util_domain * domain,struct fi_mr_attr * msg_attr,struct rxm_mr * rxm_mr)86 static int rxm_mr_add_map_entry(struct util_domain *domain,
87 struct fi_mr_attr *msg_attr,
88 struct rxm_mr *rxm_mr)
89 {
90 uint64_t temp_key;
91 int ret;
92
93 msg_attr->requested_key = rxm_mr->mr_fid.key;
94
95 fastlock_acquire(&domain->lock);
96 ret = ofi_mr_map_insert(&domain->mr_map, msg_attr, &temp_key, rxm_mr);
97 if (OFI_UNLIKELY(ret)) {
98 FI_WARN(&rxm_prov, FI_LOG_DOMAIN,
99 "MR map insert for atomic verification failed %d\n",
100 ret);
101 } else {
102 assert(rxm_mr->mr_fid.key == temp_key);
103 }
104 fastlock_release(&domain->lock);
105
106 return ret;
107 }
108
/* Close the rxm domain: shut down the underlying MSG domain, then the
 * util domain, and free the object only if both closes succeeded. */
static int rxm_domain_close(fid_t fid)
{
	struct rxm_domain *rxm_domain;
	int ret;

	rxm_domain = container_of(fid, struct rxm_domain,
				  util_domain.domain_fid.fid);

	ret = fi_close(&rxm_domain->msg_domain->fid);
	if (!ret)
		ret = ofi_domain_close(&rxm_domain->util_domain);
	if (!ret)
		free(rxm_domain);

	return ret;
}
127
/* Base fid operations for the rxm domain; only close is supported. */
static struct fi_ops rxm_domain_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = rxm_domain_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};
135
/* Tear down an rxm MR: drop its atomic-verification map entry (if any),
 * close the underlying MSG MR, release the domain reference, and free.
 * Returns the MSG MR close status. */
static int rxm_mr_close(fid_t fid)
{
	struct rxm_mr *rxm_mr = container_of(fid, struct rxm_mr, mr_fid.fid);
	int ret;

	if (rxm_mr->domain->util_domain.info_domain_caps & FI_ATOMIC)
		rxm_mr_remove_map_entry(rxm_mr);

	ret = fi_close(&rxm_mr->msg_mr->fid);
	if (ret)
		FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to close MSG MR\n");

	ofi_atomic_dec32(&rxm_mr->domain->util_domain.ref);
	free(rxm_mr);
	return ret;
}
154
/* Base fid operations installed on every rxm MR; only close is supported. */
static struct fi_ops rxm_mr_ops = {
	.size = sizeof(struct fi_ops),
	.close = rxm_mr_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};
162
rxm_msg_mr_reg_internal(struct rxm_domain * rxm_domain,const void * buf,size_t len,uint64_t acs,uint64_t flags,struct fid_mr ** mr)163 int rxm_msg_mr_reg_internal(struct rxm_domain *rxm_domain, const void *buf,
164 size_t len, uint64_t acs, uint64_t flags, struct fid_mr **mr)
165 {
166 int ret, tries = 0;
167
168 /* If we can't get a key within 1024 tries, give up */
169 do {
170 ret = fi_mr_reg(rxm_domain->msg_domain, buf, len, acs, 0,
171 rxm_domain->mr_key++ | FI_PROV_SPECIFIC,
172 flags, mr, NULL);
173 } while (ret == -FI_ENOKEY && tries++ < 1024);
174
175 return ret;
176 }
177
rxm_msg_mr_closev(struct fid_mr ** mr,size_t count)178 void rxm_msg_mr_closev(struct fid_mr **mr, size_t count)
179 {
180 int ret;
181 size_t i;
182
183 for (i = 0; i < count; i++) {
184 if (mr[i]) {
185 ret = fi_close(&mr[i]->fid);
186 if (ret)
187 FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
188 "Unable to close msg mr: %zu\n", i);
189 mr[i] = NULL;
190 }
191 }
192 }
193
rxm_msg_mr_regv(struct rxm_ep * rxm_ep,const struct iovec * iov,size_t count,size_t reg_limit,uint64_t access,struct fid_mr ** mr)194 int rxm_msg_mr_regv(struct rxm_ep *rxm_ep, const struct iovec *iov,
195 size_t count, size_t reg_limit, uint64_t access,
196 struct fid_mr **mr)
197 {
198 struct rxm_domain *rxm_domain;
199 size_t i;
200 int ret;
201
202 rxm_domain = container_of(rxm_ep->util_ep.domain, struct rxm_domain,
203 util_domain);
204
205 for (i = 0; i < count && reg_limit; i++) {
206 size_t len = MIN(iov[i].iov_len, reg_limit);
207 ret = rxm_msg_mr_reg_internal(rxm_domain, iov[i].iov_base,
208 len, access, 0, &mr[i]);
209 if (ret)
210 goto err;
211 reg_limit -= len;
212 }
213 return 0;
214 err:
215 rxm_msg_mr_closev(mr, count);
216 return ret;
217 }
218
219 static uint64_t
rxm_mr_get_msg_access(struct rxm_domain * rxm_domain,uint64_t access)220 rxm_mr_get_msg_access(struct rxm_domain *rxm_domain, uint64_t access)
221 {
222 /* Additional flags to use RMA read for large message transfers */
223 access |= FI_READ | FI_REMOTE_READ;
224
225 if (rxm_domain->mr_local)
226 access |= FI_WRITE;
227 return access;
228 }
229
rxm_mr_init(struct rxm_mr * rxm_mr,struct rxm_domain * domain,void * context)230 static void rxm_mr_init(struct rxm_mr *rxm_mr, struct rxm_domain *domain,
231 void *context)
232 {
233 rxm_mr->mr_fid.fid.fclass = FI_CLASS_MR;
234 rxm_mr->mr_fid.fid.context = context;
235 rxm_mr->mr_fid.fid.ops = &rxm_mr_ops;
236 /* Store msg_mr as rxm_mr descriptor so that we can get its key when
237 * the app passes msg_mr as the descriptor in fi_send and friends.
238 * The key would be used in large message transfer protocol and RMA. */
239 rxm_mr->mr_fid.mem_desc = rxm_mr->msg_mr;
240 rxm_mr->mr_fid.key = fi_mr_key(rxm_mr->msg_mr);
241 rxm_mr->domain = domain;
242 ofi_atomic_inc32(&domain->util_domain.ref);
243 }
244
rxm_mr_regattr(struct fid * fid,const struct fi_mr_attr * attr,uint64_t flags,struct fid_mr ** mr)245 static int rxm_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
246 uint64_t flags, struct fid_mr **mr)
247 {
248 struct rxm_domain *rxm_domain;
249 struct fi_mr_attr msg_attr = *attr;
250 struct rxm_mr *rxm_mr;
251 int ret;
252
253 rxm_domain = container_of(fid, struct rxm_domain,
254 util_domain.domain_fid.fid);
255
256 rxm_mr = calloc(1, sizeof(*rxm_mr));
257 if (!rxm_mr)
258 return -FI_ENOMEM;
259
260 msg_attr.access = rxm_mr_get_msg_access(rxm_domain, attr->access);
261
262 ret = fi_mr_regattr(rxm_domain->msg_domain, &msg_attr,
263 flags, &rxm_mr->msg_mr);
264 if (ret) {
265 FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to register MSG MR\n");
266 goto err;
267 }
268 rxm_mr_init(rxm_mr, rxm_domain, attr->context);
269 *mr = &rxm_mr->mr_fid;
270
271 if (rxm_domain->util_domain.info_domain_caps & FI_ATOMIC) {
272 ret = rxm_mr_add_map_entry(&rxm_domain->util_domain,
273 &msg_attr, rxm_mr);
274 if (ret)
275 goto map_err;
276 }
277
278 return 0;
279
280 map_err:
281 fi_close(&rxm_mr->mr_fid.fid);
282 return ret;
283 err:
284 free(rxm_mr);
285 return ret;
286
287 }
288
rxm_mr_regv(struct fid * fid,const struct iovec * iov,size_t count,uint64_t access,uint64_t offset,uint64_t requested_key,uint64_t flags,struct fid_mr ** mr,void * context)289 static int rxm_mr_regv(struct fid *fid, const struct iovec *iov, size_t count,
290 uint64_t access, uint64_t offset, uint64_t requested_key,
291 uint64_t flags, struct fid_mr **mr, void *context)
292 {
293 struct rxm_domain *rxm_domain;
294 struct rxm_mr *rxm_mr;
295 int ret;
296 struct fi_mr_attr msg_attr = {
297 .mr_iov = iov,
298 .iov_count = count,
299 .access = access,
300 .offset = offset,
301 .requested_key = requested_key,
302 .context = context,
303 };
304
305 rxm_domain = container_of(fid, struct rxm_domain,
306 util_domain.domain_fid.fid);
307
308 rxm_mr = calloc(1, sizeof(*rxm_mr));
309 if (!rxm_mr)
310 return -FI_ENOMEM;
311
312 access = rxm_mr_get_msg_access(rxm_domain, access);
313
314 ret = fi_mr_regv(rxm_domain->msg_domain, iov, count, access, offset,
315 requested_key, flags, &rxm_mr->msg_mr, context);
316 if (ret) {
317 FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to register MSG MR\n");
318 goto err;
319 }
320 rxm_mr_init(rxm_mr, rxm_domain, context);
321 *mr = &rxm_mr->mr_fid;
322
323 if (rxm_domain->util_domain.info_domain_caps & FI_ATOMIC) {
324 ret = rxm_mr_add_map_entry(&rxm_domain->util_domain,
325 &msg_attr, rxm_mr);
326 if (ret)
327 goto map_err;
328 }
329
330 return 0;
331 map_err:
332 fi_close(&rxm_mr->mr_fid.fid);
333 return ret;
334 err:
335 free(rxm_mr);
336 return ret;
337 }
338
/* fi_mr_reg() entry point: a single contiguous buffer is just the
 * one-element case of rxm_mr_regv(). */
static int rxm_mr_reg(struct fid *fid, const void *buf, size_t len,
		      uint64_t access, uint64_t offset, uint64_t requested_key,
		      uint64_t flags, struct fid_mr **mr, void *context)
{
	struct iovec iov = {
		.iov_base = (void *) buf,
		.iov_len = len,
	};

	return rxm_mr_regv(fid, &iov, 1, access, offset, requested_key,
			   flags, mr, context);
}
350
/* Memory-registration operations installed on the rxm domain, replacing
 * the defaults set by ofi_domain_init(). */
static struct fi_ops_mr rxm_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = rxm_mr_reg,
	.regv = rxm_mr_regv,
	.regattr = rxm_mr_regattr,
};
357
rxm_domain_open(struct fid_fabric * fabric,struct fi_info * info,struct fid_domain ** domain,void * context)358 int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info,
359 struct fid_domain **domain, void *context)
360 {
361 int ret;
362 struct rxm_domain *rxm_domain;
363 struct rxm_fabric *rxm_fabric;
364 struct fi_info *msg_info;
365
366 rxm_domain = calloc(1, sizeof(*rxm_domain));
367 if (!rxm_domain)
368 return -FI_ENOMEM;
369
370 rxm_fabric = container_of(fabric, struct rxm_fabric, util_fabric.fabric_fid);
371
372 ret = ofi_get_core_info(fabric->api_version, NULL, NULL, 0, &rxm_util_prov,
373 info, rxm_info_to_core, &msg_info);
374 if (ret)
375 goto err1;
376
377 ret = fi_domain(rxm_fabric->msg_fabric, msg_info,
378 &rxm_domain->msg_domain, context);
379 if (ret)
380 goto err2;
381
382 ret = ofi_domain_init(fabric, info, &rxm_domain->util_domain, context);
383 if (ret) {
384 goto err3;
385 }
386
387 /* We turn off the mr map mode bit FI_MR_PROV_KEY. We always use the
388 * key returned by the MSG provider. That key may be generated by the
389 * MSG provider, or will be provided as input by the rxm provider.
390 */
391 rxm_domain->util_domain.mr_map.mode &= ~FI_MR_PROV_KEY;
392
393 rxm_domain->max_atomic_size = rxm_ep_max_atomic_size(info);
394 *domain = &rxm_domain->util_domain.domain_fid;
395 (*domain)->fid.ops = &rxm_domain_fi_ops;
396 /* Replace MR ops set by ofi_domain_init() */
397 (*domain)->mr = &rxm_domain_mr_ops;
398 (*domain)->ops = &rxm_domain_ops;
399
400 rxm_domain->mr_local = ofi_mr_local(msg_info) && !ofi_mr_local(info);
401
402 fi_freeinfo(msg_info);
403 return 0;
404 err3:
405 fi_close(&rxm_domain->msg_domain->fid);
406 err2:
407 fi_freeinfo(msg_info);
408 err1:
409 free(rxm_domain);
410 return ret;
411 }
412