1 /*
2  * Copyright (c) 2016 Intel Corporation, Inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include <ofi_util.h>
38 #include <ofi_coll.h>
39 #include "rxm.h"
40 
rxm_cntr_open(struct fid_domain * domain,struct fi_cntr_attr * attr,struct fid_cntr ** cntr_fid,void * context)41 int rxm_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
42 		  struct fid_cntr **cntr_fid, void *context)
43 {
44 	int ret;
45 	struct util_cntr *cntr;
46 
47 	cntr = calloc(1, sizeof(*cntr));
48 	if (!cntr)
49 		return -FI_ENOMEM;
50 
51 	ret = ofi_cntr_init(&rxm_prov, domain, attr, cntr,
52 			    &ofi_cntr_progress, context);
53 	if (ret)
54 		goto free;
55 
56 	*cntr_fid = &cntr->cntr_fid;
57 	return FI_SUCCESS;
58 
59 free:
60 	free(cntr);
61 	return ret;
62 }
63 
/* Domain-level operations exposed to applications.  Unsupported
 * operations (scalable EP, shared TX/RX contexts) are wired to the
 * fi_no_* stubs, which return -FI_ENOSYS. */
static struct fi_ops_domain rxm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = rxm_av_open,
	.cq_open = rxm_cq_open,
	.endpoint = rxm_endpoint,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = rxm_cntr_open,
	.poll_open = fi_poll_create,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
	.query_atomic = rxm_ep_query_atomic,
	.query_collective = ofi_query_collective,
};
77 
rxm_mr_remove_map_entry(struct rxm_mr * mr)78 static void rxm_mr_remove_map_entry(struct rxm_mr *mr)
79 {
80 	fastlock_acquire(&mr->domain->util_domain.lock);
81 	(void) ofi_mr_map_remove(&mr->domain->util_domain.mr_map,
82 				 mr->mr_fid.key);
83 	fastlock_release(&mr->domain->util_domain.lock);
84 }
85 
rxm_mr_add_map_entry(struct util_domain * domain,struct fi_mr_attr * msg_attr,struct rxm_mr * rxm_mr)86 static int rxm_mr_add_map_entry(struct util_domain *domain,
87 				struct fi_mr_attr *msg_attr,
88 				struct rxm_mr *rxm_mr)
89 {
90 	uint64_t temp_key;
91 	int ret;
92 
93 	msg_attr->requested_key = rxm_mr->mr_fid.key;
94 
95 	fastlock_acquire(&domain->lock);
96 	ret = ofi_mr_map_insert(&domain->mr_map, msg_attr, &temp_key, rxm_mr);
97 	if (OFI_UNLIKELY(ret)) {
98 		FI_WARN(&rxm_prov, FI_LOG_DOMAIN,
99 			"MR map insert for atomic verification failed %d\n",
100 			ret);
101 	} else {
102 		assert(rxm_mr->mr_fid.key == temp_key);
103 	}
104 	fastlock_release(&domain->lock);
105 
106 	return ret;
107 }
108 
/* Close an RxM domain: first the underlying MSG domain, then the
 * util domain bookkeeping.  The rxm_domain allocation is freed only
 * when both closes succeed; on error the partially-closed object is
 * left allocated and the error is returned to the caller. */
static int rxm_domain_close(fid_t fid)
{
	struct rxm_domain *domain;
	int ret;

	domain = container_of(fid, struct rxm_domain,
			      util_domain.domain_fid.fid);

	ret = fi_close(&domain->msg_domain->fid);
	if (ret)
		return ret;

	ret = ofi_domain_close(&domain->util_domain);
	if (!ret)
		free(domain);
	return ret;
}
127 
/* Base fid operations for the RxM domain; only close is supported. */
static struct fi_ops rxm_domain_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = rxm_domain_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};
135 
/* Release an RxM memory registration.
 *
 * Removes the atomic-verification map entry (if the domain supports
 * FI_ATOMIC), closes the underlying MSG MR, drops the domain
 * reference taken at registration, and frees the wrapper.  The
 * wrapper is freed even if the MSG MR close fails; the close error
 * is still propagated to the caller. */
static int rxm_mr_close(fid_t fid)
{
	struct rxm_mr *mr;
	int ret;

	mr = container_of(fid, struct rxm_mr, mr_fid.fid);

	if (mr->domain->util_domain.info_domain_caps & FI_ATOMIC)
		rxm_mr_remove_map_entry(mr);

	ret = fi_close(&mr->msg_mr->fid);
	if (ret)
		FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to close MSG MR\n");

	ofi_atomic_dec32(&mr->domain->util_domain.ref);
	free(mr);
	return ret;
}
154 
/* Base fid operations for RxM memory regions; only close is supported. */
static struct fi_ops rxm_mr_ops = {
	.size = sizeof(struct fi_ops),
	.close = rxm_mr_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};
162 
/* Upper bound on key-collision retries when registering a MSG MR. */
enum { RXM_MR_KEY_RETRY_MAX = 1024 };

/* Register [buf, buf+len) with the underlying MSG domain.
 *
 * RxM supplies its own provider-specific key (rxm_domain->mr_key,
 * tagged with FI_PROV_SPECIFIC).  If the MSG provider rejects the key
 * as already in use (-FI_ENOKEY), the next key is tried; after the
 * initial attempt plus RXM_MR_KEY_RETRY_MAX retries we give up and
 * return the last error.  On success *mr holds the new MSG MR.
 */
int rxm_msg_mr_reg_internal(struct rxm_domain *rxm_domain, const void *buf,
	size_t len, uint64_t acs, uint64_t flags, struct fid_mr **mr)
{
	int ret, tries = 0;

	do {
		ret = fi_mr_reg(rxm_domain->msg_domain, buf, len, acs, 0,
				rxm_domain->mr_key++ | FI_PROV_SPECIFIC,
				flags, mr, NULL);
	} while (ret == -FI_ENOKEY && tries++ < RXM_MR_KEY_RETRY_MAX);

	return ret;
}
177 
/* Close up to count MSG MRs, skipping NULL slots, and NULL out each
 * closed entry so the array can be passed here again safely.  Close
 * failures are logged but do not stop the sweep. */
void rxm_msg_mr_closev(struct fid_mr **mr, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++) {
		if (!mr[i])
			continue;
		if (fi_close(&mr[i]->fid))
			FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
				"Unable to close msg mr: %zu\n", i);
		mr[i] = NULL;
	}
}
193 
/* Register each iov entry with the MSG domain, capping the total
 * registered bytes at reg_limit (an entry longer than the remaining
 * limit is truncated; once the limit is exhausted remaining entries
 * are skipped).  On any failure all MRs registered so far are closed.
 *
 * NOTE(review): cleanup walks all `count` slots and skips only NULL
 * entries — callers appear to be expected to pass a zero-initialized
 * mr[] array; confirm at call sites. */
int rxm_msg_mr_regv(struct rxm_ep *rxm_ep, const struct iovec *iov,
		    size_t count, size_t reg_limit, uint64_t access,
		    struct fid_mr **mr)
{
	struct rxm_domain *domain;
	size_t i, len;
	int ret = 0;

	domain = container_of(rxm_ep->util_ep.domain, struct rxm_domain,
			      util_domain);

	for (i = 0; i < count && reg_limit; i++) {
		len = MIN(iov[i].iov_len, reg_limit);
		ret = rxm_msg_mr_reg_internal(domain, iov[i].iov_base, len,
					      access, 0, &mr[i]);
		if (ret) {
			rxm_msg_mr_closev(mr, count);
			break;
		}
		reg_limit -= len;
	}
	return ret;
}
218 
219 static uint64_t
rxm_mr_get_msg_access(struct rxm_domain * rxm_domain,uint64_t access)220 rxm_mr_get_msg_access(struct rxm_domain *rxm_domain, uint64_t access)
221 {
222 	/* Additional flags to use RMA read for large message transfers */
223 	access |= FI_READ | FI_REMOTE_READ;
224 
225 	if (rxm_domain->mr_local)
226 		access |= FI_WRITE;
227 	return access;
228 }
229 
/* Fill in the RxM MR wrapper after the MSG MR has been registered:
 * sets the fid class/ops, adopts the MSG provider's key, and takes a
 * reference on the owning domain (released in rxm_mr_close()). */
static void rxm_mr_init(struct rxm_mr *rxm_mr, struct rxm_domain *domain,
			void *context)
{
	rxm_mr->mr_fid.fid.fclass = FI_CLASS_MR;
	rxm_mr->mr_fid.fid.context = context;
	rxm_mr->mr_fid.fid.ops = &rxm_mr_ops;
	/* Store msg_mr as rxm_mr descriptor so that we can get its key when
	 * the app passes msg_mr as the descriptor in fi_send and friends.
	 * The key would be used in large message transfer protocol and RMA. */
	rxm_mr->mr_fid.mem_desc = rxm_mr->msg_mr;
	rxm_mr->mr_fid.key = fi_mr_key(rxm_mr->msg_mr);
	rxm_mr->domain = domain;
	ofi_atomic_inc32(&domain->util_domain.ref);
}
244 
rxm_mr_regattr(struct fid * fid,const struct fi_mr_attr * attr,uint64_t flags,struct fid_mr ** mr)245 static int rxm_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
246 			  uint64_t flags, struct fid_mr **mr)
247 {
248 	struct rxm_domain *rxm_domain;
249 	struct fi_mr_attr msg_attr = *attr;
250 	struct rxm_mr *rxm_mr;
251 	int ret;
252 
253 	rxm_domain = container_of(fid, struct rxm_domain,
254 				  util_domain.domain_fid.fid);
255 
256 	rxm_mr = calloc(1, sizeof(*rxm_mr));
257 	if (!rxm_mr)
258 		return -FI_ENOMEM;
259 
260 	msg_attr.access = rxm_mr_get_msg_access(rxm_domain, attr->access);
261 
262 	ret = fi_mr_regattr(rxm_domain->msg_domain, &msg_attr,
263 			    flags, &rxm_mr->msg_mr);
264 	if (ret) {
265 		FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to register MSG MR\n");
266 		goto err;
267 	}
268 	rxm_mr_init(rxm_mr, rxm_domain, attr->context);
269 	*mr = &rxm_mr->mr_fid;
270 
271 	if (rxm_domain->util_domain.info_domain_caps & FI_ATOMIC) {
272 		ret = rxm_mr_add_map_entry(&rxm_domain->util_domain,
273 					   &msg_attr, rxm_mr);
274 		if (ret)
275 			goto map_err;
276 	}
277 
278 	return 0;
279 
280 map_err:
281 	fi_close(&rxm_mr->mr_fid.fid);
282 	return ret;
283 err:
284 	free(rxm_mr);
285 	return ret;
286 
287 }
288 
rxm_mr_regv(struct fid * fid,const struct iovec * iov,size_t count,uint64_t access,uint64_t offset,uint64_t requested_key,uint64_t flags,struct fid_mr ** mr,void * context)289 static int rxm_mr_regv(struct fid *fid, const struct iovec *iov, size_t count,
290 		       uint64_t access, uint64_t offset, uint64_t requested_key,
291 		       uint64_t flags, struct fid_mr **mr, void *context)
292 {
293 	struct rxm_domain *rxm_domain;
294 	struct rxm_mr *rxm_mr;
295 	int ret;
296 	struct fi_mr_attr msg_attr = {
297 		.mr_iov = iov,
298 		.iov_count = count,
299 		.access = access,
300 		.offset = offset,
301 		.requested_key = requested_key,
302 		.context = context,
303 	};
304 
305 	rxm_domain = container_of(fid, struct rxm_domain,
306 				  util_domain.domain_fid.fid);
307 
308 	rxm_mr = calloc(1, sizeof(*rxm_mr));
309 	if (!rxm_mr)
310 		return -FI_ENOMEM;
311 
312 	access = rxm_mr_get_msg_access(rxm_domain, access);
313 
314 	ret = fi_mr_regv(rxm_domain->msg_domain, iov, count, access, offset,
315 			 requested_key, flags, &rxm_mr->msg_mr, context);
316 	if (ret) {
317 		FI_WARN(&rxm_prov, FI_LOG_DOMAIN, "Unable to register MSG MR\n");
318 		goto err;
319 	}
320 	rxm_mr_init(rxm_mr, rxm_domain, context);
321 	*mr = &rxm_mr->mr_fid;
322 
323 	if (rxm_domain->util_domain.info_domain_caps & FI_ATOMIC) {
324 		ret = rxm_mr_add_map_entry(&rxm_domain->util_domain,
325 					   &msg_attr, rxm_mr);
326 		if (ret)
327 			goto map_err;
328 	}
329 
330 	return 0;
331 map_err:
332 	fi_close(&rxm_mr->mr_fid.fid);
333 	return ret;
334 err:
335 	free(rxm_mr);
336 	return ret;
337 }
338 
/* fi_mr_reg() handler: wrap the single buffer in a one-entry iovec
 * and delegate to rxm_mr_regv(). */
static int rxm_mr_reg(struct fid *fid, const void *buf, size_t len,
		      uint64_t access, uint64_t offset, uint64_t requested_key,
		      uint64_t flags, struct fid_mr **mr, void *context)
{
	struct iovec iov = {
		.iov_base = (void *) buf,
		.iov_len = len,
	};

	return rxm_mr_regv(fid, &iov, 1, access, offset, requested_key,
			   flags, mr, context);
}
350 
/* Memory-registration operations installed on the domain fid in
 * rxm_domain_open(), replacing the defaults from ofi_domain_init(). */
static struct fi_ops_mr rxm_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = rxm_mr_reg,
	.regv = rxm_mr_regv,
	.regattr = rxm_mr_regattr,
};
357 
rxm_domain_open(struct fid_fabric * fabric,struct fi_info * info,struct fid_domain ** domain,void * context)358 int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info,
359 		struct fid_domain **domain, void *context)
360 {
361 	int ret;
362 	struct rxm_domain *rxm_domain;
363 	struct rxm_fabric *rxm_fabric;
364 	struct fi_info *msg_info;
365 
366 	rxm_domain = calloc(1, sizeof(*rxm_domain));
367 	if (!rxm_domain)
368 		return -FI_ENOMEM;
369 
370 	rxm_fabric = container_of(fabric, struct rxm_fabric, util_fabric.fabric_fid);
371 
372 	ret = ofi_get_core_info(fabric->api_version, NULL, NULL, 0, &rxm_util_prov,
373 				info, rxm_info_to_core, &msg_info);
374 	if (ret)
375 		goto err1;
376 
377 	ret = fi_domain(rxm_fabric->msg_fabric, msg_info,
378 			&rxm_domain->msg_domain, context);
379 	if (ret)
380 		goto err2;
381 
382 	ret = ofi_domain_init(fabric, info, &rxm_domain->util_domain, context);
383 	if (ret) {
384 		goto err3;
385 	}
386 
387 	/* We turn off the mr map mode bit FI_MR_PROV_KEY.  We always use the
388 	 * key returned by the MSG provider.  That key may be generated by the
389 	 * MSG provider, or will be provided as input by the rxm provider.
390 	 */
391 	rxm_domain->util_domain.mr_map.mode &= ~FI_MR_PROV_KEY;
392 
393 	rxm_domain->max_atomic_size = rxm_ep_max_atomic_size(info);
394 	*domain = &rxm_domain->util_domain.domain_fid;
395 	(*domain)->fid.ops = &rxm_domain_fi_ops;
396 	/* Replace MR ops set by ofi_domain_init() */
397 	(*domain)->mr = &rxm_domain_mr_ops;
398 	(*domain)->ops = &rxm_domain_ops;
399 
400 	rxm_domain->mr_local = ofi_mr_local(msg_info) && !ofi_mr_local(info);
401 
402 	fi_freeinfo(msg_info);
403 	return 0;
404 err3:
405 	fi_close(&rxm_domain->msg_domain->fid);
406 err2:
407 	fi_freeinfo(msg_info);
408 err1:
409 	free(rxm_domain);
410 	return ret;
411 }
412