1 /*
2  * Copyright (C) 2016 by Argonne National Laboratory.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include "rdma/bgq/fi_bgq.h"
33 
34 #include <ofi.h>
35 #include <ofi_enosys.h>
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <limits.h>
44 
/*
 * Forward declaration of the context constructor used by fi_bgq_tx_ctx()
 * and fi_bgq_rx_ctx() below.  Those callers pass -1 as the index of the
 * direction they are not creating (rx_index == -1 for a transmit context,
 * tx_index == -1 for a receive context).
 */
int fi_bgq_endpoint_rx_tx(struct fid_domain *dom,
		struct fi_info *info,
		struct fid_ep **ep,
		void *context,
		const int rx_index,
		const int tx_index);
48 
/*
 * Close a scalable endpoint.
 *
 * The fid is validated as FI_CLASS_SEP, then the reference this endpoint
 * holds on its address vector (if one was bound) is dropped, the
 * endpoint's own reference counter is finalized, and the reference on the
 * owning domain is dropped.  Finally the private fi_info copy and the
 * backing allocation are freed.
 *
 * Returns 0 on success, or the first non-zero value reported by
 * fi_bgq_fid_check(), fi_bgq_ref_dec() or fi_bgq_ref_finalize(); in that
 * case no memory is released.
 */
static int fi_bgq_close_sep(fid_t fid)
{
	int ret;
	struct fi_bgq_sep *bgq_sep;
	void *memptr;

	ret = fi_bgq_fid_check(fid, FI_CLASS_SEP, "scalable endpoint");
	if (ret)
		return ret;

	bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid);

	/*
	 * The endpoint is created zero-filled and 'av' is only set by
	 * fi_bgq_bind_sep(), so it is NULL when no address vector was ever
	 * bound; there is no reference to drop in that case.
	 */
	if (bgq_sep->av) {
		ret = fi_bgq_ref_dec(&bgq_sep->av->ref_cnt, "address vector");
		if (ret)
			return ret;
	}

	ret = fi_bgq_ref_finalize(&bgq_sep->ref_cnt, "scalable endpoint");
	if (ret)
		return ret;

	ret = fi_bgq_ref_dec(&bgq_sep->domain->ref_cnt, "domain");
	if (ret)
		return ret;

	free(bgq_sep->info->ep_attr);
	free(bgq_sep->info);

	/*
	 * bgq_sep lives inside the block referenced by 'memptr' (it is the
	 * cache-line aligned view of that block), so fetch the pointer into
	 * a local before releasing the block.
	 */
	memptr = bgq_sep->memptr;
	free(memptr);

	return 0;
}
77 
/*
 * Control entry point for a scalable endpoint.
 *
 * No command is acted upon: 'command' and 'arg' are ignored and 0 is
 * always returned.
 */
static int fi_bgq_control_sep(fid_t fid, int command, void *arg)
{
	/* Resolved only for symmetry with the other fid operations; unused. */
	struct fid_ep *sep_fid __attribute__ ((unused)) =
		container_of(fid, struct fid_ep, fid);

	return 0;
}
84 
fi_bgq_tx_ctx(struct fid_ep * sep,int index,struct fi_tx_attr * attr,struct fid_ep ** tx_ep,void * context)85 static int fi_bgq_tx_ctx(struct fid_ep *sep, int index,
86 			struct fi_tx_attr *attr, struct fid_ep **tx_ep,
87 			void *context)
88 {
89 	int ret;
90 	struct fi_info info = {0};
91 	struct fi_tx_attr tx_attr = {0};
92 	struct fi_ep_attr ep_attr = {0};
93 	struct fi_domain_attr dom_attr = {0};
94 	struct fi_fabric_attr fab_attr = {0};
95 	struct fi_bgq_sep *bgq_sep;
96 	struct fi_bgq_ep  *bgq_tx_ep;
97 
98 	if (!sep || !attr || !tx_ep) {
99 		errno = FI_EINVAL;
100 		return -errno;
101 	}
102 
103 	bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid);
104 
105 	uint64_t caps = attr->caps;	/* TODO - "By default, a transmit context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." */
106 
107 	if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_RECV)) {
108 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
109 				"FI_MSG|FI_TAGGED with FI_RECV capability specified for a TX context\n");
110 		caps &= ~FI_RECV;
111 	}
112 
113 	if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_REMOTE_READ || caps & FI_REMOTE_WRITE)) {
114 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
115 				"FI_RMA|FI_ATOMIC with FI_REMOTE_READ|FI_REMOTE_WRITE capability specified for a TX context\n");
116 		caps &= ~FI_REMOTE_READ;
117 		caps &= ~FI_REMOTE_WRITE;
118 	}
119 
120 	if (caps & FI_MSG || caps & FI_TAGGED) {
121 		caps |= FI_SEND;
122 	}
123 
124 	if (caps & FI_RMA || caps & FI_ATOMIC) {
125 		caps |= FI_READ;
126 		caps |= FI_WRITE;
127 	}
128 
129 	if (ofi_recv_allowed(caps) || ofi_rma_target_allowed(caps)) {
130 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
131 				"RX capabilities specified for TX context\n");
132 		errno = FI_EINVAL;
133 		return -errno;
134 	}
135 
136 	if (!ofi_send_allowed(caps) && !ofi_rma_initiate_allowed(caps)) {
137 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
138 				"TX capabilities not specified for TX context\n");
139 		errno = FI_EINVAL;
140 		return -errno;
141 	}
142 
143 	if (bgq_sep->domain->tx.count >= fi_bgq_domain_get_tx_max(bgq_sep->domain)) {
144 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
145 				"TX ctx count exceeded (max %lu, created %lu)\n",
146 				fi_bgq_domain_get_tx_max(bgq_sep->domain), bgq_sep->domain->tx.count);
147 		errno = FI_EINVAL;
148 		return -errno;
149 	}
150 
151 	info.caps = caps;
152 	info.mode = attr->mode;
153 
154 	info.tx_attr = &tx_attr;
155 	memcpy(info.tx_attr, attr, sizeof(*info.tx_attr));
156 
157 	info.ep_attr = &ep_attr;
158 	memcpy(info.ep_attr, bgq_sep->info->ep_attr, sizeof(*info.ep_attr));
159 
160 	info.domain_attr = &dom_attr;
161 	memcpy(info.domain_attr, bgq_sep->info->domain_attr, sizeof(*info.domain_attr));
162 
163 	info.fabric_attr = &fab_attr;
164 	memcpy(info.fabric_attr, bgq_sep->info->fabric_attr, sizeof(*info.fabric_attr));
165 #ifdef FI_BGQ_TRACE
166         fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with tx index %d\n",index);
167 #endif
168 
169 	ret = fi_bgq_endpoint_rx_tx((struct fid_domain *)bgq_sep->domain,
170 		&info, tx_ep, context, -1, index);
171 	if (ret) {
172 		goto err;
173 	}
174 
175 	bgq_tx_ep = container_of(*tx_ep, struct fi_bgq_ep, ep_fid);
176 	bgq_tx_ep->ep_fid.fid.fclass = FI_CLASS_TX_CTX;
177 
178 	bgq_tx_ep->av = bgq_sep->av;
179 	fi_bgq_ref_inc(&bgq_tx_ep->av->ref_cnt, "address vector");
180 
181 	bgq_tx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid);
182 
183 	++ bgq_sep->domain->tx.count;
184 
185 	fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint");
186 
187 	attr->caps = caps;
188 
189 	return 0;
190 
191 err:
192 	return -errno;
193 }
194 
fi_bgq_rx_ctx(struct fid_ep * sep,int index,struct fi_rx_attr * attr,struct fid_ep ** rx_ep,void * context)195 static int fi_bgq_rx_ctx(struct fid_ep *sep, int index,
196 			struct fi_rx_attr *attr, struct fid_ep **rx_ep,
197 			void *context)
198 {
199 	int ret;
200 	struct fi_info info = {0};
201 	struct fi_bgq_sep *bgq_sep;
202 	struct fi_bgq_ep  *bgq_rx_ep;
203 
204 	if (!sep || !attr || !rx_ep) {
205 		errno = FI_EINVAL;
206 		return -errno;
207 	}
208 
209 	bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid);
210 
211 	uint64_t caps = attr->caps;	/* TODO - "By default, a receive context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." */
212 
213 	if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_SEND)) {
214 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
215 				"FI_MSG|FI_TAGGED with FI_SEND capability specified for a RX context\n");
216 		caps &= ~FI_SEND;
217 	}
218 
219 	if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_READ || caps & FI_WRITE)) {
220 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
221 				"FI_RMA|FI_ATOMIC with FI_READ|FI_WRITE capability specified for a RX context\n");
222 		caps &= ~FI_READ;
223 		caps &= ~FI_WRITE;
224 	}
225 
226 	if (caps & FI_MSG || caps & FI_TAGGED) {
227 		caps |= FI_RECV;
228 	}
229 
230 	if (caps & FI_RMA || caps & FI_ATOMIC) {
231 		caps |= FI_REMOTE_READ;
232 		caps |= FI_REMOTE_WRITE;
233 	}
234 
235 	if (ofi_send_allowed(caps) || ofi_rma_initiate_allowed(caps)) {
236 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
237 				"TX capabilities specified for RX context\n");
238 		errno = FI_EINVAL;
239 		return -errno;
240 	}
241 
242 	if (!ofi_recv_allowed(caps) && !ofi_rma_target_allowed(caps)) {
243 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
244 				"RX capabilities not specified for RX context\n");
245 		errno = FI_EINVAL;
246 		return -errno;
247 	}
248 
249 	if (bgq_sep->domain->rx.count >= fi_bgq_domain_get_rx_max(bgq_sep->domain)) {
250 		FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
251 				"RX ctx count exceeded (max %lu, created %lu)\n",
252 				fi_bgq_domain_get_rx_max(bgq_sep->domain), bgq_sep->domain->rx.count);
253 		errno = FI_EINVAL;
254 		return -errno;
255 	}
256 
257 	info.caps = caps;
258 	info.mode = attr->mode;
259 
260 	info.rx_attr = calloc(1, sizeof(*info.rx_attr));
261 	if (!info.rx_attr) {
262 		errno = FI_ENOMEM;
263 		goto err;
264 	}
265 
266 	info.rx_attr->caps     = caps;
267 	info.rx_attr->mode     = attr->mode;
268 	info.rx_attr->op_flags = attr->op_flags;
269 	info.rx_attr->msg_order = attr->msg_order;
270 	info.rx_attr->total_buffered_recv = attr->total_buffered_recv;
271 	info.rx_attr->iov_limit = attr->iov_limit;
272 
273 	info.ep_attr = calloc(1, sizeof(*info.ep_attr));
274 	if (!info.ep_attr) {
275 		errno = FI_ENOMEM;
276 		goto err;
277 	}
278 	memcpy(info.ep_attr, bgq_sep->info->ep_attr,
279 			sizeof(*info.ep_attr));
280 
281 	info.domain_attr = calloc(1, sizeof(*info.domain_attr));
282 	if (!info.domain_attr) {
283 		errno = FI_ENOMEM;
284 		goto err;
285 	}
286 	memcpy(info.domain_attr, bgq_sep->info->domain_attr,
287 			sizeof(*info.domain_attr));
288 
289 	info.fabric_attr = calloc(1, sizeof(*info.fabric_attr));
290 	if (!info.fabric_attr) {
291 		errno = FI_ENOMEM;
292 		goto err;
293 	}
294 	memcpy(info.fabric_attr, bgq_sep->info->fabric_attr,
295 			sizeof(*info.fabric_attr));
296 
297 #ifdef FI_BGQ_TRACE
298         fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with rx index %d\n",index);
299 #endif
300 	ret = fi_bgq_endpoint_rx_tx(&bgq_sep->domain->domain_fid, &info,
301 			rx_ep, context, index, -1);
302 	if (ret) {
303 		goto err;
304 	}
305 
306 	bgq_rx_ep = container_of(*rx_ep, struct fi_bgq_ep, ep_fid);
307 	bgq_rx_ep->ep_fid.fid.fclass = FI_CLASS_RX_CTX;
308 
309 	bgq_rx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid);
310 
311 	bgq_rx_ep->av = bgq_sep->av;
312 	fi_bgq_ref_inc(&bgq_rx_ep->av->ref_cnt, "address vector");
313 
314 	++ bgq_sep->domain->rx.count;
315 
316 	fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint");
317 
318 	return 0;
319 
320 err:
321 	if (info.fabric_attr)
322 		free(info.fabric_attr);
323 	if (info.domain_attr)
324 		free(info.domain_attr);
325 	if (info.ep_attr)
326 		free(info.ep_attr);
327 	if (info.tx_attr)
328 		free(info.tx_attr);
329 	return -errno;
330 }
331 
fi_bgq_bind_sep(struct fid * fid,struct fid * bfid,uint64_t flags)332 static int fi_bgq_bind_sep(struct fid *fid, struct fid *bfid,
333 		uint64_t flags)
334 {
335 	int ret = 0;
336 	struct fi_bgq_sep *bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid);
337 	struct fi_bgq_av *bgq_av;
338 
339 	if (!fid || !bfid) {
340 		errno = FI_EINVAL;
341 		return -errno;
342 	}
343 
344 	switch (bfid->fclass) {
345 	case FI_CLASS_AV:
346 		bgq_av = container_of(bfid, struct fi_bgq_av, av_fid);
347 		fi_bgq_ref_inc(&bgq_av->ref_cnt, "address vector");
348 		bgq_sep->av = bgq_av;
349 		break;
350 	default:
351 		errno = FI_ENOSYS;
352 		return -errno;
353 	}
354 
355 	return ret;
356 }
357 
358 static struct fi_ops fi_bgq_fi_ops = {
359 	.size		= sizeof(struct fi_ops),
360 	.close		= fi_bgq_close_sep,
361 	.bind		= fi_bgq_bind_sep,
362 	.control	= fi_bgq_control_sep,
363 	.ops_open	= fi_no_ops_open
364 };
365 
366 static struct fi_ops_ep fi_bgq_sep_ops = {
367 	.size		= sizeof(struct fi_ops_ep),
368 	.cancel		= fi_no_cancel,
369 	.getopt		= fi_no_getopt,
370 	.setopt		= fi_no_setopt,
371 	.tx_ctx		= fi_bgq_tx_ctx,
372 	.rx_ctx		= fi_bgq_rx_ctx,
373 	.rx_size_left   = fi_no_rx_size_left,
374 	.tx_size_left   = fi_no_tx_size_left
375 };
376 
fi_bgq_scalable_ep(struct fid_domain * domain,struct fi_info * info,struct fid_ep ** sep,void * context)377 int fi_bgq_scalable_ep (struct fid_domain *domain,
378 	struct fi_info *info,
379 	struct fid_ep **sep,
380 	void *context)
381 {
382 	struct fi_bgq_sep *bgq_sep = NULL;
383 
384 	if (!info || !domain) {
385 		errno = FI_EINVAL;
386 		goto err;
387 	}
388 
389 	void * memptr = NULL;
390 	memptr = malloc(sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE);
391 	if (!memptr) {
392 		errno = FI_ENOMEM;
393 		goto err;
394 	}
395 	memset(memptr, 0, sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE);
396 	bgq_sep = (struct fi_bgq_sep *)(((uintptr_t)memptr+L2_CACHE_LINE_SIZE) & ~(L2_CACHE_LINE_SIZE-1));
397 	bgq_sep->memptr = memptr;
398 	memptr = NULL;
399 
400 	bgq_sep->domain = (struct fi_bgq_domain *) domain;
401 
402 	bgq_sep->ep_fid.fid.fclass	= FI_CLASS_SEP;
403 	bgq_sep->ep_fid.fid.context	= context;
404 	bgq_sep->ep_fid.fid.ops		= &fi_bgq_fi_ops;
405 	bgq_sep->ep_fid.ops		= &fi_bgq_sep_ops;
406 
407         int ret = fi_bgq_init_cm_ops((struct fid_ep *)&(bgq_sep->ep_fid), info);
408         if (ret)
409                 goto err;
410 
411 	bgq_sep->info = calloc(1, sizeof (struct fi_info));
412 	if (!bgq_sep->info) {
413 		errno = FI_ENOMEM;
414 		goto err;
415 	}
416 	memcpy(bgq_sep->info, info, sizeof (struct fi_info));
417 	bgq_sep->info->next = NULL;
418 	bgq_sep->info->ep_attr = calloc(1, sizeof(struct fi_ep_attr));
419 	if (!bgq_sep->info->ep_attr) {
420 		errno = FI_ENOMEM;
421 		goto err;
422 	}
423 	memcpy(bgq_sep->info->ep_attr, info->ep_attr, sizeof(struct fi_ep_attr));
424 
425 #ifdef FI_BGQ_TRACE
426 	fprintf(stderr,"fi_bgq_scalable_ep - called with %ld tx %ld rx\n",bgq_sep->info->ep_attr->tx_ctx_cnt,bgq_sep->info->ep_attr->rx_ctx_cnt);
427 #endif
428 	/*
429 	 * fi_endpoint.3
430 	 *
431 	 * "tx_ctx_cnt - Transmit Context Count
432 	 * 	Number of transmit contexts to associate with the endpoint. If
433 	 * 	not specified (0), 1 context will be assigned if the endpoint
434 	 * 	supports outbound transfers."
435 	 */
436 	if (0 == bgq_sep->info->ep_attr->tx_ctx_cnt) {
437 		bgq_sep->info->ep_attr->tx_ctx_cnt = 1;
438 	}
439 
440 	/*
441 	 * fi_endpoint.3
442 	 *
443 	 * "rx_ctx_cnt - Receive Context Count
444 	 * 	Number of receive contexts to associate with the endpoint. If
445 	 * 	not specified, 1 context will be assigned if the endpoint
446 	 * 	supports inbound transfers."
447 	 */
448 	if (0 == bgq_sep->info->ep_attr->rx_ctx_cnt) {
449 		bgq_sep->info->ep_attr->rx_ctx_cnt = 1;
450 	}
451 
452 	fi_bgq_ref_init(&bgq_sep->domain->fabric->node, &bgq_sep->ref_cnt, "scalable endpoint");
453 	fi_bgq_ref_inc(&bgq_sep->domain->ref_cnt, "domain");
454 
455 	*sep = &bgq_sep->ep_fid;
456 
457 	return 0;
458 err:
459 	if (bgq_sep) {
460 		if (bgq_sep->info) {
461 			if (bgq_sep->info->ep_attr)
462 				free(bgq_sep->info->ep_attr);
463 			free(bgq_sep->info);
464 		}
465 		memptr = bgq_sep->memptr;
466 		free(memptr);
467 	}
468 	return -errno;
469 }
470