1 /*
2 * Copyright (C) 2016 by Argonne National Laboratory.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #include "rdma/bgq/fi_bgq.h"
33
34 #include <ofi.h>
35 #include <ofi_enosys.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <limits.h>
44
/*
 * Forward declaration of the routine used below to create the transmit and
 * receive contexts of a scalable endpoint.  The callers in this file pass -1
 * for whichever of rx_index / tx_index does not apply to the context being
 * created.
 */
int fi_bgq_endpoint_rx_tx(struct fid_domain *dom,
		struct fi_info *info,
		struct fid_ep **ep,
		void *context,
		const int rx_index,
		const int tx_index);
48
/*
 * Close a scalable endpoint and free the memory it owns.
 *
 * fid: the fid embedded in the scalable endpoint; it is validated against
 *      FI_CLASS_SEP with fi_bgq_fid_check() before anything else happens.
 *
 * Returns 0 on success.  If fi_bgq_fid_check(), fi_bgq_ref_finalize() or
 * fi_bgq_ref_dec() reports a non-zero value, that value is returned and the
 * endpoint memory is not freed.
 */
static int fi_bgq_close_sep(fid_t fid)
{
	int ret;
	struct fi_bgq_sep *bgq_sep;

	ret = fi_bgq_fid_check(fid, FI_CLASS_SEP, "scalable endpoint");
	if (ret)
		return ret;

	bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid);

	/*
	 * Finalize the endpoint's own reference count before releasing any
	 * reference it holds on other objects: if this step fails the
	 * endpoint stays alive and must keep its address vector and domain
	 * references intact.
	 */
	ret = fi_bgq_ref_finalize(&bgq_sep->ref_cnt, "scalable endpoint");
	if (ret)
		return ret;

	/*
	 * The address vector is only set by a bind call; an endpoint that was
	 * never bound has a NULL 'av' (the structure is zero-filled when it
	 * is created) and holds no reference to drop.
	 */
	if (bgq_sep->av) {
		ret = fi_bgq_ref_dec(&bgq_sep->av->ref_cnt, "address vector");
		if (ret)
			return ret;
		/* the reference is gone; never drop it a second time */
		bgq_sep->av = NULL;
	}

	ret = fi_bgq_ref_dec(&bgq_sep->domain->ref_cnt, "domain");
	if (ret)
		return ret;

	free(bgq_sep->info->ep_attr);
	free(bgq_sep->info);

	/*
	 * 'memptr' is the address originally returned by the allocator; the
	 * endpoint structure itself lives at an aligned offset inside it.
	 */
	free(bgq_sep->memptr);

	return 0;
}
77
/*
 * Control operation for a scalable endpoint.
 *
 * No command is acted upon: every request is accepted and 0 is returned.
 */
static int fi_bgq_control_sep(fid_t fid, int command, void *arg)
{
	/* none of the arguments influences the result */
	(void)fid;
	(void)command;
	(void)arg;

	return 0;
}
84
fi_bgq_tx_ctx(struct fid_ep * sep,int index,struct fi_tx_attr * attr,struct fid_ep ** tx_ep,void * context)85 static int fi_bgq_tx_ctx(struct fid_ep *sep, int index,
86 struct fi_tx_attr *attr, struct fid_ep **tx_ep,
87 void *context)
88 {
89 int ret;
90 struct fi_info info = {0};
91 struct fi_tx_attr tx_attr = {0};
92 struct fi_ep_attr ep_attr = {0};
93 struct fi_domain_attr dom_attr = {0};
94 struct fi_fabric_attr fab_attr = {0};
95 struct fi_bgq_sep *bgq_sep;
96 struct fi_bgq_ep *bgq_tx_ep;
97
98 if (!sep || !attr || !tx_ep) {
99 errno = FI_EINVAL;
100 return -errno;
101 }
102
103 bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid);
104
105 uint64_t caps = attr->caps; /* TODO - "By default, a transmit context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." */
106
107 if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_RECV)) {
108 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
109 "FI_MSG|FI_TAGGED with FI_RECV capability specified for a TX context\n");
110 caps &= ~FI_RECV;
111 }
112
113 if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_REMOTE_READ || caps & FI_REMOTE_WRITE)) {
114 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
115 "FI_RMA|FI_ATOMIC with FI_REMOTE_READ|FI_REMOTE_WRITE capability specified for a TX context\n");
116 caps &= ~FI_REMOTE_READ;
117 caps &= ~FI_REMOTE_WRITE;
118 }
119
120 if (caps & FI_MSG || caps & FI_TAGGED) {
121 caps |= FI_SEND;
122 }
123
124 if (caps & FI_RMA || caps & FI_ATOMIC) {
125 caps |= FI_READ;
126 caps |= FI_WRITE;
127 }
128
129 if (ofi_recv_allowed(caps) || ofi_rma_target_allowed(caps)) {
130 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
131 "RX capabilities specified for TX context\n");
132 errno = FI_EINVAL;
133 return -errno;
134 }
135
136 if (!ofi_send_allowed(caps) && !ofi_rma_initiate_allowed(caps)) {
137 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
138 "TX capabilities not specified for TX context\n");
139 errno = FI_EINVAL;
140 return -errno;
141 }
142
143 if (bgq_sep->domain->tx.count >= fi_bgq_domain_get_tx_max(bgq_sep->domain)) {
144 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
145 "TX ctx count exceeded (max %lu, created %lu)\n",
146 fi_bgq_domain_get_tx_max(bgq_sep->domain), bgq_sep->domain->tx.count);
147 errno = FI_EINVAL;
148 return -errno;
149 }
150
151 info.caps = caps;
152 info.mode = attr->mode;
153
154 info.tx_attr = &tx_attr;
155 memcpy(info.tx_attr, attr, sizeof(*info.tx_attr));
156
157 info.ep_attr = &ep_attr;
158 memcpy(info.ep_attr, bgq_sep->info->ep_attr, sizeof(*info.ep_attr));
159
160 info.domain_attr = &dom_attr;
161 memcpy(info.domain_attr, bgq_sep->info->domain_attr, sizeof(*info.domain_attr));
162
163 info.fabric_attr = &fab_attr;
164 memcpy(info.fabric_attr, bgq_sep->info->fabric_attr, sizeof(*info.fabric_attr));
165 #ifdef FI_BGQ_TRACE
166 fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with tx index %d\n",index);
167 #endif
168
169 ret = fi_bgq_endpoint_rx_tx((struct fid_domain *)bgq_sep->domain,
170 &info, tx_ep, context, -1, index);
171 if (ret) {
172 goto err;
173 }
174
175 bgq_tx_ep = container_of(*tx_ep, struct fi_bgq_ep, ep_fid);
176 bgq_tx_ep->ep_fid.fid.fclass = FI_CLASS_TX_CTX;
177
178 bgq_tx_ep->av = bgq_sep->av;
179 fi_bgq_ref_inc(&bgq_tx_ep->av->ref_cnt, "address vector");
180
181 bgq_tx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid);
182
183 ++ bgq_sep->domain->tx.count;
184
185 fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint");
186
187 attr->caps = caps;
188
189 return 0;
190
191 err:
192 return -errno;
193 }
194
fi_bgq_rx_ctx(struct fid_ep * sep,int index,struct fi_rx_attr * attr,struct fid_ep ** rx_ep,void * context)195 static int fi_bgq_rx_ctx(struct fid_ep *sep, int index,
196 struct fi_rx_attr *attr, struct fid_ep **rx_ep,
197 void *context)
198 {
199 int ret;
200 struct fi_info info = {0};
201 struct fi_bgq_sep *bgq_sep;
202 struct fi_bgq_ep *bgq_rx_ep;
203
204 if (!sep || !attr || !rx_ep) {
205 errno = FI_EINVAL;
206 return -errno;
207 }
208
209 bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid);
210
211 uint64_t caps = attr->caps; /* TODO - "By default, a receive context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." */
212
213 if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_SEND)) {
214 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
215 "FI_MSG|FI_TAGGED with FI_SEND capability specified for a RX context\n");
216 caps &= ~FI_SEND;
217 }
218
219 if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_READ || caps & FI_WRITE)) {
220 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
221 "FI_RMA|FI_ATOMIC with FI_READ|FI_WRITE capability specified for a RX context\n");
222 caps &= ~FI_READ;
223 caps &= ~FI_WRITE;
224 }
225
226 if (caps & FI_MSG || caps & FI_TAGGED) {
227 caps |= FI_RECV;
228 }
229
230 if (caps & FI_RMA || caps & FI_ATOMIC) {
231 caps |= FI_REMOTE_READ;
232 caps |= FI_REMOTE_WRITE;
233 }
234
235 if (ofi_send_allowed(caps) || ofi_rma_initiate_allowed(caps)) {
236 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
237 "TX capabilities specified for RX context\n");
238 errno = FI_EINVAL;
239 return -errno;
240 }
241
242 if (!ofi_recv_allowed(caps) && !ofi_rma_target_allowed(caps)) {
243 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
244 "RX capabilities not specified for RX context\n");
245 errno = FI_EINVAL;
246 return -errno;
247 }
248
249 if (bgq_sep->domain->rx.count >= fi_bgq_domain_get_rx_max(bgq_sep->domain)) {
250 FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN,
251 "RX ctx count exceeded (max %lu, created %lu)\n",
252 fi_bgq_domain_get_rx_max(bgq_sep->domain), bgq_sep->domain->rx.count);
253 errno = FI_EINVAL;
254 return -errno;
255 }
256
257 info.caps = caps;
258 info.mode = attr->mode;
259
260 info.rx_attr = calloc(1, sizeof(*info.rx_attr));
261 if (!info.rx_attr) {
262 errno = FI_ENOMEM;
263 goto err;
264 }
265
266 info.rx_attr->caps = caps;
267 info.rx_attr->mode = attr->mode;
268 info.rx_attr->op_flags = attr->op_flags;
269 info.rx_attr->msg_order = attr->msg_order;
270 info.rx_attr->total_buffered_recv = attr->total_buffered_recv;
271 info.rx_attr->iov_limit = attr->iov_limit;
272
273 info.ep_attr = calloc(1, sizeof(*info.ep_attr));
274 if (!info.ep_attr) {
275 errno = FI_ENOMEM;
276 goto err;
277 }
278 memcpy(info.ep_attr, bgq_sep->info->ep_attr,
279 sizeof(*info.ep_attr));
280
281 info.domain_attr = calloc(1, sizeof(*info.domain_attr));
282 if (!info.domain_attr) {
283 errno = FI_ENOMEM;
284 goto err;
285 }
286 memcpy(info.domain_attr, bgq_sep->info->domain_attr,
287 sizeof(*info.domain_attr));
288
289 info.fabric_attr = calloc(1, sizeof(*info.fabric_attr));
290 if (!info.fabric_attr) {
291 errno = FI_ENOMEM;
292 goto err;
293 }
294 memcpy(info.fabric_attr, bgq_sep->info->fabric_attr,
295 sizeof(*info.fabric_attr));
296
297 #ifdef FI_BGQ_TRACE
298 fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with rx index %d\n",index);
299 #endif
300 ret = fi_bgq_endpoint_rx_tx(&bgq_sep->domain->domain_fid, &info,
301 rx_ep, context, index, -1);
302 if (ret) {
303 goto err;
304 }
305
306 bgq_rx_ep = container_of(*rx_ep, struct fi_bgq_ep, ep_fid);
307 bgq_rx_ep->ep_fid.fid.fclass = FI_CLASS_RX_CTX;
308
309 bgq_rx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid);
310
311 bgq_rx_ep->av = bgq_sep->av;
312 fi_bgq_ref_inc(&bgq_rx_ep->av->ref_cnt, "address vector");
313
314 ++ bgq_sep->domain->rx.count;
315
316 fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint");
317
318 return 0;
319
320 err:
321 if (info.fabric_attr)
322 free(info.fabric_attr);
323 if (info.domain_attr)
324 free(info.domain_attr);
325 if (info.ep_attr)
326 free(info.ep_attr);
327 if (info.tx_attr)
328 free(info.tx_attr);
329 return -errno;
330 }
331
fi_bgq_bind_sep(struct fid * fid,struct fid * bfid,uint64_t flags)332 static int fi_bgq_bind_sep(struct fid *fid, struct fid *bfid,
333 uint64_t flags)
334 {
335 int ret = 0;
336 struct fi_bgq_sep *bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid);
337 struct fi_bgq_av *bgq_av;
338
339 if (!fid || !bfid) {
340 errno = FI_EINVAL;
341 return -errno;
342 }
343
344 switch (bfid->fclass) {
345 case FI_CLASS_AV:
346 bgq_av = container_of(bfid, struct fi_bgq_av, av_fid);
347 fi_bgq_ref_inc(&bgq_av->ref_cnt, "address vector");
348 bgq_sep->av = bgq_av;
349 break;
350 default:
351 errno = FI_ENOSYS;
352 return -errno;
353 }
354
355 return ret;
356 }
357
358 static struct fi_ops fi_bgq_fi_ops = {
359 .size = sizeof(struct fi_ops),
360 .close = fi_bgq_close_sep,
361 .bind = fi_bgq_bind_sep,
362 .control = fi_bgq_control_sep,
363 .ops_open = fi_no_ops_open
364 };
365
366 static struct fi_ops_ep fi_bgq_sep_ops = {
367 .size = sizeof(struct fi_ops_ep),
368 .cancel = fi_no_cancel,
369 .getopt = fi_no_getopt,
370 .setopt = fi_no_setopt,
371 .tx_ctx = fi_bgq_tx_ctx,
372 .rx_ctx = fi_bgq_rx_ctx,
373 .rx_size_left = fi_no_rx_size_left,
374 .tx_size_left = fi_no_tx_size_left
375 };
376
fi_bgq_scalable_ep(struct fid_domain * domain,struct fi_info * info,struct fid_ep ** sep,void * context)377 int fi_bgq_scalable_ep (struct fid_domain *domain,
378 struct fi_info *info,
379 struct fid_ep **sep,
380 void *context)
381 {
382 struct fi_bgq_sep *bgq_sep = NULL;
383
384 if (!info || !domain) {
385 errno = FI_EINVAL;
386 goto err;
387 }
388
389 void * memptr = NULL;
390 memptr = malloc(sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE);
391 if (!memptr) {
392 errno = FI_ENOMEM;
393 goto err;
394 }
395 memset(memptr, 0, sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE);
396 bgq_sep = (struct fi_bgq_sep *)(((uintptr_t)memptr+L2_CACHE_LINE_SIZE) & ~(L2_CACHE_LINE_SIZE-1));
397 bgq_sep->memptr = memptr;
398 memptr = NULL;
399
400 bgq_sep->domain = (struct fi_bgq_domain *) domain;
401
402 bgq_sep->ep_fid.fid.fclass = FI_CLASS_SEP;
403 bgq_sep->ep_fid.fid.context = context;
404 bgq_sep->ep_fid.fid.ops = &fi_bgq_fi_ops;
405 bgq_sep->ep_fid.ops = &fi_bgq_sep_ops;
406
407 int ret = fi_bgq_init_cm_ops((struct fid_ep *)&(bgq_sep->ep_fid), info);
408 if (ret)
409 goto err;
410
411 bgq_sep->info = calloc(1, sizeof (struct fi_info));
412 if (!bgq_sep->info) {
413 errno = FI_ENOMEM;
414 goto err;
415 }
416 memcpy(bgq_sep->info, info, sizeof (struct fi_info));
417 bgq_sep->info->next = NULL;
418 bgq_sep->info->ep_attr = calloc(1, sizeof(struct fi_ep_attr));
419 if (!bgq_sep->info->ep_attr) {
420 errno = FI_ENOMEM;
421 goto err;
422 }
423 memcpy(bgq_sep->info->ep_attr, info->ep_attr, sizeof(struct fi_ep_attr));
424
425 #ifdef FI_BGQ_TRACE
426 fprintf(stderr,"fi_bgq_scalable_ep - called with %ld tx %ld rx\n",bgq_sep->info->ep_attr->tx_ctx_cnt,bgq_sep->info->ep_attr->rx_ctx_cnt);
427 #endif
428 /*
429 * fi_endpoint.3
430 *
431 * "tx_ctx_cnt - Transmit Context Count
432 * Number of transmit contexts to associate with the endpoint. If
433 * not specified (0), 1 context will be assigned if the endpoint
434 * supports outbound transfers."
435 */
436 if (0 == bgq_sep->info->ep_attr->tx_ctx_cnt) {
437 bgq_sep->info->ep_attr->tx_ctx_cnt = 1;
438 }
439
440 /*
441 * fi_endpoint.3
442 *
443 * "rx_ctx_cnt - Receive Context Count
444 * Number of receive contexts to associate with the endpoint. If
445 * not specified, 1 context will be assigned if the endpoint
446 * supports inbound transfers."
447 */
448 if (0 == bgq_sep->info->ep_attr->rx_ctx_cnt) {
449 bgq_sep->info->ep_attr->rx_ctx_cnt = 1;
450 }
451
452 fi_bgq_ref_init(&bgq_sep->domain->fabric->node, &bgq_sep->ref_cnt, "scalable endpoint");
453 fi_bgq_ref_inc(&bgq_sep->domain->ref_cnt, "domain");
454
455 *sep = &bgq_sep->ep_fid;
456
457 return 0;
458 err:
459 if (bgq_sep) {
460 if (bgq_sep->info) {
461 if (bgq_sep->info->ep_attr)
462 free(bgq_sep->info->ep_attr);
463 free(bgq_sep->info);
464 }
465 memptr = bgq_sep->memptr;
466 free(memptr);
467 }
468 return -errno;
469 }
470