1 /*
2 * Copyright (c) 2015-2018 Cray Inc. All rights reserved.
3 * Copyright (c) 2015-2018 Los Alamos National Security, LLC.
4 * All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <sys/mman.h>
39 #include <signal.h>
40
41 #include "gnix.h"
42 #include "gnix_nic.h"
43 #include "gnix_cm_nic.h"
44 #include "gnix_vc.h"
45 #include "gnix_mbox_allocator.h"
46 #include "gnix_util.h"
47 #include "fi_ext_gni.h"
48
49 /*
50 * TODO: make this a domain parameter
51 */
52 #define GNIX_VC_FL_MIN_SIZE 128
53 #define GNIX_VC_FL_INIT_REFILL_SIZE 10
54
/* count of nics currently allocated against each protection tag */
static int gnix_nics_per_ptag[GNI_PTAG_MAX];
/* per-ptag lists of live nics; scanned by gnix_nic_alloc for reuse */
struct dlist_entry gnix_nic_list_ptag[GNI_PTAG_MAX];
/* global list of all allocated gnix nics */
DLIST_HEAD(gnix_nic_list);
/* protects the nic lists and the gnix_nics_per_ptag counters */
pthread_mutex_t gnix_nic_list_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * globals
 */

/* cap on nics created per ptag; once reached, existing nics are reused */
uint32_t gnix_max_nics_per_ptag = GNIX_DEF_MAX_NICS_PER_PTAG;

/*
 * local variables
 */

/* attributes used by gnix_nic_alloc when the caller supplies none */
static struct gnix_nic_attr default_attr = {
		.gni_cdm_hndl        = NULL,
		.gni_nic_hndl        = NULL
};
74
75 /*******************************************************************************
76 * Helper functions.
77 ******************************************************************************/
78
79 /*
80 * this function is intended to be invoked as an argument to pthread_create,
81 */
/*
 * this function is intended to be invoked as an argument to pthread_create;
 * it blocks in GNI_CqVectorMonitor on the nic's blocking TX/RX CQs and
 * drives _gnix_nic_progress whenever either CQ has events, looping until
 * the thread is cancelled (see __nic_destruct).
 *
 * @param the_arg  pointer to the struct gnix_nic this thread progresses
 * @return always NULL (only reached on a non-retryable CQ error; normal
 *         termination is via pthread_cancel)
 */
static void *__gnix_nic_prog_thread_fn(void *the_arg)
{
	int ret = FI_SUCCESS, prev_state;
	int retry = 0;
	uint32_t which;
	struct gnix_nic *nic = (struct gnix_nic *)the_arg;
	sigset_t sigmask;
	gni_cq_handle_t cqv[2];
	gni_return_t status;
	gni_cq_entry_t cqe;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * temporarily disable cancelability while we set up
	 * some stuff
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state);

	/*
	 * help out Cray core-spec, say we're not an app thread
	 * and can be run on core-spec cpus.
	 */

	ret = _gnix_task_is_not_app();
	if (ret)
		GNIX_WARN(FI_LOG_EP_CTRL,
			"_gnix_task_is_not_app call returned %d\n",
			ret);

	/*
	 * block all signals, don't want this thread to catch
	 * signals that may be for app threads
	 */

	memset(&sigmask, 0, sizeof(sigset_t));
	ret = sigfillset(&sigmask);
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
		"sigfillset call returned %d\n", ret);
	} else {

		ret = pthread_sigmask(SIG_SETMASK,
					&sigmask, NULL);
		if (ret)
			GNIX_WARN(FI_LOG_EP_CTRL,
			"pthread_sigmask call returned %d\n", ret);
	}

	/*
	 * okay now we're ready to be cancelable.
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);

	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	/* monitor both blocking CQs; index 0 = TX, index 1 = RX */
	cqv[0] = nic->tx_cq_blk;
	cqv[1] = nic->rx_cq_blk;

try_again:
	/* sleeps until one of the CQs has an event (-1 == no timeout) */
	status = GNI_CqVectorMonitor(cqv,
				     2,
				     -1,
				     &which);

	switch (status) {
	case GNI_RC_SUCCESS:

		/*
		 * first dequeue RX CQEs
		 */
		if (nic->rx_cq_blk != nic->rx_cq && which == 1) {
			do {
				status = GNI_CqGetEvent(nic->rx_cq_blk,
							&cqe);
			} while (status == GNI_RC_SUCCESS);
		}
		/* don't allow cancellation mid-progress: structures being
		 * touched by _gnix_nic_progress must stay consistent */
		pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state);
		_gnix_nic_progress(nic);
		pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);
		retry = 1;
		break;
	case GNI_RC_TIMEOUT:
	case GNI_RC_NOT_DONE:
	/* Invalid state indicates call interrupted by signal using various tools */
	case GNI_RC_INVALID_STATE:
		retry = 1;
		break;
	case GNI_RC_INVALID_PARAM:
	case GNI_RC_ERROR_RESOURCE:
	case GNI_RC_ERROR_NOMEM:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqGetEvent returned %s\n",
			  gni_err_str[status]);
		break;
	default:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqGetEvent returned unexpected code %s\n",
			  gni_err_str[status]);
		break;
	}

	if (retry)
		goto try_again;

	return NULL;
}
193
194 /*
195 * setup memory registration for remote GNI_PostCqWrite's to target
196 */
197
/*
 * Allocate one page of anonymous shared memory and register it with the
 * nic's blocking RX CQ, giving remote peers a target for GNI_PostCqWrite
 * (used as a wakeup/interrupt mechanism for this nic).
 *
 * @param nic  nic to set up; on success nic->irq_mmap_addr/len and
 *             nic->irq_mem_hndl are populated
 * @return FI_SUCCESS on success, negative fi_errno value otherwise
 */
static int __nic_setup_irq_cq(struct gnix_nic *nic)
{
	int ret = FI_SUCCESS;
	size_t len;
	gni_return_t status;
	int fd = -1;		/* anonymous mapping, no backing file */
	void *mmap_addr;
	int vmdh_index = -1;
	int flags = GNI_MEM_READWRITE;
	struct gnix_auth_key *info;
	struct fi_gni_auth_key key;

	len = (size_t)sysconf(_SC_PAGESIZE);

	mmap_addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANON, fd, 0);
	if (mmap_addr == MAP_FAILED) {
		GNIX_WARN(FI_LOG_EP_CTRL, "mmap failed - %s\n",
			  strerror(errno));
		ret = -errno;
		goto err;
	}

	nic->irq_mmap_addr = mmap_addr;
	nic->irq_mmap_len = len;

	/* On some systems, the page may not be zero'd from first use.
	   Memset it here */
	memset(mmap_addr, 0x0, len);

	if (nic->using_vmdh) {
		/* VMDH mode: registration needs a reserved key index from
		 * the auth key associated with this nic's cookie */
		key.type = GNIX_AKT_RAW;
		key.raw.protection_key = nic->cookie;

		info = _gnix_auth_key_lookup((uint8_t *) &key, sizeof(key));
		assert(info);

		if (!nic->mdd_resources_set) {
			/* check to see if the ptag registration limit was set
			   yet or not -- becomes read-only after success */
			ret = _gnix_auth_key_enable(info);
			if (ret != FI_SUCCESS && ret != -FI_EBUSY) {
				/* -FI_EBUSY just means the key was already
				 * enabled elsewhere; anything else is
				 * unexpected but treated as non-fatal here */
				GNIX_WARN(FI_LOG_DOMAIN,
					"failed to enable authorization key, "
					"unexpected error rc=%d\n", ret);
			}

			status = GNI_SetMddResources(nic->gni_nic_hndl,
					(info->attr.prov_key_limit +
					info->attr.user_key_limit));
			if (status != GNI_RC_SUCCESS) {
				GNIX_FATAL(FI_LOG_DOMAIN,
					"failed to set MDD resources, rc=%d\n",
					status);
			}

			nic->mdd_resources_set = 1;
		}
		vmdh_index = _gnix_get_next_reserved_key(info);
		if (vmdh_index <= 0) {
			GNIX_FATAL(FI_LOG_DOMAIN,
				"failed to get next reserved key, "
				"rc=%d\n", vmdh_index);
		}

		flags |= GNI_MEM_USE_VMDH;
	}

	/* attach the registration to the blocking RX CQ so remote CQ
	 * writes targeting this page raise events there */
	status = GNI_MemRegister(nic->gni_nic_hndl,
				(uint64_t) nic->irq_mmap_addr,
				len,
				nic->rx_cq_blk,
				flags,
				vmdh_index,
				&nic->irq_mem_hndl);
	if (status != GNI_RC_SUCCESS) {
		ret = gnixu_to_fi_errno(status);
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_MemRegister returned %s\n",
			  gni_err_str[status]);
		goto err_w_mmap;
	}

#if 0
	fprintf(stderr,"registered ireq memhndl 0x%016lx 0x%016lx\n",
		nic->irq_mem_hndl.qword1,
		nic->irq_mem_hndl.qword2);
#endif


	return ret;

err_w_mmap:
	munmap(mmap_addr, len);
err:
	return ret;
}
295
296 /*
297 * release resources previously set up for remote
298 * GNI_PostCqWrite's to target
299 */
__nic_teardown_irq_cq(struct gnix_nic * nic)300 static int __nic_teardown_irq_cq(struct gnix_nic *nic)
301 {
302 int ret = FI_SUCCESS;
303 gni_return_t status;
304
305 if (nic == NULL)
306 return ret;
307
308 if (nic->irq_mmap_addr == NULL)
309 return ret;
310
311 if ((nic->irq_mem_hndl.qword1) ||
312 (nic->irq_mem_hndl.qword2)) {
313 status = GNI_MemDeregister(nic->gni_nic_hndl,
314 &nic->irq_mem_hndl);
315 if (status != GNI_RC_SUCCESS) {
316 ret = gnixu_to_fi_errno(status);
317 GNIX_WARN(FI_LOG_EP_CTRL,
318 "GNI_MemDeregister returned %s\n",
319 gni_err_str[status]);
320 }
321 }
322
323 munmap(nic->irq_mmap_addr,
324 nic->irq_mmap_len);
325 return ret;
326 }
327
328
329 /*
330 * place holder for better attributes checker
331 */
__gnix_nic_check_attr_sanity(struct gnix_nic_attr * attr)332 static int __gnix_nic_check_attr_sanity(struct gnix_nic_attr *attr)
333 {
334 return FI_SUCCESS;
335 }
336
337 static inline struct gnix_tx_descriptor *
__desc_lkup_by_id(struct gnix_nic * nic,int desc_id)338 __desc_lkup_by_id(struct gnix_nic *nic, int desc_id)
339 {
340 struct gnix_tx_descriptor *tx_desc;
341
342 assert((desc_id >= 0) && (desc_id <= nic->max_tx_desc_id));
343 tx_desc = &nic->tx_desc_base[desc_id];
344 return tx_desc;
345 }
346
/*
 * Recover from an RX CQ overrun.  Since events may have been dropped,
 * drain whatever remains in the CQ and then conservatively schedule
 * every VC with an assigned id on this nic for RX processing, so no
 * delivered SMSG message is left unhandled.
 *
 * @param nic  nic whose RX CQ overran
 * @return FI_SUCCESS
 */
static int __nic_rx_overrun(struct gnix_nic *nic)
{
	int i, max_id, ret;
	struct gnix_vc *vc;
	gni_return_t status;
	gni_cq_entry_t cqe;

	GNIX_WARN(FI_LOG_EP_DATA, "\n");

	/* clear out the CQ */
	/*
	 * TODO: really need to process CQEs better for error reporting,
	 * etc.
	 */
	while ((status = GNI_CqGetEvent(nic->rx_cq, &cqe)) == GNI_RC_SUCCESS);
	assert(status == GNI_RC_NOT_DONE);

	/* snapshot the id count under the vc id lock */
	COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock);
	max_id = nic->vc_id_table_count;
	COND_RELEASE(nic->requires_lock, &nic->vc_id_lock);
	/*
	 * TODO: optimization would
	 * be to keep track of last time
	 * this happened and where smsg msgs.
	 * were found.
	 */
	/* a set bit in vc_id_bitmap marks an id with a live VC attached */
	for (i = 0; i < max_id; i++) {
		ret = _gnix_test_bit(&nic->vc_id_bitmap, i);
		if (ret) {
			vc = __gnix_nic_elem_by_rem_id(nic, i);
			ret = _gnix_vc_rx_schedule(vc);
			assert(ret == FI_SUCCESS);
		}
	}

	return FI_SUCCESS;
}
384
/*
 * Translate one RX CQE into a VC scheduling action.  The CQE's inst id
 * identifies the remote id of the VC; connecting and connected VCs are
 * scheduled for RX processing, VCs in any other state are left alone.
 *
 * @param nic  nic the CQE arrived on
 * @param cqe  the raw CQ entry
 * @return FI_SUCCESS, or the result of _gnix_vc_rx_schedule
 */
static int __process_rx_cqe(struct gnix_nic *nic, gni_cq_entry_t cqe)
{
	struct gnix_vc *vc;
	int rc = FI_SUCCESS;
	int vc_id = GNI_CQ_GET_INST_ID(cqe);

	/*
	 * its possible this vc has been destroyed, so may get NULL
	 * back.
	 */
	vc = __gnix_nic_elem_by_rem_id(nic, vc_id);
	if (vc == NULL)
		return rc;

	if (vc->conn_state == GNIX_VC_CONNECTING) {
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Scheduling VC for RX processing (%p)\n",
			  vc);
		rc = _gnix_vc_rx_schedule(vc);
		assert(rc == FI_SUCCESS);
	} else if (vc->conn_state == GNIX_VC_CONNECTED) {
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Processing VC RX (%p)\n",
			  vc);
		rc = _gnix_vc_rx_schedule(vc);
		assert(rc == FI_SUCCESS);
	}
	/* any other state: VC not ready for scheduling or SMSG processing */

	return rc;
}
422
/*
 * Drain the nic's RX CQ, scheduling the VC associated with each event.
 * A GNI_RC_ERROR_RESOURCE status indicates the CQ overran and is
 * handled via __nic_rx_overrun().
 *
 * @param nic  nic to progress
 * @return FI_SUCCESS once the CQ is drained, or an fi_errno value on
 *         an unexpected CQ error
 */
static int __nic_rx_progress(struct gnix_nic *nic)
{
	int ret = FI_SUCCESS;
	gni_return_t status = GNI_RC_NOT_DONE;
	gni_cq_entry_t cqe;

	/* cheap peek before taking the nic lock; bail if nothing pending */
	status = GNI_CqTestEvent(nic->rx_cq);
	if (status == GNI_RC_NOT_DONE)
		return FI_SUCCESS;

	COND_ACQUIRE(nic->requires_lock, &nic->lock);

	do {
		status = GNI_CqGetEvent(nic->rx_cq, &cqe);
		if (OFI_UNLIKELY(status == GNI_RC_NOT_DONE)) {
			/* CQ fully drained */
			ret = FI_SUCCESS;
			break;
		}

		if (OFI_LIKELY(status == GNI_RC_SUCCESS)) {
			/* Find and schedule the associated VC. */
			ret = __process_rx_cqe(nic, cqe);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_DATA,
					  "process_rx_cqe() failed: %d\n",
					  ret);
			}
		} else if (status == GNI_RC_ERROR_RESOURCE) {
			/* The remote CQ was overrun. Events related to any VC
			 * could have been missed. Schedule each VC to be sure
			 * all messages are processed. */
			assert(GNI_CQ_OVERRUN(cqe));
			__nic_rx_overrun(nic);
		} else {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "GNI_CqGetEvent returned %s\n",
				  gni_err_str[status]);
			ret = gnixu_to_fi_errno(status);
			break;
		}
	} while (1);

	COND_RELEASE(nic->requires_lock, &nic->lock);

	return ret;
}
469
/*
 * Inject a tx descriptor onto the nic's error list.  Descriptors queued
 * here are returned by __gnix_nic_txd_err_get() ahead of real CQ events
 * and are reported with GNI_RC_TRANSACTION_ERROR status (see
 * __nic_get_completed_txd).
 */
void _gnix_nic_txd_err_inject(struct gnix_nic *nic,
			      struct gnix_tx_descriptor *txd)
{
	slist_insert_tail(&txd->err_list, &nic->err_txds);
}
475
__gnix_nic_txd_err_get(struct gnix_nic * nic,struct gnix_tx_descriptor ** txd)476 static int __gnix_nic_txd_err_get(struct gnix_nic *nic,
477 struct gnix_tx_descriptor **txd)
478 {
479 struct slist_entry *list_entry;
480 struct gnix_tx_descriptor *txd_p;
481
482 list_entry = slist_remove_head(&nic->err_txds);
483 if (list_entry) {
484 txd_p = container_of(list_entry,
485 struct gnix_tx_descriptor,
486 err_list);
487 *txd = txd_p;
488 return 1;
489 }
490
491 return 0;
492 }
493
/*
 * Retrieve one completed TX descriptor, preferring descriptors queued
 * on the error-injection list, otherwise dequeuing from the given
 * hardware CQ.  For transaction errors, consults GNI to decide whether
 * the error is recoverable; unrecoverable errors force the owning
 * request's retry count to max so the error is delivered to the app.
 *
 * @param nic        nic owning the descriptors
 * @param hw_cq      hardware TX CQ to poll
 * @param txd        [out] completed descriptor, NULL when none is ready
 * @param tx_status  [out] completion status for *txd; GNI_RC_NOT_DONE
 *                   when no descriptor was available
 */
static void __nic_get_completed_txd(struct gnix_nic *nic,
				   gni_cq_handle_t hw_cq,
				   struct gnix_tx_descriptor **txd,
				   gni_return_t *tx_status)
{
	gni_post_descriptor_t *gni_desc;
	struct gnix_tx_descriptor *txd_p = NULL;
	struct gnix_fab_req *req;
	gni_return_t status;
	int msg_id;
	gni_cq_entry_t cqe;
	uint32_t recov = 1;	/* assume recoverable until proven otherwise */

	/* injected errors take precedence over real CQ events */
	if (__gnix_nic_txd_err_get(nic, &txd_p)) {
		*txd = txd_p;
		*tx_status = GNI_RC_TRANSACTION_ERROR;
		return;
	}

	status = GNI_CqGetEvent(hw_cq, &cqe);
	if (status == GNI_RC_NOT_DONE) {
		*txd = NULL;
		*tx_status = GNI_RC_NOT_DONE;
		return;
	}

	assert(status == GNI_RC_SUCCESS ||
	       status == GNI_RC_TRANSACTION_ERROR);

	if (OFI_UNLIKELY(status == GNI_RC_TRANSACTION_ERROR)) {
		/* ask GNI whether this transaction error is recoverable */
		status = GNI_CqErrorRecoverable(cqe, &recov);
		if (status == GNI_RC_SUCCESS) {
			if (!recov) {
				char ebuf[512];

				GNI_CqErrorStr(cqe, ebuf, sizeof(ebuf));
				GNIX_WARN(FI_LOG_EP_DATA,
					  "CQ error status: %s\n",
					   ebuf);
			}
		} else {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "GNI_CqErrorRecover returned: %s\n",
				   gni_err_str[status]);
			recov = 0; /* assume something bad has happened */
		}
	}

	/* map the CQE back to our descriptor: POST events carry the GNI
	 * post descriptor, SMSG events carry the descriptor's msg id */
	if (GNI_CQ_GET_TYPE(cqe) == GNI_CQ_EVENT_TYPE_POST) {
		status = GNI_GetCompleted(hw_cq, cqe, &gni_desc);

		assert(status == GNI_RC_SUCCESS ||
		       status == GNI_RC_TRANSACTION_ERROR);

		txd_p = container_of(gni_desc,
				   struct gnix_tx_descriptor,
				   gni_desc);
	} else if (GNI_CQ_GET_TYPE(cqe) == GNI_CQ_EVENT_TYPE_SMSG) {
		msg_id = GNI_CQ_GET_MSG_ID(cqe);
		txd_p = __desc_lkup_by_id(nic, msg_id);
	}

	if (OFI_UNLIKELY(txd_p == NULL))
		GNIX_FATAL(FI_LOG_EP_DATA, "Unexpected CQE: 0x%lx", cqe);

	/*
	 * set retry count on the request to max to force
	 * delivering error'd CQ event to application
	 */
	if (!recov) {
		status = GNI_RC_TRANSACTION_ERROR;
		req = txd_p->req;
		if (req)
			req->tx_failures = UINT_MAX;
	}

	*tx_status = status;
	*txd = txd_p;

}
574
/*
 * Reap completed TX descriptors from the given CQ and invoke each
 * descriptor's completion callback.  Stops when the CQ runs dry or a
 * callback reports an error.
 *
 * @param nic  nic owning the CQ
 * @param cq   TX CQ (polling or blocking) to drain
 * @return FI_SUCCESS, or the first completer error encountered
 */
static int __nic_tx_progress(struct gnix_nic *nic, gni_cq_handle_t cq)
{
	int rc = FI_SUCCESS;

	for (;;) {
		struct gnix_tx_descriptor *done_txd = NULL;
		gni_return_t comp_status;

		/* descriptor retrieval must be serialized on the nic */
		COND_ACQUIRE(nic->requires_lock, &nic->lock);
		__nic_get_completed_txd(nic, cq, &done_txd,
					&comp_status);
		COND_RELEASE(nic->requires_lock, &nic->lock);

		if (done_txd == NULL)
			break;

		if (done_txd->completer_fn) {
			rc = done_txd->completer_fn(done_txd, comp_status);
			if (rc != FI_SUCCESS) {
				/*
				 * TODO: need to post error to CQ
				 */
				GNIX_WARN(FI_LOG_EP_DATA,
					  "TXD completer failed: %d", rc);
			}
		}

		if (rc != FI_SUCCESS)
			break;
	}

	return rc;
}
606
_gnix_nic_progress(void * arg)607 int _gnix_nic_progress(void *arg)
608 {
609 struct gnix_nic *nic = (struct gnix_nic *)arg;
610 int ret = FI_SUCCESS;
611
612 ret = __nic_tx_progress(nic, nic->tx_cq);
613 if (OFI_UNLIKELY(ret != FI_SUCCESS))
614 return ret;
615
616 if (nic->tx_cq_blk && nic->tx_cq_blk != nic->tx_cq) {
617 ret = __nic_tx_progress(nic, nic->tx_cq_blk);
618 if (OFI_UNLIKELY(ret != FI_SUCCESS))
619 return ret;
620 }
621
622 ret = __nic_rx_progress(nic);
623 if (ret != FI_SUCCESS)
624 return ret;
625
626 ret = _gnix_vc_nic_progress(nic);
627 if (ret != FI_SUCCESS)
628 return ret;
629
630 return ret;
631 }
632
_gnix_nic_free_rem_id(struct gnix_nic * nic,int remote_id)633 int _gnix_nic_free_rem_id(struct gnix_nic *nic, int remote_id)
634 {
635 assert(nic);
636
637 if ((remote_id < 0) || (remote_id > nic->vc_id_table_count))
638 return -FI_EINVAL;
639
640 _gnix_clear_bit(&nic->vc_id_bitmap, remote_id);
641
642 return FI_SUCCESS;
643 }
644
645 /*
646 * this function is needed to allow for quick lookup of a vc based on
647 * the contents of the GNI CQE coming off of the GNI RX CQ associated
648 * with GNI nic being used by this VC. Using a bitmap to expedite
649 * scanning vc's in the case of a GNI CQ overrun.
650 */
651
/*
 * this function is needed to allow for quick lookup of a vc based on
 * the contents of the GNI CQE coming off of the GNI RX CQ associated
 * with GNI nic being used by this VC. Using a bitmap to expedite
 * scanning vc's in the case of a GNI CQ overrun.
 *
 * Assigns the next free remote id for 'entry' (ids are handed out
 * monotonically; freed ids are not yet recycled -- see TODO below),
 * growing the id table and bitmap when capacity is exhausted.
 *
 * @param nic        nic owning the vc id table
 * @param remote_id  [out] assigned id (0 .. vc_id_table_count - 1)
 * @param entry      pointer stored in the table (a struct gnix_vc *)
 * @return FI_SUCCESS, or -FI_ENOMEM if growing the table/bitmap failed
 */
int _gnix_nic_get_rem_id(struct gnix_nic *nic, int *remote_id, void *entry)
{
	int ret = FI_SUCCESS;
	void **table_base;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * TODO: really need to search bitmap for clear
	 * bit before resizing the table
	 */

	COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock);
	if (nic->vc_id_table_capacity == nic->vc_id_table_count) {
		/* double the table; the bitmap is resized to match below */
		table_base = realloc(nic->vc_id_table,
				     2 * nic->vc_id_table_capacity *
				     sizeof(void *));
		if (table_base == NULL) {
			ret =  -FI_ENOMEM;
			goto err;
		}
		nic->vc_id_table_capacity *= 2;
		nic->vc_id_table = table_base;

		ret = _gnix_realloc_bitmap(&nic->vc_id_bitmap,
					   nic->vc_id_table_capacity);
		if (ret != FI_SUCCESS) {
			assert(ret == -FI_ENOMEM);
			goto err;
		}
	}

	nic->vc_id_table[nic->vc_id_table_count] = entry;
	*remote_id = nic->vc_id_table_count;

	/*
	 * set bit in the bitmap
	 */

	_gnix_set_bit(&nic->vc_id_bitmap, nic->vc_id_table_count);

	++(nic->vc_id_table_count);
err:
	COND_RELEASE(nic->requires_lock, &nic->vc_id_lock);
	return ret;
}
698
699 /*
700 * allocate a free list of tx descs for a gnix_nic struct.
701 */
702
__gnix_nic_tx_freelist_init(struct gnix_nic * nic,int n_descs)703 static int __gnix_nic_tx_freelist_init(struct gnix_nic *nic, int n_descs)
704 {
705 int i, ret = FI_SUCCESS;
706 struct gnix_tx_descriptor *desc_base, *desc_ptr;
707
708 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
709
710 /*
711 * set up free list of tx descriptors.
712 */
713
714 desc_base = calloc(n_descs, sizeof(struct gnix_tx_descriptor));
715 if (desc_base == NULL) {
716 ret = -FI_ENOMEM;
717 goto err;
718 }
719
720 dlist_init(&nic->tx_desc_free_list);
721 dlist_init(&nic->tx_desc_active_list);
722
723 for (i = 0, desc_ptr = desc_base; i < n_descs; i++, desc_ptr++) {
724 desc_ptr->id = i;
725 dlist_insert_tail(&desc_ptr->list,
726 &nic->tx_desc_free_list);
727 }
728
729 nic->max_tx_desc_id = n_descs - 1;
730 nic->tx_desc_base = desc_base;
731
732 fastlock_init(&nic->tx_desc_lock);
733
734 return ret;
735
736 err:
737 return ret;
738
739 }
740
741 /*
742 * clean up the tx descs free list
743 */
/*
 * clean up the tx descs free list
 */
static void __gnix_nic_tx_freelist_destroy(struct gnix_nic *nic)
{
	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/* all descriptors live in the single slab allocated by
	 * __gnix_nic_tx_freelist_init, so one free releases them all */
	free(nic->tx_desc_base);
	fastlock_destroy(&nic->tx_desc_lock);
}
751
752 /*
753 * free a gnix nic and associated resources if refcnt drops to 0
754 */
755
__nic_destruct(void * obj)756 static void __nic_destruct(void *obj)
757 {
758 int ret = FI_SUCCESS;
759 gni_return_t status = GNI_RC_SUCCESS;
760 struct gnix_nic *nic = (struct gnix_nic *) obj;
761
762 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
763
764 /* Get us out of the progression tables we are destroying the nic
765 * and we don't want the wait progression thread to progress us
766 * after our structures are destroyed.
767 */
768 pthread_mutex_lock(&gnix_nic_list_lock);
769
770 dlist_remove(&nic->gnix_nic_list);
771 --gnix_nics_per_ptag[nic->ptag];
772 dlist_remove(&nic->ptag_nic_list);
773
774 pthread_mutex_unlock(&gnix_nic_list_lock);
775 __gnix_nic_tx_freelist_destroy(nic);
776
777 /*
778 *free irq cq related resources
779 */
780
781 ret = __nic_teardown_irq_cq(nic);
782 if (ret != FI_SUCCESS)
783 GNIX_WARN(FI_LOG_EP_CTRL,
784 "__nic_teardown_irq_cq returned %s\n",
785 fi_strerror(-ret));
786
787 /*
788 * kill off progress thread, if any
789 */
790
791 if (nic->progress_thread) {
792
793 ret = pthread_cancel(nic->progress_thread);
794 if ((ret != 0) && (ret != ESRCH)) {
795 GNIX_WARN(FI_LOG_EP_CTRL,
796 "pthread_cancel returned %d\n", ret);
797 goto err;
798 }
799
800 ret = pthread_join(nic->progress_thread,
801 NULL);
802 if ((ret != 0) && (ret != ESRCH)) {
803 GNIX_WARN(FI_LOG_EP_CTRL,
804 "pthread_join returned %d\n", ret);
805 goto err;
806 }
807
808 GNIX_INFO(FI_LOG_EP_CTRL, "pthread_join returned %d\n", ret);
809 nic->progress_thread = 0;
810 }
811
812 /* Must free mboxes first, because the MR has a pointer to the
813 * nic handles below */
814 ret = _gnix_mbox_allocator_destroy(nic->mbox_hndl);
815 if (ret != FI_SUCCESS)
816 GNIX_WARN(FI_LOG_EP_CTRL,
817 "_gnix_mbox_allocator_destroy returned %s\n",
818 fi_strerror(-ret));
819
820 /*
821 * see comments in the nic constructor about why
822 * the following code section is currently stubbed out.
823 */
824 #if 0
825 ret = _gnix_mbox_allocator_destroy(nic->s_rdma_buf_hndl);
826 if (ret != FI_SUCCESS)
827 GNIX_WARN(FI_LOG_EP_CTRL,
828 "_gnix_mbox_allocator_destroy returned %s\n",
829 fi_strerror(-ret));
830
831 ret = _gnix_mbox_allocator_destroy(nic->r_rdma_buf_hndl);
832 if (ret != FI_SUCCESS)
833 GNIX_WARN(FI_LOG_EP_CTRL,
834 "_gnix_mbox_allocator_destroy returned %s\n",
835 fi_strerror(-ret));
836 #endif
837
838 if (!nic->gni_cdm_hndl) {
839 GNIX_WARN(FI_LOG_EP_CTRL, "No CDM attached to nic, nic=%p");
840 }
841
842 assert(nic->gni_cdm_hndl != NULL);
843
844 if (nic->rx_cq != NULL && nic->rx_cq != nic->rx_cq_blk) {
845 status = GNI_CqDestroy(nic->rx_cq);
846 if (status != GNI_RC_SUCCESS) {
847 GNIX_WARN(FI_LOG_EP_CTRL,
848 "GNI_CqDestroy returned %s\n",
849 gni_err_str[status]);
850 ret = gnixu_to_fi_errno(status);
851 goto err;
852 }
853 }
854
855 if (nic->rx_cq_blk != NULL) {
856 status = GNI_CqDestroy(nic->rx_cq_blk);
857 if (status != GNI_RC_SUCCESS) {
858 GNIX_WARN(FI_LOG_EP_CTRL,
859 "GNI_CqDestroy returned %s\n",
860 gni_err_str[status]);
861 ret = gnixu_to_fi_errno(status);
862 goto err;
863 }
864 }
865
866 if (nic->tx_cq != NULL && nic->tx_cq != nic->tx_cq_blk) {
867 status = GNI_CqDestroy(nic->tx_cq);
868 if (status != GNI_RC_SUCCESS) {
869 GNIX_WARN(FI_LOG_EP_CTRL,
870 "GNI_CqDestroy returned %s\n",
871 gni_err_str[status]);
872 ret = gnixu_to_fi_errno(status);
873 goto err;
874 }
875 }
876
877 if (nic->tx_cq_blk != NULL) {
878 status = GNI_CqDestroy(nic->tx_cq_blk);
879 if (status != GNI_RC_SUCCESS) {
880 GNIX_WARN(FI_LOG_EP_CTRL,
881 "GNI_CqDestroy returned %s\n",
882 gni_err_str[status]);
883 ret = gnixu_to_fi_errno(status);
884 goto err;
885 }
886 }
887
888 if (nic->allocd_gni_res & GNIX_NIC_CDM_ALLOCD) {
889 status = GNI_CdmDestroy(nic->gni_cdm_hndl);
890 if (status != GNI_RC_SUCCESS) {
891 GNIX_WARN(FI_LOG_EP_CTRL,
892 "GNI_CdmDestroy returned %s\n",
893 gni_err_str[status]);
894 ret = gnixu_to_fi_errno(status);
895 goto err;
896 }
897 }
898
899 if (nic->vc_id_table != NULL) {
900 free(nic->vc_id_table);
901 } else {
902 GNIX_WARN(FI_LOG_EP_CTRL, "vc_id_table was NULL\n");
903 }
904
905 /*
906 * destroy VC free list associated with this nic
907 */
908
909 _gnix_fl_destroy(&nic->vc_freelist);
910
911 /*
912 * remove the nic from the linked lists
913 * for the domain and the global nic list
914 */
915
916 err:
917 _gnix_free_bitmap(&nic->vc_id_bitmap);
918
919 free(nic);
920 }
921
_gnix_nic_free(struct gnix_nic * nic)922 int _gnix_nic_free(struct gnix_nic *nic)
923 {
924 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
925
926 if (nic == NULL)
927 return -FI_EINVAL;
928
929 _gnix_ref_put(nic);
930
931 return FI_SUCCESS;
932 }
933
934 /*
935 * allocate a gnix_nic struct using attributes of the domain
936 */
937
gnix_nic_alloc(struct gnix_fid_domain * domain,struct gnix_nic_attr * attr,struct gnix_nic ** nic_ptr)938 int gnix_nic_alloc(struct gnix_fid_domain *domain,
939 struct gnix_nic_attr *attr,
940 struct gnix_nic **nic_ptr)
941 {
942 int ret = FI_SUCCESS;
943 struct gnix_nic *nic = NULL;
944 uint32_t device_addr;
945 gni_return_t status;
946 uint32_t fake_cdm_id = GNIX_CREATE_CDM_ID;
947 gni_smsg_attr_t smsg_mbox_attr;
948 struct gnix_nic_attr *nic_attr = &default_attr;
949 uint32_t num_corespec_cpus = 0;
950 bool must_alloc_nic = false;
951 bool free_list_inited = false;
952 struct gnix_auth_key *auth_key;
953
954 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
955
956 *nic_ptr = NULL;
957 nic_attr->gni_cdm_modes = gnix_cdm_modes;
958
959 if (attr) {
960 ret = __gnix_nic_check_attr_sanity(attr);
961 if (ret != FI_SUCCESS)
962 return ret;
963 nic_attr = attr;
964 must_alloc_nic = nic_attr->must_alloc;
965 }
966
967 auth_key = nic_attr->auth_key;
968
969 /*
970 * If we've maxed out the number of nics for this domain/ptag,
971 * search the list of existing nics. Take the gnix_nic_list_lock
972 * here since the gnix_nic_list will be manipulated whether or
973 * not we attach to an existing nic or create a new one.
974 *
975 * Should not matter much that this is a pretty fat critical section
976 * since endpoint setup for RDM type will typically occur near
977 * app startup, likely in a single threaded region, and for the
978 * case of MSG, where there will likely be many 100s of EPs, after
979 * a few initial slow times through this section when nics are created,
980 * max nic count for the ptag will be reached and only the first part
981 * of the critical section - iteration over existing nics - will be
982 * happening.
983 */
984
985 pthread_mutex_lock(&gnix_nic_list_lock);
986
987 /*
988 * we can reuse previously allocated nics as long as a
989 * must_alloc is not specified in the nic_attr arg.
990 */
991
992 if ((must_alloc_nic == false) &&
993 (gnix_nics_per_ptag[auth_key->ptag] >= gnix_max_nics_per_ptag)) {
994 assert(!dlist_empty(&gnix_nic_list_ptag[auth_key->ptag]));
995
996 nic = dlist_first_entry(&gnix_nic_list_ptag[auth_key->ptag],
997 struct gnix_nic, ptag_nic_list);
998 dlist_remove(&nic->ptag_nic_list);
999 dlist_insert_tail(&nic->ptag_nic_list,
1000 &gnix_nic_list_ptag[auth_key->ptag]);
1001 _gnix_ref_get(nic);
1002
1003 GNIX_INFO(FI_LOG_EP_CTRL, "Reusing NIC:%p\n", nic);
1004 }
1005
1006 /*
1007 * no nic found create a cdm and attach
1008 */
1009
1010 if (!nic) {
1011
1012 nic = calloc(1, sizeof(struct gnix_nic));
1013 if (nic == NULL) {
1014 ret = -FI_ENOMEM;
1015 goto err;
1016 }
1017
1018 nic->using_vmdh = domain->using_vmdh;
1019
1020 if (nic_attr->use_cdm_id == false) {
1021 ret = _gnix_cm_nic_create_cdm_id(domain, &fake_cdm_id);
1022 if (ret != FI_SUCCESS) {
1023 GNIX_WARN(FI_LOG_EP_CTRL,
1024 "_gnix_cm_nic_create_cdm_id returned %s\n",
1025 fi_strerror(-ret));
1026 goto err;
1027 }
1028 } else
1029 fake_cdm_id = nic_attr->cdm_id;
1030
1031 if (nic_attr->gni_cdm_hndl == NULL) {
1032 status = GNI_CdmCreate(fake_cdm_id,
1033 auth_key->ptag,
1034 auth_key->cookie,
1035 gnix_cdm_modes,
1036 &nic->gni_cdm_hndl);
1037 if (status != GNI_RC_SUCCESS) {
1038 GNIX_WARN(FI_LOG_EP_CTRL, "GNI_CdmCreate returned %s\n",
1039 gni_err_str[status]);
1040 ret = gnixu_to_fi_errno(status);
1041 goto err1;
1042 }
1043 nic->allocd_gni_res |= GNIX_NIC_CDM_ALLOCD;
1044 } else {
1045 nic->gni_cdm_hndl = nic_attr->gni_cdm_hndl;
1046 }
1047
1048 /*
1049 * Okay, now go for the attach
1050 */
1051
1052 if (nic_attr->gni_nic_hndl == NULL) {
1053 status = GNI_CdmAttach(nic->gni_cdm_hndl,
1054 0,
1055 &device_addr,
1056 &nic->gni_nic_hndl);
1057 if (status != GNI_RC_SUCCESS) {
1058 GNIX_WARN(FI_LOG_EP_CTRL, "GNI_CdmAttach returned %s\n",
1059 gni_err_str[status]);
1060 _gnix_dump_gni_res(auth_key->ptag);
1061 ret = gnixu_to_fi_errno(status);
1062 goto err1;
1063 }
1064 } else
1065 nic->gni_nic_hndl = nic_attr->gni_nic_hndl;
1066
1067 /*
1068 * create TX CQs - first polling, then blocking
1069 */
1070
1071 status = GNI_CqCreate(nic->gni_nic_hndl,
1072 domain->params.tx_cq_size,
1073 0, /* no delay count */
1074 GNI_CQ_BLOCKING |
1075 domain->gni_cq_modes,
1076 NULL, /* useless handler */
1077 NULL, /* useless handler
1078 context */
1079 &nic->tx_cq_blk);
1080 if (status != GNI_RC_SUCCESS) {
1081 GNIX_WARN(FI_LOG_EP_CTRL,
1082 "GNI_CqCreate returned %s\n",
1083 gni_err_str[status]);
1084 _gnix_dump_gni_res(auth_key->ptag);
1085 ret = gnixu_to_fi_errno(status);
1086 goto err1;
1087 }
1088
1089 /* Use blocking CQs for all operations if eager_auto_progress
1090 * is used. */
1091 if (domain->params.eager_auto_progress) {
1092 nic->tx_cq = nic->tx_cq_blk;
1093 } else {
1094 status = GNI_CqCreate(nic->gni_nic_hndl,
1095 domain->params.tx_cq_size,
1096 0, /* no delay count */
1097 domain->gni_cq_modes,
1098 NULL, /* useless handler */
1099 NULL, /* useless handler ctx */
1100 &nic->tx_cq);
1101 if (status != GNI_RC_SUCCESS) {
1102 GNIX_WARN(FI_LOG_EP_CTRL,
1103 "GNI_CqCreate returned %s\n",
1104 gni_err_str[status]);
1105 _gnix_dump_gni_res(auth_key->ptag);
1106 ret = gnixu_to_fi_errno(status);
1107 goto err1;
1108 }
1109 }
1110
1111
1112 /*
1113 * create RX CQs - first polling, then blocking
1114 */
1115
1116 status = GNI_CqCreate(nic->gni_nic_hndl,
1117 domain->params.rx_cq_size,
1118 0,
1119 GNI_CQ_BLOCKING |
1120 domain->gni_cq_modes,
1121 NULL,
1122 NULL,
1123 &nic->rx_cq_blk);
1124 if (status != GNI_RC_SUCCESS) {
1125 GNIX_WARN(FI_LOG_EP_CTRL,
1126 "GNI_CqCreate returned %s\n",
1127 gni_err_str[status]);
1128 _gnix_dump_gni_res(auth_key->ptag);
1129 ret = gnixu_to_fi_errno(status);
1130 goto err1;
1131 }
1132
1133 /* Use blocking CQs for all operations if eager_auto_progress
1134 * is used. */
1135 if (domain->params.eager_auto_progress) {
1136 nic->rx_cq = nic->rx_cq_blk;
1137 } else {
1138 status = GNI_CqCreate(nic->gni_nic_hndl,
1139 domain->params.rx_cq_size,
1140 0,
1141 domain->gni_cq_modes,
1142 NULL,
1143 NULL,
1144 &nic->rx_cq);
1145 if (status != GNI_RC_SUCCESS) {
1146 GNIX_WARN(FI_LOG_EP_CTRL,
1147 "GNI_CqCreate returned %s\n",
1148 gni_err_str[status]);
1149 _gnix_dump_gni_res(auth_key->ptag);
1150 ret = gnixu_to_fi_errno(status);
1151 goto err1;
1152 }
1153 }
1154
1155 nic->device_addr = device_addr;
1156 nic->ptag = auth_key->ptag;
1157 nic->cookie = auth_key->cookie;
1158
1159 nic->vc_id_table_capacity = domain->params.vc_id_table_capacity;
1160 nic->vc_id_table = malloc(sizeof(void *) *
1161 nic->vc_id_table_capacity);
1162 if (nic->vc_id_table == NULL) {
1163 GNIX_WARN(FI_LOG_EP_CTRL,
1164 "malloc of vc_id_table failed\n");
1165 ret = -FI_ENOMEM;
1166 goto err1;
1167 }
1168
1169 ret = _gnix_alloc_bitmap(&nic->vc_id_bitmap,
1170 nic->vc_id_table_capacity, NULL);
1171 if (ret != FI_SUCCESS) {
1172 GNIX_WARN(FI_LOG_EP_CTRL,
1173 "alloc_bitmap returned %d\n", ret);
1174 goto err1;
1175 }
1176 fastlock_init(&nic->vc_id_lock);
1177
1178 /*
1179 * initialize free list for VC's
1180 * In addition to hopefully allowing for a more compact
1181 * allocation of VC structs, the free list is also import
1182 * because there is a window of time when using auto progress
1183 * that a thread may be going through the progress engine
1184 * while one of the application threads is actively tearing
1185 * down an endpoint (and hence its associated VCs) before the
1186 * rem_id for the vc is removed from the vector.
1187 * As a consequence, it is important that
1188 * the memory allocated within the freelist allocator not be
1189 * returned to the system prior to the freelist being destroyed
1190 * as part of the nic destructor procedure. The freelist is
1191 * destroyed in that procedure after the progress thread
1192 * has been joined.
1193 */
1194
1195 ret = _gnix_fl_init_ts(sizeof(struct gnix_vc),
1196 offsetof(struct gnix_vc, fr_list),
1197 GNIX_VC_FL_MIN_SIZE,
1198 GNIX_VC_FL_INIT_REFILL_SIZE,
1199 0,
1200 0,
1201 &nic->vc_freelist);
1202 if (ret == FI_SUCCESS) {
1203 free_list_inited = true;
1204 } else {
1205 GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_fl_init returned: %s\n",
1206 fi_strerror(-ret));
1207 goto err1;
1208 }
1209
1210 fastlock_init(&nic->lock);
1211
1212 ret = __gnix_nic_tx_freelist_init(nic,
1213 domain->params.tx_cq_size);
1214 if (ret != FI_SUCCESS)
1215 goto err1;
1216
1217 fastlock_init(&nic->prog_vcs_lock);
1218 dlist_init(&nic->prog_vcs);
1219
1220 _gnix_ref_init(&nic->ref_cnt, 1, __nic_destruct);
1221
1222 smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
1223 smsg_mbox_attr.mbox_maxcredit = domain->params.mbox_maxcredit;
1224 smsg_mbox_attr.msg_maxsize = domain->params.mbox_msg_maxsize;
1225
1226 status = GNI_SmsgBufferSizeNeeded(&smsg_mbox_attr,
1227 &nic->mem_per_mbox);
1228 if (status != GNI_RC_SUCCESS) {
1229 GNIX_WARN(FI_LOG_EP_CTRL,
1230 "GNI_SmsgBufferSizeNeeded returned %s\n",
1231 gni_err_str[status]);
1232 ret = gnixu_to_fi_errno(status);
1233 goto err1;
1234 }
1235
1236 /*
1237 * set up mailbox allocator for SMSG mailboxes
1238 */
1239
1240 ret = _gnix_mbox_allocator_create(nic,
1241 nic->rx_cq,
1242 domain->params.mbox_page_size,
1243 (size_t)nic->mem_per_mbox,
1244 domain->params.mbox_num_per_slab,
1245 &nic->mbox_hndl);
1246
1247 if (ret != FI_SUCCESS) {
1248 GNIX_WARN(FI_LOG_EP_CTRL,
1249 "_gnix_mbox_alloc returned %s\n",
1250 fi_strerror(-ret));
1251 goto err1;
1252 }
1253
1254 /*
1255 * use the mailbox allocator system to set up an
1256 * pre-pinned RDMA bounce buffers for longer eager
1257 * messages and other cases where zero-copy
1258 * can't be safely used.
1259 *
1260 * One set of blocks is used for the send side.
1261 * A second set of blocks is used for the receive
1262 * side. Both sets of blocks are registered against
1263 * the blocking RX CQ for this nic.
1264 *
1265 * TODO: hardwired constants, uff
1266 * TODO: better to use a buddy allocator or some other
1267 * allocator
1268 * Disable these for now as we're not using and they
1269 * chew up a lot of IOMMU space per nic.
1270 */
1271
1272 #if 0
1273 ret = _gnix_mbox_allocator_create(nic,
1274 NULL,
1275 GNIX_PAGE_2MB,
1276 65536,
1277 512,
1278 &nic->s_rdma_buf_hndl);
1279 if (ret != FI_SUCCESS) {
1280 GNIX_WARN(FI_LOG_EP_CTRL,
1281 "_gnix_mbox_alloc returned %s\n",
1282 fi_strerror(-ret));
1283 _gnix_dump_gni_res(domain->ptag);
1284 goto err1;
1285 }
1286
1287 ret = _gnix_mbox_allocator_create(nic,
1288 NULL,
1289 GNIX_PAGE_2MB,
1290 65536,
1291 512,
1292 &nic->r_rdma_buf_hndl);
1293 if (ret != FI_SUCCESS) {
1294 GNIX_WARN(FI_LOG_EP_CTRL,
1295 "_gnix_mbox_alloc returned %s\n",
1296 fi_strerror(-ret));
1297 _gnix_dump_gni_res(domain->ptag);
1298 goto err1;
1299 }
1300 #endif
1301
1302 ret = __nic_setup_irq_cq(nic);
1303 if (ret != FI_SUCCESS) {
1304 GNIX_WARN(FI_LOG_EP_CTRL,
1305 "__nic_setup_irq_cq returned %s\n",
1306 fi_strerror(-ret));
1307 _gnix_dump_gni_res(auth_key->ptag);
1308 goto err1;
1309 }
1310
1311 /*
1312 * if the domain is using PROGRESS_AUTO for data, set up
1313 * a progress thread.
1314 */
1315
1316 if (domain->data_progress == FI_PROGRESS_AUTO) {
1317
1318 /*
1319 * tell CLE job container that next thread should be
1320 * runnable anywhere in the cpuset, don't treat as
1321 * an error if one is returned, may have perf issues
1322 * though...
1323 */
1324
1325 ret = _gnix_get_num_corespec_cpus(&num_corespec_cpus);
1326 if (ret != FI_SUCCESS) {
1327 GNIX_WARN(FI_LOG_EP_CTRL,
1328 "failed to get num corespec cpus\n");
1329 }
1330 if (num_corespec_cpus > 0) {
1331 ret = _gnix_job_disable_affinity_apply();
1332 } else {
1333 ret = _gnix_job_enable_unassigned_cpus();
1334 }
1335 if (ret != 0)
1336 GNIX_WARN(FI_LOG_EP_CTRL,
1337 "job_disable/unassigned cpus returned %d\n",
1338 ret);
1339
1340 ret = pthread_create(&nic->progress_thread,
1341 NULL,
1342 __gnix_nic_prog_thread_fn,
1343 (void *)nic);
1344 if (ret)
1345 GNIX_WARN(FI_LOG_EP_CTRL,
1346 "pthread_create call returned %d\n", ret);
1347 }
1348
1349 dlist_insert_tail(&nic->gnix_nic_list, &gnix_nic_list);
1350 dlist_insert_tail(&nic->ptag_nic_list,
1351 &gnix_nic_list_ptag[auth_key->ptag]);
1352
1353 nic->smsg_callbacks = gnix_ep_smsg_callbacks;
1354
1355 ++gnix_nics_per_ptag[auth_key->ptag];
1356
1357 GNIX_INFO(FI_LOG_EP_CTRL, "Allocated NIC:%p\n", nic);
1358 }
1359
1360 if (nic) {
1361 nic->requires_lock = domain->thread_model != FI_THREAD_COMPLETION;
1362 nic->using_vmdh = domain->using_vmdh;
1363 }
1364
1365 *nic_ptr = nic;
1366 goto out;
1367
1368 err1:
1369 ofi_atomic_dec32(&gnix_id_counter);
1370 err:
1371 if (nic != NULL) {
1372 __nic_teardown_irq_cq(nic);
1373 if (nic->r_rdma_buf_hndl != NULL)
1374 _gnix_mbox_allocator_destroy(nic->r_rdma_buf_hndl);
1375 if (nic->s_rdma_buf_hndl != NULL)
1376 _gnix_mbox_allocator_destroy(nic->s_rdma_buf_hndl);
1377 if (nic->mbox_hndl != NULL)
1378 _gnix_mbox_allocator_destroy(nic->mbox_hndl);
1379 if (nic->rx_cq != NULL && nic->rx_cq != nic->rx_cq_blk)
1380 GNI_CqDestroy(nic->rx_cq);
1381 if (nic->rx_cq_blk != NULL)
1382 GNI_CqDestroy(nic->rx_cq_blk);
1383 if (nic->tx_cq != NULL && nic->tx_cq != nic->tx_cq_blk)
1384 GNI_CqDestroy(nic->tx_cq);
1385 if (nic->tx_cq_blk != NULL)
1386 GNI_CqDestroy(nic->tx_cq_blk);
1387 if ((nic->gni_cdm_hndl != NULL) && (nic->allocd_gni_res &
1388 GNIX_NIC_CDM_ALLOCD))
1389 GNI_CdmDestroy(nic->gni_cdm_hndl);
1390 if (free_list_inited == true)
1391 _gnix_fl_destroy(&nic->vc_freelist);
1392 free(nic);
1393 }
1394
1395 out:
1396 pthread_mutex_unlock(&gnix_nic_list_lock);
1397 return ret;
1398 }
1399
_gnix_nic_init(void)1400 void _gnix_nic_init(void)
1401 {
1402 int i, rc;
1403
1404 for (i = 0; i < GNI_PTAG_MAX; i++) {
1405 dlist_init(&gnix_nic_list_ptag[i]);
1406 }
1407
1408 rc = _gnix_nics_per_rank(&gnix_max_nics_per_ptag);
1409 if (rc == FI_SUCCESS) {
1410 GNIX_DEBUG(FI_LOG_FABRIC, "gnix_max_nics_per_ptag: %u\n",
1411 gnix_max_nics_per_ptag);
1412 } else {
1413 GNIX_WARN(FI_LOG_FABRIC, "_gnix_nics_per_rank failed: %d\n",
1414 rc);
1415 }
1416
1417 if (getenv("GNIX_MAX_NICS") != NULL)
1418 gnix_max_nics_per_ptag = atoi(getenv("GNIX_MAX_NICS"));
1419
1420 /*
1421 * Well if we didn't get 1 nic, that means we must really be doing
1422 * FMA sharing.
1423 */
1424
1425 if (gnix_max_nics_per_ptag == 0) {
1426 gnix_max_nics_per_ptag = 1;
1427 GNIX_WARN(FI_LOG_FABRIC, "Using inter-procss FMA sharing\n");
1428 }
1429 }
1430
1431