1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
5  * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
6  * Copyright (c) 2014-2015 Research Organization for Information Science
7  *                         and Technology (RIST). All rights reserved.
8  * Copyright (c) 2014      Bull SAS.  All rights reserved.
9  * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
10  *                         reserved.
11  * $COPYRIGHT$
12  *
13  * Additional copyrights may follow
14  *
15  * $HEADER$
16  */
17 
18 #include "opal_config.h"
19 
20 #include <infiniband/verbs.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <errno.h>
25 #ifdef HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #include <dlfcn.h>
29 
30 #include "opal/mca/btl/base/base.h"
31 #include "btl_openib_xrc.h"
32 #include "btl_openib.h"
33 
34 #if HAVE_XRC
35 #define SIZE_OF3(A, B, C) (sizeof(A) + sizeof(B) + sizeof(C))
36 
37 static void ib_address_constructor(ib_address_t *ib_addr);
38 static void ib_address_destructor(ib_address_t *ib_addr);
39 
40 OBJ_CLASS_INSTANCE(ib_address_t,
41                    opal_list_item_t,
42                    ib_address_constructor,
43                    ib_address_destructor);
44 
45 /* This func. opens XRC domain */
mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t * device)46 int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
47 {
48     int len;
49     char *xrc_file_name;
50     const char *dev_name;
51 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
52     struct ibv_xrcd_init_attr xrcd_attr;
53 #endif
54 
55     dev_name = ibv_get_device_name(device->ib_dev);
56     len = asprintf(&xrc_file_name,
57             "%s"OPAL_PATH_SEP"openib_xrc_domain_%s",
58             opal_process_info.job_session_dir, dev_name);
59     if (0 > len) {
60         BTL_ERROR(("Failed to allocate memomry for XRC file name: %s\n",
61                    strerror(errno)));
62         return OPAL_ERROR;
63     }
64 
65     device->xrc_fd = open(xrc_file_name, O_CREAT, S_IWUSR|S_IRUSR);
66     if (0 > device->xrc_fd) {
67         BTL_ERROR(("Failed to open XRC domain file %s, errno says %s\n",
68                 xrc_file_name,strerror(errno)));
69         free(xrc_file_name);
70         return OPAL_ERROR;
71     }
72 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
73     memset(&xrcd_attr, 0, sizeof xrcd_attr);
74     xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
75     xrcd_attr.fd = device->xrc_fd;
76     xrcd_attr.oflags = O_CREAT;
77     device->xrcd = ibv_open_xrcd(device->ib_dev_context, &xrcd_attr);
78     if (NULL == device->xrcd) {
79 #else
80     device->xrc_domain = ibv_open_xrc_domain(device->ib_dev_context, device->xrc_fd, O_CREAT);
81     if (NULL == device->xrc_domain) {
82 #endif
83         BTL_ERROR(("Failed to open XRC domain\n"));
84         close(device->xrc_fd);
85         free(xrc_file_name);
86         return OPAL_ERROR;
87     }
88 
89     return OPAL_SUCCESS;
90 }
91 
92 /* This func. closes XRC domain */
93 int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device)
94 {
95 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
96     if (NULL == device->xrcd) {
97 #else
98     if (NULL == device->xrc_domain) {
99 #endif
100         /* No XRC domain, just exit */
101         return OPAL_SUCCESS;
102     }
103 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
104     if (ibv_close_xrcd(device->xrcd)) {
105 #else
106     if (ibv_close_xrc_domain(device->xrc_domain)) {
107 #endif
108         BTL_ERROR(("Failed to close XRC domain, errno %d says %s\n",
109                     device->xrc_fd, strerror(errno)));
110         return OPAL_ERROR;
111     }
112     /* do we need to check exit status */
113     if (close(device->xrc_fd)) {
114         BTL_ERROR(("Failed to close XRC file descriptor, errno %d says %s\n",
115                 device->xrc_fd, strerror(errno)));
116         return OPAL_ERROR;
117     }
118     return OPAL_SUCCESS;
119 }
120 
121 static void ib_address_constructor(ib_address_t *ib_addr)
122 {
123     ib_addr->key = NULL;
124     ib_addr->subnet_id = 0;
125     ib_addr->lid = 0;
126     ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
127     ib_addr->qp = NULL;
128     ib_addr->max_wqe = 0;
129     /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call
130      * into the CPC with the lock held. The alternative would be to drop the lock but the
131      * lock is never obtained in a critical path. */
132     OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t);
133     OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t);
134 }
135 
136 static void ib_address_destructor(ib_address_t *ib_addr)
137 {
138     if (NULL != ib_addr->key) {
139         free(ib_addr->key);
140     }
141     OBJ_DESTRUCT(&ib_addr->addr_lock);
142     OBJ_DESTRUCT(&ib_addr->pending_ep);
143 }
144 
145 static int ib_address_init(ib_address_t *ib_addr, uint16_t lid, uint64_t s_id, opal_jobid_t ep_jobid)
146 {
147     ib_addr->key = malloc(SIZE_OF3(s_id, lid, ep_jobid));
148     if (NULL == ib_addr->key) {
149         BTL_ERROR(("Failed to allocate memory for key\n"));
150         return OPAL_ERROR;
151     }
152     memset(ib_addr->key, 0, SIZE_OF3(s_id, lid, ep_jobid));
153     /* creating the key = lid + s_id + ep_jobid */
154     memcpy(ib_addr->key, &lid, sizeof(lid));
155     memcpy((void*)((char*)ib_addr->key + sizeof(lid)), &s_id, sizeof(s_id));
156     memcpy((void*)((char*)ib_addr->key + sizeof(lid) + sizeof(s_id)),
157             &ep_jobid, sizeof(ep_jobid));
158     /* caching lid and subnet id */
159     ib_addr->subnet_id = s_id;
160     ib_addr->lid = lid;
161 
162     return OPAL_SUCCESS;
163 }
164 
165 /* Create new entry in hash table for subnet_id and lid,
166  * update the endpoint pointer.
167  * Before call to this function you need to protect with
168  */
169 int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
170         opal_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep)
171 {
172     void *tmp;
173     int ret = OPAL_SUCCESS;
174     struct ib_address_t *ib_addr = OBJ_NEW(ib_address_t);
175 
176     ret = ib_address_init(ib_addr, lid, s_id, ep_jobid);
177     if (OPAL_SUCCESS != ret ) {
178         BTL_ERROR(("XRC Internal error. Failed to init ib_addr\n"));
179         OBJ_DESTRUCT(ib_addr);
180         return ret;
181     }
182     /* is it already in the table ?*/
183     OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
184     if (OPAL_SUCCESS != opal_hash_table_get_value_ptr(&mca_btl_openib_component.ib_addr_table,
185                 ib_addr->key,
186                 SIZE_OF3(s_id, lid, ep_jobid), &tmp)) {
187         /* It is new one, lets put it on the table */
188         ret = opal_hash_table_set_value_ptr(&mca_btl_openib_component.ib_addr_table,
189                 ib_addr->key, SIZE_OF3(s_id, lid, ep_jobid), (void*)ib_addr);
190         if (OPAL_SUCCESS != ret) {
191             BTL_ERROR(("XRC Internal error."
192                         " Failed to add element to mca_btl_openib_component.ib_addr_table\n"));
193             OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
194             OBJ_DESTRUCT(ib_addr);
195             return ret;
196         }
197         /* update the endpoint with pointer to ib address */
198         ep->ib_addr = ib_addr;
199     } else {
200         /* so we have this one in the table, just add the pointer to the endpoint */
201         ep->ib_addr = (ib_address_t *)tmp;
202         assert(lid == ep->ib_addr->lid && s_id == ep->ib_addr->subnet_id);
203         OBJ_DESTRUCT(ib_addr);
204     }
205     OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
206 
207     return ret;
208 }
209 #endif
210