1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2014 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
14 * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights
15 * reserved.
16 * Copyright (c) 2006-2007 Voltaire All rights reserved.
17 * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
18 * Copyright (c) 2010-2011 IBM Corporation. All rights reserved.
19 * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
20 * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
21 * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
22 * Copyright (c) 2014 Bull SAS. All rights reserved.
23 * Copyright (c) 2015 Research Organization for Information Science
24 * and Technology (RIST). All rights reserved.
25 *
26 * $COPYRIGHT$
27 *
28 * Additional copyrights may follow
29 *
30 * $HEADER$
31 */
32
33 #include "opal_config.h"
34
35 #ifdef HAVE_SYS_TIME_H
36 #include <sys/time.h>
37 #endif
38 #include <time.h>
39 #include <errno.h>
40 #include <string.h>
41
42 #include "opal_stdint.h"
43 #include "opal/util/output.h"
44 #include "opal/util/proc.h"
45 #include "opal/util/show_help.h"
46 #include "opal/class/opal_free_list.h"
47
48 #include "btl_openib_endpoint.h"
49 #include "btl_openib_proc.h"
50 #include "btl_openib_xrc.h"
51 #include "btl_openib_async.h"
52 #include "connect/connect.h"
53
54 static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
55 static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
56
/*
 * Reserve one send work-queue entry (WQE) on the endpoint for this
 * fragment.  If none is available the reservation is rolled back and
 * the fragment is parked on the appropriate no-WQE pending list (low
 * or high priority) so it can be retried when WQEs are returned.
 */
static inline int acquire_wqe(mca_btl_openib_endpoint_t *ep,
                              mca_btl_openib_send_frag_t *frag)
{
    mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
    int qp_index = des->order;
    int list_index = !(des->des_flags & MCA_BTL_DES_FLAGS_PRIORITY);

    if (qp_get_wqe(ep, qp_index) >= 0) {
        return OPAL_SUCCESS;
    }

    /* undo the decrement performed by the failed qp_get_wqe() and
     * queue the fragment for a later retry */
    qp_put_wqe(ep, qp_index);
    opal_list_append(&ep->qps[qp_index].no_wqe_pending_frags[list_index],
                     (opal_list_item_t *) frag);
    return OPAL_ERR_OUT_OF_RESOURCE;
}
72
73 /* this function is called with endpoint->endpoint_lock held */
mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t * endpoint,mca_btl_openib_send_frag_t * frag)74 int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
75 mca_btl_openib_send_frag_t *frag)
76 {
77 int prio = to_base_frag(frag)->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY;
78 mca_btl_openib_header_t *hdr = frag->hdr;
79 mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
80 int qp, ib_rc, rc;
81 bool do_rdma = false;
82 size_t size;
83
84 if(OPAL_LIKELY(des->order == MCA_BTL_NO_ORDER))
85 des->order = frag->qp_idx;
86
87 qp = des->order;
88
89 if(acquire_wqe(endpoint, frag) != OPAL_SUCCESS)
90 return OPAL_ERR_RESOURCE_BUSY;
91
92 size = des->des_segments->seg_len + frag->coalesced_length;
93
94 rc = mca_btl_openib_endpoint_credit_acquire (endpoint, qp, prio, size,
95 &do_rdma, frag, true);
96 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
97 qp_put_wqe(endpoint, qp);
98 return OPAL_ERR_RESOURCE_BUSY;
99 }
100
101 qp_reset_signal_count(endpoint, qp);
102 ib_rc = post_send(endpoint, frag, do_rdma, 1);
103
104 if(!ib_rc)
105 return OPAL_SUCCESS;
106
107 if(endpoint->nbo)
108 BTL_OPENIB_HEADER_NTOH(*hdr);
109
110 mca_btl_openib_endpoint_credit_release (endpoint, qp, do_rdma, frag);
111
112 qp_put_wqe(endpoint, qp);
113
114 BTL_ERROR(("error posting send request error %d: %s. size = %lu\n",
115 ib_rc, strerror(ib_rc), size));
116 return OPAL_ERROR;
117 }
118
119
120
/* Register the endpoint class with OPAL's object system: endpoints are
 * list items whose lifecycle runs through the construct/destruct hooks
 * defined below in this file. */
OBJ_CLASS_INSTANCE(mca_btl_openib_endpoint_t,
                   opal_list_item_t, mca_btl_openib_endpoint_construct,
                   mca_btl_openib_endpoint_destruct);
124
125 /*
126 * Initialize state of the endpoint instance.
127 *
128 */
endpoint_alloc_qp(void)129 static mca_btl_openib_qp_t *endpoint_alloc_qp(void)
130 {
131 mca_btl_openib_qp_t *qp = (mca_btl_openib_qp_t *) calloc(1, sizeof(mca_btl_openib_qp_t));
132 if(!qp) {
133 BTL_ERROR(("Failed to allocate memory for qp"));
134 return NULL;
135 }
136
137 OBJ_CONSTRUCT(&qp->lock, opal_mutex_t);
138
139 return qp;
140 }
141
/* Initialize the per-peer (PP, point-to-point flow controlled) state
 * for one QP slot on an endpoint.
 *
 * NOTE(review): endpoint_alloc_qp() can return NULL on OOM and the
 * users++ below would then fault -- confirm whether OOM is tolerated
 * elsewhere or should abort here. */
static void
endpoint_init_qp_pp(mca_btl_openib_endpoint_qp_t *ep_qp, const int qp)
{
    mca_btl_openib_qp_info_t *qp_info = &mca_btl_openib_component.qp_infos[qp];
    ep_qp->qp = endpoint_alloc_qp();
    ep_qp->qp->users++;

    /* local credits are set here such that on initial posting
     * of the receive buffers we end up with zero credits to return
     * to our peer. The peer initializes his sd_credits to reflect this
     * below. Note that this may be a problem for iWARP as the sender
     * now has credits even if the receive buffers are not yet posted
     */
    ep_qp->u.pp_qp.rd_credits = -qp_info->rd_num;

    ep_qp->u.pp_qp.rd_posted = 0;
    ep_qp->u.pp_qp.cm_sent = 0;
    /* credit-message accounting starts offset by the reserved receive
     * count (rd_rsv) so both sides agree on the initial state */
    ep_qp->u.pp_qp.cm_return = -qp_info->u.pp_qp.rd_rsv;
    ep_qp->u.pp_qp.cm_received = qp_info->u.pp_qp.rd_rsv;

    /* initialize the local view of credits */
    ep_qp->u.pp_qp.sd_credits = qp_info->rd_num;

    /* number of available send WQEs */
    ep_qp->qp->sd_wqe = qp_info->rd_num;
}
168
169 static void
endpoint_init_qp_srq(mca_btl_openib_endpoint_qp_t * ep_qp,const int qp)170 endpoint_init_qp_srq(mca_btl_openib_endpoint_qp_t *ep_qp, const int qp)
171 {
172 ep_qp->qp = endpoint_alloc_qp();
173 ep_qp->qp->users++;
174
175 /* number of available send WQEs */
176 ep_qp->qp->sd_wqe = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
177 }
178
/* Initialize an XRC QP slot.  XRC endpoints to the same remote host
 * share one QP (hanging off ep->ib_addr), so this grows the shared
 * QP's send-WQE budget for the new endpoint instead of creating a new
 * QP.  All shared state is updated under ib_addr->addr_lock. */
static void
endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
{
    /* device ceiling on WQEs, leaving headroom for eager RDMA if it is
     * enabled */
    int max = ep->endpoint_btl->device->ib_dev_attr.max_qp_wr -
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);
    mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp];
    int32_t wqe, incr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
    int rc;

    opal_mutex_lock (&ep->ib_addr->addr_lock);

    ep_qp->qp = ep->ib_addr->qp;
    if (ep->ib_addr->max_wqe + incr > max) {
        /* make sure that we don't overrun maximum supported by device */
        incr = max - ep->ib_addr->max_wqe;
    }

    wqe = ep->ib_addr->max_wqe + incr +
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);

    ep->ib_addr->max_wqe += incr;

    if (NULL != ep_qp->qp->lcl_qp) {
        /* the shared verbs QP already exists: resize its capabilities
         * in place and only credit the extra WQEs on success */
        struct ibv_qp_attr qp_attr;

        /* if this is modified the code in udcm_xrc_send_qp_create may
         * need to be updated as well */
        qp_attr.cap.max_recv_wr = 0;
        qp_attr.cap.max_send_wr = wqe;
        qp_attr.cap.max_inline_data = ep->endpoint_btl->device->max_inline_data;
        qp_attr.cap.max_send_sge = 1;
        qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
        rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
        if (0 == rc) {
            opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr);
        }
    } else {
        /* verbs QP not created yet: just record the desired budget */
        ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
    }
    ep_qp->qp->users++;
    opal_mutex_unlock (&ep->ib_addr->addr_lock);
}
223
/* Initialize one QP slot on an endpoint: reset the credit bookkeeping,
 * construct the pending-fragment lists, then dispatch to the
 * type-specific initializer (PP / SRQ / XRC). */
static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp)
{
    mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp];

    ep_qp->rd_credit_send_lock = 0;
    ep_qp->credit_frag = NULL;

    /* [0] = high priority, [1] = low priority pending lists */
    OBJ_CONSTRUCT(&ep_qp->no_wqe_pending_frags[0], opal_list_t);
    OBJ_CONSTRUCT(&ep_qp->no_wqe_pending_frags[1], opal_list_t);

    OBJ_CONSTRUCT(&ep_qp->no_credits_pending_frags[0], opal_list_t);
    OBJ_CONSTRUCT(&ep_qp->no_credits_pending_frags[1], opal_list_t);

    switch(BTL_OPENIB_QP_TYPE(qp)) {
    case MCA_BTL_OPENIB_PP_QP:
        endpoint_init_qp_pp(ep_qp, qp);
        break;
    case MCA_BTL_OPENIB_SRQ_QP:
        endpoint_init_qp_srq(ep_qp, qp);
        break;
    case MCA_BTL_OPENIB_XRC_QP:
        /* lazily create the per-remote-host shared QP record */
        if (NULL == ep->ib_addr->qp) {
            ep->ib_addr->qp = endpoint_alloc_qp();
        }
        endpoint_init_qp_xrc(ep, qp);
        break;
    default:
        /* unknown type: bail out, leaving ep_qp->qp untouched (the
         * trailing initialization below is intentionally skipped) */
        BTL_ERROR(("Wrong QP type"));
        return;
    }

    ep_qp->qp->sd_wqe_inflight = 0;
    ep_qp->qp->wqe_count = QP_TX_BATCH_COUNT;
}
258
/* Populate an endpoint with everything needed to reach a remote peer:
 * cache the local module and CPC pointers, copy the peer's port data
 * out of the modex message, and initialize every QP slot. */
void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl,
                                  mca_btl_base_endpoint_t *ep,
                                  opal_btl_openib_connect_base_module_t *local_cpc,
                                  mca_btl_openib_proc_modex_t *remote_proc_info,
                                  opal_btl_openib_connect_base_module_data_t *remote_cpc_data)
{
    int qp;

    ep->endpoint_btl = btl;
    /* bitwise & of the two flags; assumes both are strictly 0/1
     * (NOTE(review): && would be safer if either can be >1 -- confirm) */
    ep->use_eager_rdma = btl->device->use_eager_rdma &
        mca_btl_openib_component.use_eager_rdma;
    ep->subnet_id = btl->port_info.subnet_id;
    ep->endpoint_local_cpc = local_cpc;
    ep->endpoint_remote_cpc_data = remote_cpc_data;

    /* copy the peer's port information from the modex payload */
    ep->rem_info.rem_lid = remote_proc_info->pm_port_info.lid;
    ep->rem_info.rem_subnet_id = remote_proc_info->pm_port_info.subnet_id;
    ep->rem_info.rem_mtu = remote_proc_info->pm_port_info.mtu;
    opal_output(-1, "Got remote LID, subnet, MTU: %d, 0x%" PRIx64 ", %d",
                ep->rem_info.rem_lid,
                ep->rem_info.rem_subnet_id,
                ep->rem_info.rem_mtu);

    ep->rem_info.rem_vendor_id = (remote_proc_info->pm_port_info).vendor_id;
    ep->rem_info.rem_vendor_part_id = (remote_proc_info->pm_port_info).vendor_part_id;

    ep->rem_info.rem_transport_type =
        (mca_btl_openib_transport_type_t) (remote_proc_info->pm_port_info).transport_type;

    for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
        endpoint_init_qp(ep, qp);
    }
}
292
/* OPAL object constructor: put a freshly OBJ_NEW'ed endpoint into a
 * known-empty state (CLOSED, no credits, no buffers).  Peer-specific
 * data is filled in later by mca_btl_openib_endpoint_init(). */
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    /* setup qp structures */
    /* NOTE(review): calloc results are not NULL-checked here -- an OOM
     * would fault later when the arrays are first touched */
    endpoint->qps = (mca_btl_openib_endpoint_qp_t*)
        calloc(mca_btl_openib_component.num_qps,
               sizeof(mca_btl_openib_endpoint_qp_t));
    if (MCA_BTL_XRC_ENABLED) {
        /* XRC: a single remote QP record plus one SRQ record per XRC QP */
        endpoint->rem_info.rem_qps = (mca_btl_openib_rem_qp_info_t*)
            calloc(1, sizeof(mca_btl_openib_rem_qp_info_t));
        endpoint->rem_info.rem_srqs = (mca_btl_openib_rem_srq_info_t*)
            calloc(mca_btl_openib_component.num_xrc_qps,
                   sizeof(mca_btl_openib_rem_srq_info_t));
    } else {
        /* non-XRC: one remote QP record per local QP, no SRQ records */
        endpoint->rem_info.rem_qps = (mca_btl_openib_rem_qp_info_t*)
            calloc(mca_btl_openib_component.num_qps,
                   sizeof(mca_btl_openib_rem_qp_info_t));
        endpoint->rem_info.rem_srqs = NULL;
    }

    endpoint->ib_addr = NULL;
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
    endpoint->xrc_recv_qp = NULL;
#else
    endpoint->xrc_recv_qp_num = 0;
#endif
    endpoint->endpoint_btl = 0;
    endpoint->endpoint_proc = 0;
    endpoint->endpoint_local_cpc = NULL;
    endpoint->endpoint_remote_cpc_data = NULL;
    endpoint->endpoint_initiator = false;
    endpoint->endpoint_tstamp = 0.0;
    endpoint->endpoint_state = MCA_BTL_IB_CLOSED;
    endpoint->endpoint_retries = 0;
    OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->pending_lazy_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->pending_get_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->pending_put_frags, opal_list_t);

    endpoint->get_tokens = mca_btl_openib_component.ib_qp_ous_rd_atom;

    /* initialize RDMA eager related parts */
    endpoint->eager_recv_count = 0;
    memset(&endpoint->eager_rdma_remote, 0,
           sizeof(mca_btl_openib_eager_rdma_remote_t));
    memset(&endpoint->eager_rdma_local, 0,
           sizeof(mca_btl_openib_eager_rdma_local_t));
    OBJ_CONSTRUCT(&endpoint->eager_rdma_local.lock, opal_mutex_t);

    endpoint->rem_info.rem_lid = 0;
    endpoint->rem_info.rem_subnet_id = 0;
    endpoint->rem_info.rem_mtu = 0;
    endpoint->nbo = false;
    endpoint->use_eager_rdma = false;
    endpoint->eager_rdma_remote.tokens = 0;
    endpoint->eager_rdma_local.credits = 0;
    /* CTS (clear-to-send) handshake state starts fully unset */
    endpoint->endpoint_cts_mr = NULL;
    endpoint->endpoint_cts_frag.super.super.base.super.registration = NULL;
    endpoint->endpoint_cts_frag.super.super.base.super.ptr = NULL;
    endpoint->endpoint_posted_recvs = false;
    endpoint->endpoint_cts_received = false;
    endpoint->endpoint_cts_sent = false;
}
355
356 /*
357 * Destroy a endpoint
358 *
359 */
360
mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t * endpoint)361 static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
362 {
363 bool pval_clean = false;
364 int qp;
365
366 /* If the CPC has an endpoint_finalize function, call it */
367 if (NULL != endpoint->endpoint_local_cpc->cbm_endpoint_finalize) {
368 endpoint->endpoint_local_cpc->cbm_endpoint_finalize(endpoint);
369 }
370
371 /* Release CTS buffer */
372 opal_btl_openib_connect_base_free_cts(endpoint);
373
374 /* Release memory resources */
375 do {
376 /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma ()
377 * was not in "connect" or "bad" flow (failed to allocate memory)
378 * and changed the pointer back to NULL
379 */
380 if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) {
381 if (NULL != endpoint->eager_rdma_local.reg) {
382 endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
383 &endpoint->eager_rdma_local.reg->base);
384 endpoint->eager_rdma_local.reg = NULL;
385 }
386
387 void *alloc_base = opal_atomic_swap_ptr (&endpoint->eager_rdma_local.alloc_base, NULL);
388 if (alloc_base) {
389 endpoint->endpoint_btl->super.btl_mpool->mpool_free (endpoint->endpoint_btl->super.btl_mpool, alloc_base);
390 pval_clean = true;
391 }
392 } else {
393 pval_clean=true;
394 }
395 } while (!pval_clean);
396
397 /* Close opened QPs if we have them*/
398 for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
399 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].no_credits_pending_frags[0]);
400 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].no_credits_pending_frags[1]);
401 OBJ_DESTRUCT(&endpoint->qps[qp].no_credits_pending_frags[0]);
402 OBJ_DESTRUCT(&endpoint->qps[qp].no_credits_pending_frags[1]);
403
404 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
405 &endpoint->qps[qp].no_wqe_pending_frags[0]);
406 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
407 &endpoint->qps[qp].no_wqe_pending_frags[1]);
408 OBJ_DESTRUCT(&endpoint->qps[qp].no_wqe_pending_frags[0]);
409 OBJ_DESTRUCT(&endpoint->qps[qp].no_wqe_pending_frags[1]);
410
411
412 if(--endpoint->qps[qp].qp->users != 0)
413 continue;
414
415 if(endpoint->qps[qp].qp->lcl_qp != NULL)
416 if(ibv_destroy_qp(endpoint->qps[qp].qp->lcl_qp))
417 BTL_ERROR(("Failed to destroy QP:%d\n", qp));
418
419 free(endpoint->qps[qp].qp);
420 }
421
422 /* free the qps */
423 free(endpoint->qps);
424 endpoint->qps = NULL;
425
426 free(endpoint->rem_info.rem_qps);
427 free(endpoint->rem_info.rem_srqs);
428
429 /* unregister xrc recv qp */
430 #if HAVE_XRC
431 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
432 if (NULL != endpoint->xrc_recv_qp) {
433 if(ibv_destroy_qp(endpoint->xrc_recv_qp)) {
434 BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
435 } else {
436 BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
437 }
438 }
439 #else
440 if (0 != endpoint->xrc_recv_qp_num) {
441 if(ibv_unreg_xrc_rcv_qp(endpoint->endpoint_btl->device->xrc_domain,
442 endpoint->xrc_recv_qp_num)) {
443 BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp_num));
444 } else {
445 BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp_num));
446 }
447 }
448 #endif
449 #endif
450
451 OBJ_DESTRUCT(&endpoint->endpoint_lock);
452 /* Clean pending lists */
453 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_lazy_frags);
454 OBJ_DESTRUCT(&endpoint->pending_lazy_frags);
455
456 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_get_frags);
457 OBJ_DESTRUCT(&endpoint->pending_get_frags);
458
459 MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_put_frags);
460 OBJ_DESTRUCT(&endpoint->pending_put_frags);
461 }
462
463
464 /*
465 * Called when the connect module has created all the qp's on an
466 * endpoint and needs to have some receive buffers posted.
467 */
mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t * endpoint)468 int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint)
469 {
470 int qp;
471
472 for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
473 if (BTL_OPENIB_QP_TYPE_PP(qp)) {
474 mca_btl_openib_endpoint_post_rr_nolock(endpoint, qp);
475 } else {
476 mca_btl_openib_post_srr(endpoint->endpoint_btl, qp);
477 }
478 }
479
480 return OPAL_SUCCESS;
481 }
482
/*
 * Completion callback for the CTS control fragment.  The BTL interface
 * requires a non-NULL des_cbfunc, so this exists only to satisfy that
 * contract; the body is just a (disabled) debug trace.
 */
static void cts_sent(mca_btl_base_module_t* btl,
                     struct mca_btl_base_endpoint_t* ep,
                     struct mca_btl_base_descriptor_t* des,
                     int status)
{
    OPAL_OUTPUT((-1, "CTS send to %s completed",
                 opal_get_proc_hostname(ep->endpoint_proc->proc_opal)));
}
493
494 /*
495 * Send CTS control fragment
496 */
mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t * endpoint)497 void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
498 {
499 mca_btl_openib_send_control_frag_t *sc_frag;
500 mca_btl_base_descriptor_t *base_des;
501 mca_btl_openib_frag_t *openib_frag;
502 mca_btl_openib_com_frag_t *com_frag;
503 mca_btl_openib_control_header_t *ctl_hdr;
504
505 OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)",
506 opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
507 mca_btl_openib_component.credits_qp,
508 endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num));
509 sc_frag = alloc_control_frag(endpoint->endpoint_btl);
510 if (OPAL_UNLIKELY(NULL == sc_frag)) {
511 BTL_ERROR(("Failed to allocate control buffer"));
512 mca_btl_openib_endpoint_invoke_error(endpoint);
513 return;
514 }
515
516 /* I dislike using the "to_<foo>()" macros; I prefer using the
517 explicit member fields to ensure I get the types right. Since
518 this is not a performance-criticial part of the code, it's
519 ok. */
520 com_frag = &(sc_frag->super.super);
521 openib_frag = &(com_frag->super);
522 base_des = &(openib_frag->base);
523
524 base_des->des_cbfunc = cts_sent;
525 base_des->des_cbdata = NULL;
526 base_des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
527 base_des->order = mca_btl_openib_component.credits_qp;
528 openib_frag->segment.seg_len = sizeof(mca_btl_openib_control_header_t);
529 com_frag->endpoint = endpoint;
530
531 sc_frag->hdr->tag = MCA_BTL_TAG_IB;
532 sc_frag->hdr->cm_seen = 0;
533 sc_frag->hdr->credits = 0;
534
535 ctl_hdr = (mca_btl_openib_control_header_t*)
536 openib_frag->segment.seg_addr.pval;
537 ctl_hdr->type = MCA_BTL_OPENIB_CONTROL_CTS;
538
539 /* Send the fragment */
540 if (OPAL_SUCCESS != mca_btl_openib_endpoint_post_send(endpoint, sc_frag)) {
541 BTL_ERROR(("Failed to post CTS send"));
542 mca_btl_openib_endpoint_invoke_error(endpoint);
543 }
544 endpoint->endpoint_cts_sent = true;
545 }
546
547 /*
548 * Called when the CPC has established a connection on an endpoint
549 */
mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t * endpoint)550 void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
551 {
552 /* If the CPC uses the CTS protocol, then start it up */
553 if (endpoint->endpoint_local_cpc->cbm_uses_cts) {
554 int transport_type_ib_p = 0;
555 /* Post our receives, which will make credit management happy
556 (i.e., rd_credits will be 0) */
557 if (OPAL_SUCCESS != mca_btl_openib_endpoint_post_recvs(endpoint)) {
558 BTL_ERROR(("Failed to post receive buffers"));
559 mca_btl_openib_endpoint_invoke_error(endpoint);
560 return;
561 }
562 endpoint->endpoint_posted_recvs = true;
563
564 /* If this is IB, send the CTS immediately. If this is iWARP,
565 then only send the CTS if this endpoint was the initiator
566 of the connection (the receiver will send its CTS when it
567 receives this side's CTS). Also send the CTS if we already
568 received the peer's CTS (e.g., if this process was slow to
569 call cpc_complete(). */
570 #if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
571 transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type);
572 #endif
573 OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d",
574 opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
575 transport_type_ib_p,
576 endpoint->endpoint_initiator,
577 endpoint->endpoint_cts_received));
578 if (transport_type_ib_p ||
579 endpoint->endpoint_initiator ||
580 endpoint->endpoint_cts_received) {
581 mca_btl_openib_endpoint_send_cts(endpoint);
582
583 /* If we've already got the CTS from the other side, then
584 mark us as connected */
585 if (endpoint->endpoint_cts_received) {
586 OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete",
587 opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
588 mca_btl_openib_endpoint_connected(endpoint);
589 } else {
590 /* the caller hold the lock and expects us to drop it */
591 OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
592 }
593 }
594
595 OPAL_OUTPUT((-1, "cpc_complete to %s -- done",
596 opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
597 return;
598 }
599
600 /* Otherwise, just set the endpoint to "connected" */
601 mca_btl_openib_endpoint_connected(endpoint);
602 }
603
604 /*
605 * called when the connect module has completed setup of an endpoint
606 */
mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t * endpoint)607 void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
608 {
609 opal_list_item_t *frag_item, *ep_item;
610 mca_btl_openib_send_frag_t *frag;
611 mca_btl_openib_endpoint_t *ep;
612 bool master = false;
613
614 opal_output(-1, "Now we are CONNECTED");
615 if (MCA_BTL_XRC_ENABLED) {
616 opal_mutex_lock (&endpoint->ib_addr->addr_lock);
617 if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
618 /* We are not xrc master */
619 /* set our qp pointer to master qp */
620 master = false;
621 } else {
622 /* I'm master of XRC */
623 endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CONNECTED;
624 master = true;
625 }
626 }
627
628 /* Run over all qps and load alternative path */
629 if (APM_ENABLED) {
630 int i;
631 if (MCA_BTL_XRC_ENABLED) {
632 if (master) {
633 mca_btl_openib_load_apm(endpoint->ib_addr->qp->lcl_qp, endpoint);
634 }
635 } else {
636 for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
637 mca_btl_openib_load_apm(endpoint->qps[i].qp->lcl_qp, endpoint);
638 }
639 }
640 }
641
642 endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
643 endpoint->endpoint_btl->device->non_eager_rdma_endpoints++;
644
645 if(MCA_BTL_XRC_ENABLED) {
646 if (master) {
647 while (NULL != (ep_item = opal_list_remove_first(&endpoint->ib_addr->pending_ep))) {
648 ep = (mca_btl_openib_endpoint_t *)ep_item;
649 if (OPAL_SUCCESS !=
650 opal_btl_openib_connect_base_start(endpoint->endpoint_local_cpc, ep)) {
651 BTL_ERROR(("Failed to connect pending endpoint\n"));
652 }
653 }
654 }
655 opal_mutex_unlock (&endpoint->ib_addr->addr_lock);
656 }
657
658
659 /* Process pending packet on the endpoint */
660
661 /* While there are frags in the list, process them */
662 while (NULL != (frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags)))) {
663 frag = to_send_frag(frag_item);
664 /* We need to post this one */
665
666 if (OPAL_ERROR == mca_btl_openib_endpoint_post_send(endpoint, frag)) {
667 BTL_ERROR(("Error posting send"));
668 }
669 }
670 OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
671
672 /* if upper layer called put or get before connection moved to connected
673 * state then we restart them here */
674 mca_btl_openib_frag_progress_pending_put_get(endpoint,
675 mca_btl_openib_component.rdma_qp);
676 }
677
678 /*
679 * Attempt to send a fragment using a given endpoint. If the endpoint is not
680 * connected, queue the fragment and start the connection as required.
681 */
mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t * ep,mca_btl_openib_send_frag_t * frag)682 int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* ep,
683 mca_btl_openib_send_frag_t* frag)
684 {
685 int rc;
686
687 OPAL_THREAD_LOCK(&ep->endpoint_lock);
688 rc = check_endpoint_state(ep, &to_base_frag(frag)->base,
689 &ep->pending_lazy_frags);
690
691 if(OPAL_LIKELY(OPAL_SUCCESS == rc)) {
692 rc = mca_btl_openib_endpoint_post_send(ep, frag);
693 }
694 OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
695 if (OPAL_UNLIKELY(OPAL_ERR_RESOURCE_BUSY == rc)) {
696 rc = OPAL_SUCCESS;
697 }
698
699 return rc;
700 }
701
702 /**
703 * Return control fragment.
704 */
705
mca_btl_openib_endpoint_credits(mca_btl_base_module_t * btl,struct mca_btl_base_endpoint_t * ep,struct mca_btl_base_descriptor_t * des,int status)706 static void mca_btl_openib_endpoint_credits(
707 mca_btl_base_module_t* btl,
708 struct mca_btl_base_endpoint_t* ep,
709 struct mca_btl_base_descriptor_t* des,
710 int status)
711 {
712
713 int qp;
714
715 mca_btl_openib_send_control_frag_t *frag = to_send_control_frag(des);
716
717 qp = frag->qp_idx;
718
719 /* we don't acquire a WQE for credit message - so decrement.
720 * Note: doing it for QP used for credit management */
721 (void) qp_get_wqe(ep, des->order);
722
723 if(check_send_credits(ep, qp) || check_eager_rdma_credits(ep))
724 mca_btl_openib_endpoint_send_credits(ep, qp);
725 else {
726 BTL_OPENIB_CREDITS_SEND_UNLOCK(ep, qp);
727 /* check one more time if credits are available after unlock */
728 send_credits(ep, qp);
729 }
730 }
731
732 /**
733 * Return credits to peer
734 */
735
mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t * endpoint,const int qp)736 void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
737 const int qp)
738 {
739 mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
740 mca_btl_openib_send_control_frag_t* frag;
741 mca_btl_openib_rdma_credits_header_t *credits_hdr;
742 int rc;
743 bool do_rdma = false;
744 int32_t cm_return;
745
746 frag = endpoint->qps[qp].credit_frag;
747
748 if(OPAL_UNLIKELY(NULL == frag)) {
749 frag = alloc_control_frag(openib_btl);
750 frag->qp_idx = qp;
751 endpoint->qps[qp].credit_frag = frag;
752 /* set those once and forever */
753 to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
754 to_base_frag(frag)->base.des_cbfunc = mca_btl_openib_endpoint_credits;
755 to_base_frag(frag)->base.des_cbdata = NULL;
756 to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;;
757 to_com_frag(frag)->endpoint = endpoint;
758 frag->hdr->tag = MCA_BTL_TAG_IB;
759 to_base_frag(frag)->segment.seg_len =
760 sizeof(mca_btl_openib_rdma_credits_header_t);
761 }
762
763 assert(frag->qp_idx == qp);
764 credits_hdr = (mca_btl_openib_rdma_credits_header_t*)
765 to_base_frag(frag)->segment.seg_addr.pval;
766 if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
767 do_rdma = true;
768 } else {
769 if(OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
770 (mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) {
771 OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
772 BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
773 return;
774 }
775 }
776
777 BTL_OPENIB_GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);
778
779 frag->hdr->cm_seen = 0;
780 BTL_OPENIB_GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
781 if(cm_return > 255) {
782 frag->hdr->cm_seen = 255;
783 cm_return -= 255;
784 OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
785 } else {
786 frag->hdr->cm_seen = cm_return;
787 }
788
789 BTL_OPENIB_GET_CREDITS(endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits);
790 credits_hdr->qpn = qp;
791 credits_hdr->control.type = MCA_BTL_OPENIB_CONTROL_CREDITS;
792
793 if(endpoint->nbo)
794 BTL_OPENIB_RDMA_CREDITS_HEADER_HTON(*credits_hdr);
795
796 qp_reset_signal_count(endpoint, qp);
797 if((rc = post_send(endpoint, frag, do_rdma, 1)) == 0)
798 return;
799
800 if(endpoint->nbo) {
801 BTL_OPENIB_HEADER_NTOH(*frag->hdr);
802 BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
803 }
804 BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
805 OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits,
806 frag->hdr->credits);
807 OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
808 credits_hdr->rdma_credits);
809 if(do_rdma)
810 OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
811 else
812 OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
813
814 BTL_ERROR(("error posting send request errno %d says %s", rc,
815 strerror(errno)));
816 }
817
818 /* local callback function for completion of eager rdma connect */
mca_btl_openib_endpoint_eager_rdma_connect_cb(mca_btl_base_module_t * btl,struct mca_btl_base_endpoint_t * endpoint,struct mca_btl_base_descriptor_t * descriptor,int status)819 static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
820 mca_btl_base_module_t* btl,
821 struct mca_btl_base_endpoint_t* endpoint,
822 struct mca_btl_base_descriptor_t* descriptor,
823 int status)
824 {
825 mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
826 OPAL_THREAD_ADD32(&device->non_eager_rdma_endpoints, -1);
827 assert(device->non_eager_rdma_endpoints >= 0);
828 MCA_BTL_IB_FRAG_RETURN(descriptor);
829 }
830
/* send the eager rdma connect message to the remote endpoint */
/* Builds a CONTROL_RDMA fragment carrying the local eager RDMA
 * region's rkey and base address and sends it to the peer.  Returns
 * OPAL_SUCCESS if the message was sent or queued, negative on error
 * (the fragment is returned to its free list in that case). */
static int mca_btl_openib_endpoint_send_eager_rdma(
    mca_btl_base_endpoint_t* endpoint)
{
    mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
    mca_btl_openib_eager_rdma_header_t *rdma_hdr;
    mca_btl_openib_send_control_frag_t* frag;
    int rc;

    frag = alloc_control_frag(openib_btl);
    if(NULL == frag) {
        return -1;
    }

    to_base_frag(frag)->base.des_cbfunc =
        mca_btl_openib_endpoint_eager_rdma_connect_cb;
    to_base_frag(frag)->base.des_cbdata = NULL;
    to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
    to_base_frag(frag)->segment.seg_len =
        sizeof(mca_btl_openib_eager_rdma_header_t);
    to_com_frag(frag)->endpoint = endpoint;

    /* fill in the payload: the peer needs our rkey and the base
     * address of the eager RDMA region to write into it */
    frag->hdr->tag = MCA_BTL_TAG_IB;
    rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
    rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
    rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
    rdma_hdr->rdma_start.lval = opal_ptr_ptol(endpoint->eager_rdma_local.base.pval);
    BTL_VERBOSE(("sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64
                 ", pval %p, ival %" PRIu32 " type %d and sizeof(rdma_hdr) %d\n",
                 rdma_hdr->rkey,
                 rdma_hdr->rdma_start.lval,
                 rdma_hdr->rdma_start.pval,
                 rdma_hdr->rdma_start.ival,
                 rdma_hdr->control.type,
                 (int) sizeof(mca_btl_openib_eager_rdma_header_t)
                 ));

    if(endpoint->nbo) {
        /* convert the payload to network byte order for the peer */
        BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON((*rdma_hdr));

        BTL_VERBOSE(("after HTON: sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64 ", pval %p, ival %" PRIu32 "\n",
                     rdma_hdr->rkey,
                     rdma_hdr->rdma_start.lval,
                     rdma_hdr->rdma_start.pval,
                     rdma_hdr->rdma_start.ival
                     ));
    }
    /* RESOURCE_BUSY means the fragment was queued and will go out
     * later -- treat it as success */
    rc = mca_btl_openib_endpoint_send(endpoint, frag);
    if (OPAL_SUCCESS == rc || OPAL_ERR_RESOURCE_BUSY == rc)
        return OPAL_SUCCESS;

    MCA_BTL_IB_FRAG_RETURN(frag);
    /* NOTE(review): errno may not be meaningful here -- the failure
     * came from the BTL send path, not directly from a syscall */
    BTL_ERROR(("Error sending RDMA buffer: %s", strerror(errno)));
    return rc;
}
887
888 /* Setup eager RDMA buffers and notify the remote endpoint*/
mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t * endpoint)889 void mca_btl_openib_endpoint_connect_eager_rdma(
890 mca_btl_openib_endpoint_t* endpoint)
891 {
892 mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
893 char *buf, *alloc_base;
894 mca_btl_openib_recv_frag_t *headers_buf;
895 int i, rc;
896 uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
897
898 /* Set local rdma pointer to 1 temporarily so other threads will not try
899 * to enter the function */
900 if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
901 (void*)1))
902 return;
903
904 headers_buf = (mca_btl_openib_recv_frag_t*)
905 malloc(sizeof(mca_btl_openib_recv_frag_t) *
906 mca_btl_openib_component.eager_rdma_num);
907
908 if(NULL == headers_buf)
909 goto unlock_rdma_local;
910
911 #if HAVE_DECL_IBV_ACCESS_SO
912 /* Solaris implements the Relaxed Ordering feature defined in the
913 PCI Specification. With this in mind any memory region which
914 relies on a buffer being written in a specific order, for
915 example the eager rdma connections created in this routinue,
916 must set a strong order flag when registering the memory for
917 rdma operations.
918
919 The following flag will be interpreted and the appropriate
920 steps will be taken when the memory is registered in
921 openib_reg_mr(). */
922 flag |= MCA_RCACHE_FLAGS_SO_MEM;
923 #endif
924
925 alloc_base = buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
926 openib_btl->eager_rdma_frag_size *
927 mca_btl_openib_component.eager_rdma_num,
928 mca_btl_openib_component.buffer_alignment,
929 0);
930
931 if(!buf)
932 goto free_headers_buf;
933
934 rc = openib_btl->device->rcache->rcache_register (openib_btl->device->rcache, buf, openib_btl->eager_rdma_frag_size *
935 mca_btl_openib_component.eager_rdma_num, flag, MCA_RCACHE_ACCESS_ANY,
936 (mca_rcache_base_registration_t**)&endpoint->eager_rdma_local.reg);
937 if (OPAL_SUCCESS != rc) {
938 openib_btl->super.btl_mpool->mpool_free (openib_btl->super.btl_mpool, alloc_base);
939 goto free_headers_buf;
940 }
941
942 buf = buf + openib_btl->eager_rdma_frag_size -
943 sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit -
944 sizeof(mca_btl_openib_header_t);
945
946 for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
947 opal_free_list_item_t *item;
948 mca_btl_openib_recv_frag_t * frag;
949 mca_btl_openib_frag_init_data_t init_data;
950
951 item = (opal_free_list_item_t*)&headers_buf[i];
952 item->registration = (mca_rcache_base_registration_t *)endpoint->eager_rdma_local.reg;
953 item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
954 OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);
955
956 init_data.order = mca_btl_openib_component.credits_qp;
957 init_data.list = NULL;
958
959 mca_btl_openib_frag_init(item, &init_data);
960 frag = to_recv_frag(item);
961 to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
962 to_com_frag(frag)->endpoint = endpoint;
963 frag->ftr = (mca_btl_openib_footer_t*)
964 ((char*)to_base_frag(frag)->segment.seg_addr.pval +
965 mca_btl_openib_component.eager_limit);
966
967 MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
968 }
969
970 endpoint->eager_rdma_local.frags = headers_buf;
971
972 endpoint->eager_rdma_local.rd_win =
973 mca_btl_openib_component.eager_rdma_num >> 2;
974 endpoint->eager_rdma_local.rd_win =
975 endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;
976
977 /* set local rdma pointer to real value */
978 (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
979 (void*)1, buf);
980 endpoint->eager_rdma_local.alloc_base = alloc_base;
981
982 if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
983 mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
984 mca_btl_openib_endpoint_t **p;
985 OBJ_RETAIN(endpoint);
986 assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
987 do {
988 p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
989 } while(!opal_atomic_cmpset_ptr(p, NULL, endpoint));
990
991 OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1);
992 /* from this point progress function starts to poll new buffer */
993 OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1);
994 return;
995 }
996
997 openib_btl->device->rcache->rcache_deregister (openib_btl->device->rcache,
998 (mca_rcache_base_registration_t*)endpoint->eager_rdma_local.reg);
999 openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, buf);
1000 free_headers_buf:
1001 free(headers_buf);
1002 unlock_rdma_local:
1003 /* set local rdma pointer back to zero. Will retry later */
1004 (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
1005 endpoint->eager_rdma_local.base.pval, NULL);
1006 endpoint->eager_rdma_local.frags = NULL;
1007 }
1008
1009 /*
1010 * Invoke an error on the btl associated with an endpoint. If we
1011 * don't have an endpoint, then just use the first one on the
1012 * component list of BTLs.
1013 */
mca_btl_openib_endpoint_invoke_error(void * context)1014 void *mca_btl_openib_endpoint_invoke_error(void *context)
1015 {
1016 mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t*) context;
1017 mca_btl_openib_module_t *btl = NULL;
1018
1019 if (NULL == endpoint) {
1020 int i;
1021 for (i = 0; i < mca_btl_openib_component.ib_num_btls; ++i) {
1022 if (NULL != mca_btl_openib_component.openib_btls[i] &&
1023 NULL != mca_btl_openib_component.openib_btls[i]->error_cb) {
1024 btl = mca_btl_openib_component.openib_btls[i];
1025 break;
1026 }
1027 }
1028 } else {
1029 btl = endpoint->endpoint_btl;
1030 }
1031
1032 /* If we didn't find a BTL, then just bail :-( */
1033 if (NULL == btl || NULL == btl->error_cb) {
1034 opal_show_help("help-mpi-btl-openib.txt",
1035 "cannot raise btl error", true,
1036 opal_process_info.nodename,
1037 __FILE__, __LINE__);
1038 exit(1);
1039 }
1040
1041 /* Invoke the callback to the upper layer */
1042 btl->error_cb(&(btl->super), MCA_BTL_ERROR_FLAGS_FATAL, NULL, NULL);
1043
1044 /* Will likely never get here */
1045 return NULL;
1046 }
1047