/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2014 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2006-2007 Voltaire All rights reserved.
 * Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2010-2011 IBM Corporation. All rights reserved.
 * Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
 * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
 * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
 * Copyright (c) 2014      Bull SAS. All rights reserved.
 * Copyright (c) 2015      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <time.h>
#include <errno.h>
#include <string.h>

#include "opal_stdint.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/class/opal_free_list.h"

#include "btl_openib_endpoint.h"
#include "btl_openib_proc.h"
#include "btl_openib_xrc.h"
#include "btl_openib_async.h"
#include "connect/connect.h"

static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t *endpoint);
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t *endpoint);

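/*
 * Descriptive note (added): reserve a send work queue entry (WQE) for this
 * fragment on the QP it is ordered on.  qp_get_wqe() is assumed to decrement
 * the QP's available-WQE counter and return the new value; when the counter
 * goes negative the reservation is handed back with qp_put_wqe() and the
 * fragment is queued on the endpoint's no_wqe_pending_frags list (index 0
 * holds priority traffic, index 1 the rest) to be retried once a WQE
 * completes.
 */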
static inline int acquire_wqe(mca_btl_openib_endpoint_t *ep,
                              mca_btl_openib_send_frag_t *frag)
{
    int qp = to_base_frag(frag)->base.order;
    int prio = !(to_base_frag(frag)->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY);

    if(qp_get_wqe(ep, qp) < 0) {
        qp_put_wqe(ep, qp);
        opal_list_append(&ep->qps[qp].no_wqe_pending_frags[prio],
                         (opal_list_item_t *)frag);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    return OPAL_SUCCESS;
}

/* this function is called with endpoint->endpoint_lock held */
int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
                                      mca_btl_openib_send_frag_t *frag)
{
    int prio = to_base_frag(frag)->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY;
    mca_btl_openib_header_t *hdr = frag->hdr;
    mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
    int qp, ib_rc, rc;
    bool do_rdma = false;
    size_t size;

    if(OPAL_LIKELY(des->order == MCA_BTL_NO_ORDER))
        des->order = frag->qp_idx;

    qp = des->order;

    if(acquire_wqe(endpoint, frag) != OPAL_SUCCESS)
        return OPAL_ERR_RESOURCE_BUSY;

    size = des->des_segments->seg_len + frag->coalesced_length;

    rc = mca_btl_openib_endpoint_credit_acquire (endpoint, qp, prio, size,
                                                 &do_rdma, frag, true);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        qp_put_wqe(endpoint, qp);
        return OPAL_ERR_RESOURCE_BUSY;
    }

    qp_reset_signal_count(endpoint, qp);
    ib_rc = post_send(endpoint, frag, do_rdma, 1);

    if(!ib_rc)
        return OPAL_SUCCESS;

    if(endpoint->nbo)
        BTL_OPENIB_HEADER_NTOH(*hdr);

    mca_btl_openib_endpoint_credit_release (endpoint, qp, do_rdma, frag);

    qp_put_wqe(endpoint, qp);

    BTL_ERROR(("error posting send request error %d: %s. size = %lu\n",
               ib_rc, strerror(ib_rc), size));
    return OPAL_ERROR;
}


OBJ_CLASS_INSTANCE(mca_btl_openib_endpoint_t,
                   opal_list_item_t, mca_btl_openib_endpoint_construct,
                   mca_btl_openib_endpoint_destruct);

/*
 * Initialize state of the endpoint instance.
 *
 */
static mca_btl_openib_qp_t *endpoint_alloc_qp(void)
{
    mca_btl_openib_qp_t *qp = (mca_btl_openib_qp_t *) calloc(1, sizeof(mca_btl_openib_qp_t));
    if(!qp) {
        BTL_ERROR(("Failed to allocate memory for qp"));
        return NULL;
    }

    OBJ_CONSTRUCT(&qp->lock, opal_mutex_t);

    return qp;
}

static void
endpoint_init_qp_pp(mca_btl_openib_endpoint_qp_t *ep_qp, const int qp)
{
    mca_btl_openib_qp_info_t *qp_info = &mca_btl_openib_component.qp_infos[qp];
    ep_qp->qp = endpoint_alloc_qp();
    ep_qp->qp->users++;

    /* local credits are set here such that on initial posting
     * of the receive buffers we end up with zero credits to return
     * to our peer.  The peer initializes its sd_credits to reflect this
     * below.  Note that this may be a problem for iWARP as the sender
     * now has credits even if the receive buffers are not yet posted.
     */
    ep_qp->u.pp_qp.rd_credits = -qp_info->rd_num;

    ep_qp->u.pp_qp.rd_posted = 0;
    ep_qp->u.pp_qp.cm_sent = 0;
    ep_qp->u.pp_qp.cm_return = -qp_info->u.pp_qp.rd_rsv;
    ep_qp->u.pp_qp.cm_received = qp_info->u.pp_qp.rd_rsv;

    /* initialize the local view of credits */
    ep_qp->u.pp_qp.sd_credits = qp_info->rd_num;

    /* number of available send WQEs */
    ep_qp->qp->sd_wqe = qp_info->rd_num;
}

static void
endpoint_init_qp_srq(mca_btl_openib_endpoint_qp_t *ep_qp, const int qp)
{
    ep_qp->qp = endpoint_alloc_qp();
    ep_qp->qp->users++;

    /* number of available send WQEs */
    ep_qp->qp->sd_wqe = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
}

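/*
 * Descriptive note (added, summarizing the logic below): with XRC, every
 * endpoint that reaches the same remote node shares a single send QP that
 * hangs off the shared ib_addr structure.  Each endpoint that attaches grows
 * the QP's send queue by the per-QP sd_max increment, capped at the device's
 * max_qp_wr (less the WQEs reserved for eager RDMA when that is enabled).
 */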
static void
endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
{
    int max = ep->endpoint_btl->device->ib_dev_attr.max_qp_wr -
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);
    mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp];
    int32_t wqe, incr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
    int rc;

    opal_mutex_lock (&ep->ib_addr->addr_lock);

    ep_qp->qp = ep->ib_addr->qp;
    if (ep->ib_addr->max_wqe + incr > max) {
        /* make sure that we don't overrun maximum supported by device */
        incr = max - ep->ib_addr->max_wqe;
    }

    wqe = ep->ib_addr->max_wqe + incr +
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);

    ep->ib_addr->max_wqe += incr;

    if (NULL != ep_qp->qp->lcl_qp) {
        struct ibv_qp_attr qp_attr;

        /* if this is modified the code in udcm_xrc_send_qp_create may
         * need to be updated as well */
        qp_attr.cap.max_recv_wr = 0;
        qp_attr.cap.max_send_wr = wqe;
        qp_attr.cap.max_inline_data = ep->endpoint_btl->device->max_inline_data;
        qp_attr.cap.max_send_sge = 1;
        qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
        rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
        if (0 == rc) {
            opal_atomic_add_fetch_32 (&ep_qp->qp->sd_wqe, incr);
        }
    } else {
        ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
    }
    ep_qp->qp->users++;
    opal_mutex_unlock (&ep->ib_addr->addr_lock);
}

static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp)
{
    mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp];

    ep_qp->rd_credit_send_lock = 0;
    ep_qp->credit_frag = NULL;

    OBJ_CONSTRUCT(&ep_qp->no_wqe_pending_frags[0], opal_list_t);
    OBJ_CONSTRUCT(&ep_qp->no_wqe_pending_frags[1], opal_list_t);

    OBJ_CONSTRUCT(&ep_qp->no_credits_pending_frags[0], opal_list_t);
    OBJ_CONSTRUCT(&ep_qp->no_credits_pending_frags[1], opal_list_t);

    switch(BTL_OPENIB_QP_TYPE(qp)) {
        case MCA_BTL_OPENIB_PP_QP:
            endpoint_init_qp_pp(ep_qp, qp);
            break;
        case MCA_BTL_OPENIB_SRQ_QP:
            endpoint_init_qp_srq(ep_qp, qp);
            break;
        case MCA_BTL_OPENIB_XRC_QP:
            if (NULL == ep->ib_addr->qp) {
                ep->ib_addr->qp = endpoint_alloc_qp();
            }
            endpoint_init_qp_xrc(ep, qp);
            break;
        default:
            BTL_ERROR(("Wrong QP type"));
            return;
    }

    ep_qp->qp->sd_wqe_inflight = 0;
    ep_qp->qp->wqe_count = QP_TX_BATCH_COUNT;
}

void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl,
                                  mca_btl_base_endpoint_t *ep,
                                  opal_btl_openib_connect_base_module_t *local_cpc,
                                  mca_btl_openib_proc_modex_t *remote_proc_info,
                                  opal_btl_openib_connect_base_module_data_t *remote_cpc_data)
{
    int qp;

    ep->endpoint_btl = btl;
    ep->use_eager_rdma = btl->device->use_eager_rdma &
        mca_btl_openib_component.use_eager_rdma;
    ep->subnet_id = btl->port_info.subnet_id;
    ep->endpoint_local_cpc = local_cpc;
    ep->endpoint_remote_cpc_data = remote_cpc_data;

    ep->rem_info.rem_lid = remote_proc_info->pm_port_info.lid;
    ep->rem_info.rem_subnet_id = remote_proc_info->pm_port_info.subnet_id;
    ep->rem_info.rem_mtu = remote_proc_info->pm_port_info.mtu;
    opal_output(-1, "Got remote LID, subnet, MTU: %d, 0x%" PRIx64 ", %d",
                ep->rem_info.rem_lid,
                ep->rem_info.rem_subnet_id,
                ep->rem_info.rem_mtu);

    ep->rem_info.rem_vendor_id = (remote_proc_info->pm_port_info).vendor_id;
    ep->rem_info.rem_vendor_part_id = (remote_proc_info->pm_port_info).vendor_part_id;

    ep->rem_info.rem_transport_type =
        (mca_btl_openib_transport_type_t) (remote_proc_info->pm_port_info).transport_type;

    for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
        endpoint_init_qp(ep, qp);
    }
}

static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t *endpoint)
{
    /* setup qp structures */
    endpoint->qps = (mca_btl_openib_endpoint_qp_t*)
        calloc(mca_btl_openib_component.num_qps,
               sizeof(mca_btl_openib_endpoint_qp_t));
    if (MCA_BTL_XRC_ENABLED) {
        endpoint->rem_info.rem_qps = (mca_btl_openib_rem_qp_info_t*)
            calloc(1, sizeof(mca_btl_openib_rem_qp_info_t));
        endpoint->rem_info.rem_srqs = (mca_btl_openib_rem_srq_info_t*)
            calloc(mca_btl_openib_component.num_xrc_qps,
                   sizeof(mca_btl_openib_rem_srq_info_t));
    } else {
        endpoint->rem_info.rem_qps = (mca_btl_openib_rem_qp_info_t*)
            calloc(mca_btl_openib_component.num_qps,
                   sizeof(mca_btl_openib_rem_qp_info_t));
        endpoint->rem_info.rem_srqs = NULL;
    }

    endpoint->ib_addr = NULL;
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
    endpoint->xrc_recv_qp = NULL;
#else
    endpoint->xrc_recv_qp_num = 0;
#endif
    endpoint->endpoint_btl = 0;
    endpoint->endpoint_proc = 0;
    endpoint->endpoint_local_cpc = NULL;
    endpoint->endpoint_remote_cpc_data = NULL;
    endpoint->endpoint_initiator = false;
    endpoint->endpoint_tstamp = 0.0;
    endpoint->endpoint_state = MCA_BTL_IB_CLOSED;
    endpoint->endpoint_retries = 0;
    OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&endpoint->pending_lazy_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->pending_get_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->pending_put_frags, opal_list_t);

    endpoint->get_tokens = mca_btl_openib_component.ib_qp_ous_rd_atom;

    /* initialize RDMA eager related parts */
    endpoint->eager_recv_count = 0;
    memset(&endpoint->eager_rdma_remote, 0,
           sizeof(mca_btl_openib_eager_rdma_remote_t));
    memset(&endpoint->eager_rdma_local, 0,
           sizeof(mca_btl_openib_eager_rdma_local_t));
    OBJ_CONSTRUCT(&endpoint->eager_rdma_local.lock, opal_mutex_t);

    endpoint->rem_info.rem_lid = 0;
    endpoint->rem_info.rem_subnet_id = 0;
    endpoint->rem_info.rem_mtu = 0;
    endpoint->nbo = false;
    endpoint->use_eager_rdma = false;
    endpoint->eager_rdma_remote.tokens = 0;
    endpoint->eager_rdma_local.credits = 0;
    endpoint->endpoint_cts_mr = NULL;
    endpoint->endpoint_cts_frag.super.super.base.super.registration = NULL;
    endpoint->endpoint_cts_frag.super.super.base.super.ptr = NULL;
    endpoint->endpoint_posted_recvs = false;
    endpoint->endpoint_cts_received = false;
    endpoint->endpoint_cts_sent = false;
}

/*
 * Destroy an endpoint
 *
 */

static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t *endpoint)
{
    bool pval_clean = false;
    int qp;

    /* If the CPC has an endpoint_finalize function, call it */
    if (NULL != endpoint->endpoint_local_cpc->cbm_endpoint_finalize) {
        endpoint->endpoint_local_cpc->cbm_endpoint_finalize(endpoint);
    }

    /* Release CTS buffer */
    opal_btl_openib_connect_base_free_cts(endpoint);

    /* Release memory resources */
    do {
        void *_tmp_ptr = NULL;
        /* Make sure that mca_btl_openib_endpoint_connect_eager_rdma()
         * was not in the "connect" or "bad" flow (failed to allocate memory)
         * and has not changed the pointer back to NULL
         */
        if(!opal_atomic_compare_exchange_strong_ptr(&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr, (void *) 1)) {
            if (NULL != endpoint->eager_rdma_local.reg) {
                endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
                                                                           &endpoint->eager_rdma_local.reg->base);
                endpoint->eager_rdma_local.reg = NULL;
            }

            void *alloc_base = opal_atomic_swap_ptr (&endpoint->eager_rdma_local.alloc_base, NULL);
            if (alloc_base) {
                endpoint->endpoint_btl->super.btl_mpool->mpool_free (endpoint->endpoint_btl->super.btl_mpool, alloc_base);
                pval_clean = true;
            }
        } else {
            pval_clean = true;
        }
    } while (!pval_clean);

    /* Close opened QPs if we have them */
    for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
        MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].no_credits_pending_frags[0]);
        MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].no_credits_pending_frags[1]);
        OBJ_DESTRUCT(&endpoint->qps[qp].no_credits_pending_frags[0]);
        OBJ_DESTRUCT(&endpoint->qps[qp].no_credits_pending_frags[1]);

        MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
            &endpoint->qps[qp].no_wqe_pending_frags[0]);
        MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
            &endpoint->qps[qp].no_wqe_pending_frags[1]);
        OBJ_DESTRUCT(&endpoint->qps[qp].no_wqe_pending_frags[0]);
        OBJ_DESTRUCT(&endpoint->qps[qp].no_wqe_pending_frags[1]);

        if(--endpoint->qps[qp].qp->users != 0)
            continue;

        if(endpoint->qps[qp].qp->lcl_qp != NULL)
            if(ibv_destroy_qp(endpoint->qps[qp].qp->lcl_qp))
                BTL_ERROR(("Failed to destroy QP:%d\n", qp));

        free(endpoint->qps[qp].qp);
    }

    /* free the qps */
    free(endpoint->qps);
    endpoint->qps = NULL;

    free(endpoint->rem_info.rem_qps);
    free(endpoint->rem_info.rem_srqs);

    /* unregister xrc recv qp */
#if HAVE_XRC
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
    if (NULL != endpoint->xrc_recv_qp) {
        if(ibv_destroy_qp(endpoint->xrc_recv_qp)) {
            BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
        } else {
            BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
        }
    }
#else
    if (0 != endpoint->xrc_recv_qp_num) {
        if(ibv_unreg_xrc_rcv_qp(endpoint->endpoint_btl->device->xrc_domain,
                                endpoint->xrc_recv_qp_num)) {
            BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp_num));
        } else {
            BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp_num));
        }
    }
#endif
#endif

    OBJ_DESTRUCT(&endpoint->endpoint_lock);
    /* Clean pending lists */
    MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_lazy_frags);
    OBJ_DESTRUCT(&endpoint->pending_lazy_frags);

    MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_get_frags);
    OBJ_DESTRUCT(&endpoint->pending_get_frags);

    MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_put_frags);
    OBJ_DESTRUCT(&endpoint->pending_put_frags);
}

/*
 * Called when the connect module has created all the qp's on an
 * endpoint and needs to have some receive buffers posted.
 */
int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint)
{
    int qp;

    for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
        if (BTL_OPENIB_QP_TYPE_PP(qp)) {
            mca_btl_openib_endpoint_post_rr_nolock(endpoint, qp);
        } else {
            mca_btl_openib_post_srr(endpoint->endpoint_btl, qp);
        }
    }

    return OPAL_SUCCESS;
}

static void cts_sent(mca_btl_base_module_t *btl,
                     struct mca_btl_base_endpoint_t *ep,
                     struct mca_btl_base_descriptor_t *des,
                     int status)
{
    /* Nothing to do/empty function (we can't pass in a NULL pointer
       for the des_cbfunc) */
    OPAL_OUTPUT((-1, "CTS send to %s completed",
                 opal_get_proc_hostname(ep->endpoint_proc->proc_opal)));
}

/*
 * Send CTS control fragment
 */
void mca_btl_openib_endpoint_send_cts(mca_btl_openib_endpoint_t *endpoint)
{
    mca_btl_openib_send_control_frag_t *sc_frag;
    mca_btl_base_descriptor_t *base_des;
    mca_btl_openib_frag_t *openib_frag;
    mca_btl_openib_com_frag_t *com_frag;
    mca_btl_openib_control_header_t *ctl_hdr;

    OPAL_OUTPUT((-1, "SENDING CTS to %s on qp index %d (QP num %d)",
                 opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
                 mca_btl_openib_component.credits_qp,
                 endpoint->qps[mca_btl_openib_component.credits_qp].qp->lcl_qp->qp_num));
    sc_frag = alloc_control_frag(endpoint->endpoint_btl);
    if (OPAL_UNLIKELY(NULL == sc_frag)) {
        BTL_ERROR(("Failed to allocate control buffer"));
        mca_btl_openib_endpoint_invoke_error(endpoint);
        return;
    }

    /* I dislike using the "to_<foo>()" macros; I prefer using the
       explicit member fields to ensure I get the types right.  Since
       this is not a performance-critical part of the code, it's
       ok. */
    com_frag = &(sc_frag->super.super);
    openib_frag = &(com_frag->super);
    base_des = &(openib_frag->base);

    base_des->des_cbfunc = cts_sent;
    base_des->des_cbdata = NULL;
    base_des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    base_des->order = mca_btl_openib_component.credits_qp;
    openib_frag->segment.seg_len = sizeof(mca_btl_openib_control_header_t);
    com_frag->endpoint = endpoint;

    sc_frag->hdr->tag = MCA_BTL_TAG_IB;
    sc_frag->hdr->cm_seen = 0;
    sc_frag->hdr->credits = 0;

    ctl_hdr = (mca_btl_openib_control_header_t*)
        openib_frag->segment.seg_addr.pval;
    ctl_hdr->type = MCA_BTL_OPENIB_CONTROL_CTS;

    /* Send the fragment */
    if (OPAL_SUCCESS != mca_btl_openib_endpoint_post_send(endpoint, sc_frag)) {
        BTL_ERROR(("Failed to post CTS send"));
        mca_btl_openib_endpoint_invoke_error(endpoint);
    }
    endpoint->endpoint_cts_sent = true;
}

/*
 * Called when the CPC has established a connection on an endpoint
 */
void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
{
    /* If the CPC uses the CTS protocol, then start it up */
    if (endpoint->endpoint_local_cpc->cbm_uses_cts) {
        int transport_type_ib_p = 0;
        /* Post our receives, which will make credit management happy
           (i.e., rd_credits will be 0) */
        if (OPAL_SUCCESS != mca_btl_openib_endpoint_post_recvs(endpoint)) {
            BTL_ERROR(("Failed to post receive buffers"));
            mca_btl_openib_endpoint_invoke_error(endpoint);
            return;
        }
        endpoint->endpoint_posted_recvs = true;

        /* If this is IB, send the CTS immediately.  If this is iWARP,
           then only send the CTS if this endpoint was the initiator
           of the connection (the receiver will send its CTS when it
           receives this side's CTS).  Also send the CTS if we already
           received the peer's CTS (e.g., if this process was slow to
           call cpc_complete()). */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
        transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type);
#endif
        OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiator %d, cts received: %d",
                     opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
                     transport_type_ib_p,
                     endpoint->endpoint_initiator,
                     endpoint->endpoint_cts_received));
        if (transport_type_ib_p ||
            endpoint->endpoint_initiator ||
            endpoint->endpoint_cts_received) {
            mca_btl_openib_endpoint_send_cts(endpoint);

            /* If we've already got the CTS from the other side, then
               mark us as connected */
            if (endpoint->endpoint_cts_received) {
                OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete",
                             opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
                mca_btl_openib_endpoint_connected(endpoint);
            } else {
                /* the caller holds the lock and expects us to drop it */
                OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
            }
        }

        OPAL_OUTPUT((-1, "cpc_complete to %s -- done",
                     opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal)));
        return;
    }

    /* Otherwise, just set the endpoint to "connected" */
    mca_btl_openib_endpoint_connected(endpoint);
}

/*
 * called when the connect module has completed setup of an endpoint
 */
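/*
 * Descriptive note (added, based on the code below): for XRC the first
 * endpoint to reach the connected state for a given remote node becomes the
 * "master" of the shared ib_addr.  The master marks the address connected,
 * optionally loads the alternate path (APM) on the shared QP, and starts
 * connection setup for any endpoints that were queued on ib_addr->pending_ep
 * while the shared connection was being established.
 */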
void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
{
    opal_list_item_t *frag_item, *ep_item;
    mca_btl_openib_send_frag_t *frag;
    mca_btl_openib_endpoint_t *ep;
    bool master = false;

    opal_output(-1, "Now we are CONNECTED");
    if (MCA_BTL_XRC_ENABLED) {
        opal_mutex_lock (&endpoint->ib_addr->addr_lock);
        if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
            /* We are not the xrc master; our qp pointer already refers to
             * the master's qp */
            master = false;
        } else {
            /* We are the master of this XRC address */
            endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CONNECTED;
            master = true;
        }
    }

    /* Run over all qps and load the alternative path */
    if (APM_ENABLED) {
        int i;
        if (MCA_BTL_XRC_ENABLED) {
            if (master) {
                mca_btl_openib_load_apm(endpoint->ib_addr->qp->lcl_qp, endpoint);
            }
        } else {
            for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
                mca_btl_openib_load_apm(endpoint->qps[i].qp->lcl_qp, endpoint);
            }
        }
    }

    endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
    endpoint->endpoint_btl->device->non_eager_rdma_endpoints++;

    if(MCA_BTL_XRC_ENABLED) {
        if (master) {
            while (NULL != (ep_item = opal_list_remove_first(&endpoint->ib_addr->pending_ep))) {
                ep = (mca_btl_openib_endpoint_t *)ep_item;
                if (OPAL_SUCCESS !=
                    opal_btl_openib_connect_base_start(endpoint->endpoint_local_cpc, ep)) {
                    BTL_ERROR(("Failed to connect pending endpoint\n"));
                }
            }
        }
        opal_mutex_unlock (&endpoint->ib_addr->addr_lock);
    }

    /* Process pending packets on the endpoint: post every fragment that was
     * queued on the lazy list while the connection was being set up */
    while (NULL != (frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags)))) {
        frag = to_send_frag(frag_item);
        if (OPAL_ERROR == mca_btl_openib_endpoint_post_send(endpoint, frag)) {
            BTL_ERROR(("Error posting send"));
        }
    }
    OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);

    /* if the upper layer called put or get before the connection moved to the
     * connected state, then we restart them here */
    mca_btl_openib_frag_progress_pending_put_get(endpoint,
                                                 mca_btl_openib_component.rdma_qp);
}

/*
 * Attempt to send a fragment using a given endpoint.  If the endpoint is not
 * connected, queue the fragment and start the connection as required.
 */
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t *ep,
                                 mca_btl_openib_send_frag_t *frag)
{
    int rc;

    OPAL_THREAD_LOCK(&ep->endpoint_lock);
    rc = check_endpoint_state(ep, &to_base_frag(frag)->base,
                              &ep->pending_lazy_frags);

    if(OPAL_LIKELY(OPAL_SUCCESS == rc)) {
        rc = mca_btl_openib_endpoint_post_send(ep, frag);
    }
    OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
    if (OPAL_UNLIKELY(OPAL_ERR_RESOURCE_BUSY == rc)) {
        rc = OPAL_SUCCESS;
    }

    return rc;
}

/**
 * Return control fragment.
 */
static void mca_btl_openib_endpoint_credits(
    mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *ep,
    struct mca_btl_base_descriptor_t *des,
    int status)
{
    int qp;

    mca_btl_openib_send_control_frag_t *frag = to_send_control_frag(des);

    qp = frag->qp_idx;

    /* we did not acquire a WQE for the credit message, so decrement here.
     * Note: this is done on the QP used for credit management */
    (void) qp_get_wqe(ep, des->order);

    if(check_send_credits(ep, qp) || check_eager_rdma_credits(ep))
        mca_btl_openib_endpoint_send_credits(ep, qp);
    else {
        BTL_OPENIB_CREDITS_SEND_UNLOCK(ep, qp);
        /* check one more time if credits are available after the unlock */
        send_credits(ep, qp);
    }
}

/**
 * Return credits to peer
 */

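/*
 * Descriptive note (added, summarizing the code below): the endpoint keeps
 * one dedicated credit fragment per QP, allocated lazily on first use.
 * Credits ride the eager RDMA channel when an RDMA send credit can be
 * acquired; otherwise they use the QP's reserved credit-message slots
 * (rd_rsv), tracked through cm_sent.  If the post fails, every counter that
 * was decremented for this message is rolled back so the credits can be
 * returned later.
 */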
void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t *endpoint,
                                          const int qp)
{
    mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
    mca_btl_openib_send_control_frag_t *frag;
    mca_btl_openib_rdma_credits_header_t *credits_hdr;
    int rc;
    bool do_rdma = false;
    int32_t cm_return;

    frag = endpoint->qps[qp].credit_frag;

    if(OPAL_UNLIKELY(NULL == frag)) {
        frag = alloc_control_frag(openib_btl);
        frag->qp_idx = qp;
        endpoint->qps[qp].credit_frag = frag;
        /* set these once and forever */
        to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
        to_base_frag(frag)->base.des_cbfunc = mca_btl_openib_endpoint_credits;
        to_base_frag(frag)->base.des_cbdata = NULL;
        to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
        to_com_frag(frag)->endpoint = endpoint;
        frag->hdr->tag = MCA_BTL_TAG_IB;
        to_base_frag(frag)->segment.seg_len =
            sizeof(mca_btl_openib_rdma_credits_header_t);
    }

    assert(frag->qp_idx == qp);
    credits_hdr = (mca_btl_openib_rdma_credits_header_t*)
        to_base_frag(frag)->segment.seg_addr.pval;
    if(OPAL_SUCCESS == acquire_eager_rdma_send_credit(endpoint)) {
        do_rdma = true;
    } else {
        if(OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, 1) >
           (mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv - 1)) {
            OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
            BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
            return;
        }
    }

    BTL_OPENIB_GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);

    frag->hdr->cm_seen = 0;
    BTL_OPENIB_GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
    if(cm_return > 255) {
        frag->hdr->cm_seen = 255;
        cm_return -= 255;
        OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
    } else {
        frag->hdr->cm_seen = cm_return;
    }

    BTL_OPENIB_GET_CREDITS(endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits);
    credits_hdr->qpn = qp;
    credits_hdr->control.type = MCA_BTL_OPENIB_CONTROL_CREDITS;

    if(endpoint->nbo)
        BTL_OPENIB_RDMA_CREDITS_HEADER_HTON(*credits_hdr);

    qp_reset_signal_count(endpoint, qp);
    if((rc = post_send(endpoint, frag, do_rdma, 1)) == 0)
        return;

    if(endpoint->nbo) {
        BTL_OPENIB_HEADER_NTOH(*frag->hdr);
        BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
    }
    BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
    OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.rd_credits,
                            frag->hdr->credits);
    OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_local.credits,
                            credits_hdr->rdma_credits);
    if(do_rdma)
        OPAL_THREAD_ADD_FETCH32(&endpoint->eager_rdma_remote.tokens, 1);
    else
        OPAL_THREAD_ADD_FETCH32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);

    BTL_ERROR(("error posting send request errno %d says %s", rc,
               strerror(errno)));
}

/* local callback function for completion of eager rdma connect */
static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
    mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    struct mca_btl_base_descriptor_t *descriptor,
    int status)
{
    mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
    OPAL_THREAD_ADD_FETCH32(&device->non_eager_rdma_endpoints, -1);
    assert(device->non_eager_rdma_endpoints >= 0);
    MCA_BTL_IB_FRAG_RETURN(descriptor);
}

/* send the eager rdma connect message to the remote endpoint */
static int mca_btl_openib_endpoint_send_eager_rdma(
    mca_btl_base_endpoint_t *endpoint)
{
    mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
    mca_btl_openib_eager_rdma_header_t *rdma_hdr;
    mca_btl_openib_send_control_frag_t *frag;
    int rc;

    frag = alloc_control_frag(openib_btl);
    if(NULL == frag) {
        return -1;
    }

    to_base_frag(frag)->base.des_cbfunc =
        mca_btl_openib_endpoint_eager_rdma_connect_cb;
    to_base_frag(frag)->base.des_cbdata = NULL;
    to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
    to_base_frag(frag)->segment.seg_len =
        sizeof(mca_btl_openib_eager_rdma_header_t);
    to_com_frag(frag)->endpoint = endpoint;

    frag->hdr->tag = MCA_BTL_TAG_IB;
    rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
    rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
    rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
    rdma_hdr->rdma_start.lval = opal_ptr_ptol(endpoint->eager_rdma_local.base.pval);
    BTL_VERBOSE(("sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64
                 ", pval %p, ival %" PRIu32 " type %d and sizeof(rdma_hdr) %d\n",
                 rdma_hdr->rkey,
                 rdma_hdr->rdma_start.lval,
                 rdma_hdr->rdma_start.pval,
                 rdma_hdr->rdma_start.ival,
                 rdma_hdr->control.type,
                 (int) sizeof(mca_btl_openib_eager_rdma_header_t)
                 ));

    if(endpoint->nbo) {
        BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON((*rdma_hdr));

        BTL_VERBOSE(("after HTON: sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64 ", pval %p, ival %" PRIu32 "\n",
                     rdma_hdr->rkey,
                     rdma_hdr->rdma_start.lval,
                     rdma_hdr->rdma_start.pval,
                     rdma_hdr->rdma_start.ival
                     ));
    }
    rc = mca_btl_openib_endpoint_send(endpoint, frag);
    if (OPAL_SUCCESS == rc || OPAL_ERR_RESOURCE_BUSY == rc)
        return OPAL_SUCCESS;

    MCA_BTL_IB_FRAG_RETURN(frag);
    BTL_ERROR(("Error sending RDMA buffer: %s", strerror(errno)));
    return rc;
}

/* Setup eager RDMA buffers and notify the remote endpoint */
void mca_btl_openib_endpoint_connect_eager_rdma(
    mca_btl_openib_endpoint_t *endpoint)
{
    mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
    char *buf, *alloc_base;
    mca_btl_openib_recv_frag_t *headers_buf;
    int i, rc;
    uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
    void *_tmp_ptr = NULL;

    /* Set the local rdma pointer to 1 temporarily so other threads will not
     * try to enter the function */
    if(!opal_atomic_compare_exchange_strong_ptr (&endpoint->eager_rdma_local.base.pval, (void *) &_tmp_ptr,
                                                 (void *) 1)) {
        return;
    }

    headers_buf = (mca_btl_openib_recv_frag_t*)
        malloc(sizeof(mca_btl_openib_recv_frag_t) *
               mca_btl_openib_component.eager_rdma_num);

    if(NULL == headers_buf)
        goto unlock_rdma_local;

#if HAVE_DECL_IBV_ACCESS_SO
    /* Solaris implements the Relaxed Ordering feature defined in the
       PCI Specification.  With this in mind, any memory region which
       relies on a buffer being written in a specific order, for
       example the eager rdma connections created in this routine,
       must set a strong order flag when registering the memory for
       rdma operations.

       The following flag will be interpreted and the appropriate
       steps will be taken when the memory is registered in
       openib_reg_mr(). */
    flag |= MCA_RCACHE_FLAGS_SO_MEM;
#endif

    alloc_base = buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
                                                                         openib_btl->eager_rdma_frag_size *
                                                                         mca_btl_openib_component.eager_rdma_num,
                                                                         mca_btl_openib_component.buffer_alignment,
                                                                         0);

    if(!buf)
        goto free_headers_buf;

    rc = openib_btl->device->rcache->rcache_register (openib_btl->device->rcache, buf, openib_btl->eager_rdma_frag_size *
                                                      mca_btl_openib_component.eager_rdma_num, flag, MCA_RCACHE_ACCESS_ANY,
                                                      (mca_rcache_base_registration_t**)&endpoint->eager_rdma_local.reg);
    if (OPAL_SUCCESS != rc) {
        openib_btl->super.btl_mpool->mpool_free (openib_btl->super.btl_mpool, alloc_base);
        goto free_headers_buf;
    }

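    /* Descriptive note (added): each receive slot in the registered region is
     * eager_rdma_frag_size bytes.  buf is advanced so that the openib header,
     * the eager payload (btl_eager_limit bytes), and the footer of slot 0 end
     * exactly at the slot boundary; the footers are initialized with
     * MCA_BTL_OPENIB_RDMA_MAKE_REMOTE below and are what the progress path
     * inspects to detect fragments written into these slots by the peer. */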
    buf = buf + openib_btl->eager_rdma_frag_size -
        sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit -
        sizeof(mca_btl_openib_header_t);

    for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
        opal_free_list_item_t *item;
        mca_btl_openib_recv_frag_t *frag;
        mca_btl_openib_frag_init_data_t init_data;

        item = (opal_free_list_item_t*)&headers_buf[i];
        item->registration = (mca_rcache_base_registration_t *)endpoint->eager_rdma_local.reg;
        item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
        OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);

        init_data.order = mca_btl_openib_component.credits_qp;
        init_data.list = NULL;

        mca_btl_openib_frag_init(item, &init_data);
        frag = to_recv_frag(item);
        to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
        to_com_frag(frag)->endpoint = endpoint;
        frag->ftr = (mca_btl_openib_footer_t*)
            ((char*)to_base_frag(frag)->segment.seg_addr.pval +
             mca_btl_openib_component.eager_limit);

        MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
    }

    endpoint->eager_rdma_local.frags = headers_buf;

    endpoint->eager_rdma_local.rd_win =
        mca_btl_openib_component.eager_rdma_num >> 2;
    endpoint->eager_rdma_local.rd_win =
        endpoint->eager_rdma_local.rd_win ? endpoint->eager_rdma_local.rd_win : 1;

    /* set the local rdma pointer to the real value */
    endpoint->eager_rdma_local.base.pval = buf;
    endpoint->eager_rdma_local.alloc_base = alloc_base;

    if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
        mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
        mca_btl_openib_endpoint_t **p;
        void *_tmp_ptr;
        OBJ_RETAIN(endpoint);
        assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
        do {
            _tmp_ptr = NULL;
            p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
        } while(!opal_atomic_compare_exchange_strong_ptr (p, (void *) &_tmp_ptr, endpoint));

        OPAL_THREAD_ADD_FETCH32(&openib_btl->eager_rdma_channels, 1);
        /* from this point on, the progress function starts to poll the new buffer */
        OPAL_THREAD_ADD_FETCH32(&device->eager_rdma_buffers_count, 1);
        return;
    }

    openib_btl->device->rcache->rcache_deregister (openib_btl->device->rcache,
                                                   (mca_rcache_base_registration_t*)endpoint->eager_rdma_local.reg);
    openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, buf);
free_headers_buf:
    free(headers_buf);
unlock_rdma_local:
    /* set the local rdma pointer back to zero; we will retry later */
    endpoint->eager_rdma_local.base.pval = NULL;
    endpoint->eager_rdma_local.frags = NULL;
}

/*
 * Invoke an error on the btl associated with an endpoint.  If we
 * don't have an endpoint, then just use the first one on the
 * component list of BTLs.
 */
void *mca_btl_openib_endpoint_invoke_error(void *context)
{
    mca_btl_openib_endpoint_t *endpoint = (mca_btl_openib_endpoint_t*) context;
    mca_btl_openib_module_t *btl = NULL;

    if (NULL == endpoint) {
        int i;
        for (i = 0; i < mca_btl_openib_component.ib_num_btls; ++i) {
            if (NULL != mca_btl_openib_component.openib_btls[i] &&
                NULL != mca_btl_openib_component.openib_btls[i]->error_cb) {
                btl = mca_btl_openib_component.openib_btls[i];
                break;
            }
        }
    } else {
        btl = endpoint->endpoint_btl;
    }

    /* If we didn't find a BTL, then just bail :-( */
    if (NULL == btl || NULL == btl->error_cb) {
        opal_show_help("help-mpi-btl-openib.txt",
                       "cannot raise btl error", true,
                       opal_process_info.nodename,
                       __FILE__, __LINE__);
        exit(1);
    }

    /* Invoke the callback to the upper layer */
    btl->error_cb(&(btl->super), MCA_BTL_ERROR_FLAGS_FATAL, NULL, NULL);

    /* Will likely never get here */
    return NULL;
}