1 /*
2 * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
3 * Copyright (c) 2015-2018 Los Alamos National Security, LLC.
4 * All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 /*
36 * code for managing VC's
37 */
38
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42
43 #include "gnix.h"
44 #include "gnix_vc.h"
45 #include "gnix_util.h"
46 #include "gnix_datagram.h"
47 #include "gnix_cm_nic.h"
48 #include "gnix_nic.h"
49 #include "gnix_ep.h"
50 #include "gnix_mbox_allocator.h"
51 #include "gnix_hashtable.h"
52 #include "gnix_av.h"
53 #include "gnix_trigger.h"
54 #include "gnix_vector.h"
55 #include "gnix_xpmem.h"
56 #include "gnix_cq.h"
57
58 /*
59 * forward declarations and local struct defs.
60 */
61
62 struct wq_hndl_conn_req {
63 gni_smsg_attr_t src_smsg_attr;
64 int src_vc_id;
65 struct gnix_vc *vc;
66 uint64_t src_vc_ptr;
67 gni_mem_handle_t irq_mem_hndl;
68 xpmem_segid_t peer_segid;
69 };
70
71 static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr);
72 static int __gnix_vc_conn_ack_comp_fn(void *data);
73 static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc);
74
75 static int __gnix_vc_work_schedule(struct gnix_vc *vc);
76 static int _gnix_vc_sched_new_conn(struct gnix_vc *vc);
77
78 /*******************************************************************************
79 * Helper functions
80 ******************************************************************************/
81
82 /**
83 * Set key to the given gnix_addr.
84 *
85 * NOTE: If struct gnix_address is ever bit packed or packed by
86 * the compiler this assignment may not set key to the correct
87 * bytes.
88 */
static inline void __gnix_vc_set_ht_key(void *gnix_addr,
					gnix_ht_key_t *key)
{
	/* Copy the raw gnix address bytes into the hash-table key.
	 * memcpy avoids any alignment assumption on gnix_addr. */
	memcpy(key, gnix_addr, sizeof(*key));
}
94
_gnix_ep_vc_lookup(struct gnix_fid_ep * ep,uint64_t key)95 static struct gnix_vc *_gnix_ep_vc_lookup(struct gnix_fid_ep *ep, uint64_t key)
96 {
97 struct gnix_vc *vc = NULL;
98 int ret;
99 int i;
100
101 assert(ep->av);
102
103
104 for (i = 0; i < GNIX_ADDR_CACHE_SIZE; i++)
105 {
106 if (ep->addr_cache[i].addr == key && ep->addr_cache[i].vc != NULL)
107 return ep->addr_cache[i].vc;
108 }
109
110 if (ep->av->type == FI_AV_TABLE) {
111 ret = _gnix_vec_at(ep->vc_table, (void **)&vc, key);
112 if (ret != FI_SUCCESS) {
113 vc = NULL;
114 }
115 } else {
116 vc = (struct gnix_vc *)_gnix_ht_lookup(ep->vc_ht, key);
117 }
118
119 if (vc) {
120 ep->addr_cache[ep->last_cached].addr = key;
121 ep->addr_cache[ep->last_cached].vc = vc;
122 ep->last_cached = (ep->last_cached + 1) % 5;
123 }
124
125 return vc;
126 }
127
_gnix_ep_vc_store(struct gnix_fid_ep * ep,struct gnix_vc * vc,uint64_t key)128 static int _gnix_ep_vc_store(struct gnix_fid_ep *ep, struct gnix_vc *vc,
129 uint64_t key)
130 {
131 int ret;
132
133 assert(ep->av);
134
135 if (ep->av->type == FI_AV_TABLE) {
136 ret = _gnix_vec_insert_at(ep->vc_table, (void *)vc, key);
137 } else {
138 ret = _gnix_ht_insert(ep->vc_ht, key, vc);
139 }
140
141 return ret;
142 }
143
__gnix_vc_gnix_addr_equal(struct dlist_entry * item,const void * arg)144 static int __gnix_vc_gnix_addr_equal(struct dlist_entry *item, const void *arg)
145 {
146 struct gnix_vc *vc = dlist_entry(item, struct gnix_vc, list);
147
148 return GNIX_ADDR_EQUAL(vc->peer_addr, *(struct gnix_address *)arg);
149 }
150
151 /* Find an unmapped VC that matches 'dest_addr' and map it into the EP's VC
152 * look up table.
153 *
154 * Note: EP must be locked. */
static struct gnix_vc *__gnix_vc_lookup_unmapped(struct gnix_fid_ep *ep,
						 fi_addr_t dest_addr)
{
	struct gnix_av_addr_entry av_entry;
	struct dlist_entry *entry;
	struct gnix_vc *vc;
	int ret;

	/* Translate the fi_addr; if it isn't (yet) present in the AV
	 * there is nothing we can map. */
	ret = _gnix_av_lookup(ep->av, dest_addr, &av_entry);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_av_lookup for addr 0x%lx returned %s\n",
			  dest_addr, fi_strerror(-ret));
		return NULL;
	}

	/* Pull the first unmapped VC whose gnix address matches the one
	 * the AV resolved for dest_addr. */
	entry = dlist_remove_first_match(&ep->unmapped_vcs,
					 __gnix_vc_gnix_addr_equal,
					 (void *)&av_entry.gnix_addr);
	if (!entry)
		return NULL;

	vc = dlist_entry(entry, struct gnix_vc, list);
	GNIX_INFO(FI_LOG_EP_CTRL,
		  "Found unmapped VC: %p gnix_addr: 0x%lx fi_addr: 0x%lx\n",
		  vc, vc->peer_addr, vc->peer_fi_addr);

	/* Map dest_addr to the VC in the EP's VC look up table. */
	ret = _gnix_ep_vc_store(ep, vc, dest_addr);
	if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ep_vc_store returned %s\n",
			  fi_strerror(-ret));
		/* Put the VC back so it isn't lost. */
		dlist_insert_tail(&vc->list, &ep->unmapped_vcs);
		return NULL;
	}

	return vc;
}
199
200 /**
201 * Look up the vc by fi_addr_t, if it's found just return it,
202 * otherwise allocate a new vc, insert it into the hashtable,
203 * and vector for FI_AV_TABLE AV type, and start connection setup.
204 *
205 * assumptions: ep is non-null;
206 * dest_addr is valid;
207 * vc_ptr is non-null.
208 *
209 * Note: EP must be locked.
210 */
/**
 * Look up the VC mapped to dest_addr; if found just return it,
 * otherwise allocate a new VC, insert it into the hashtable/vector
 * (vector for FI_AV_TABLE AV type), and start connection setup.
 *
 * Resolution order:
 *   1. the EP's VC lookup structure;
 *   2. the EP's unmapped-VC list (peer connected before dest_addr was
 *      inserted into our AV);
 *   3. allocate, store, and connect a fresh VC.
 *
 * assumptions: ep is non-null;
 *              dest_addr is valid;
 *              vc_ptr is non-null.
 *
 * Note: EP must be locked.
 */
static int __gnix_vc_get_vc_by_fi_addr(struct gnix_fid_ep *ep, fi_addr_t dest_addr,
				       struct gnix_vc **vc_ptr)
{
	struct gnix_fid_av *av;
	int ret = FI_SUCCESS;
	struct gnix_av_addr_entry av_entry;
	struct gnix_vc *vc = NULL;

	GNIX_DBG_TRACE(FI_LOG_EP_CTRL, "\n");

	av = ep->av;
	if (OFI_UNLIKELY(av == NULL)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "av field NULL for ep %p\n", ep);
		return -FI_EINVAL;
	}

	/* Guard the debug dereference: vc_table is only used for
	 * FI_AV_TABLE type AVs (see _gnix_ep_vc_lookup) and may not be
	 * allocated for this EP. */
	if (ep->vc_table != NULL)
		GNIX_DEBUG(FI_LOG_EP_CTRL,
			   "ep->vc_table = %p, ep->vc_table->vector = %p\n",
			   ep->vc_table, ep->vc_table->vector);

	/* Use FI address to lookup in EP VC table. */
	vc = _gnix_ep_vc_lookup(ep, dest_addr);
	if (vc) {
		*vc_ptr = vc;
		return FI_SUCCESS;
	}

	/* VC is not mapped yet. We can receive a connection request from a
	 * remote peer before the target EP has bound to an AV or before the
	 * remote peer has had it's address inserted into the target EP's AV.
	 * Those requests will result in a connection as usual, but the VC will
	 * not be mapped into an EP's AV until the EP attempts to send to the
	 * remote peer. Check the 'unmapped VC' list to see if such a VC
	 * exists and map it into the AV here. */
	vc = __gnix_vc_lookup_unmapped(ep, dest_addr);
	if (vc) {
		*vc_ptr = vc;
		return FI_SUCCESS;
	}

	/* No VC exists for the peer yet. Look up full AV entry for the
	 * destination address. */
	ret = _gnix_av_lookup(av, dest_addr, &av_entry);
	if (ret != FI_SUCCESS) {
		/* %lx matches fi_addr_t (uint64_t); %llx expected an
		 * unsigned long long and mismatched the argument. */
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_av_lookup for addr 0x%lx returned %s \n",
			  dest_addr, fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Allocate new VC with AV entry. */
	ret = _gnix_vc_alloc(ep, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Map new VC through the EP connection table. */
	ret = _gnix_ep_vc_store(ep, vc, dest_addr);
	if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ep_vc_store returned %s\n",
			  fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Initiate new VC connection. */
	ret = _gnix_vc_connect(vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_connect returned %s\n",
			  fi_strerror(-ret));
		goto err_w_lock;
	}

	*vc_ptr = vc;
	return ret;

err_w_lock:
	/* vc is NULL unless _gnix_vc_alloc succeeded. */
	if (vc != NULL)
		_gnix_vc_destroy(vc);
	return ret;
}
296
297 /*******************************************************************************
298 * connection request /response message pack/unpack functions
299 ******************************************************************************/
300
301 /*
302 * pack a connection request. Contents:
303 * - target_addr (the addr of the targeted EP for the conn req)
304 * - src_addr (the address of the EP originating the conn req)
305 * - src_vc_id (the vc id the mbox the originating EP allocated to
306 * build this connection)
307 * - src_vc_vaddr (virt. address of the vc struct allocated at the originating
308 * EP to build this connection)
309 * - src_smsg_attr (smsg attributes of the mbox allocated at the
310 * originating EP for this connection)
311 * - src_irq_cq_mhdl (GNI memory handle for irq cq for originating EP)
312 */
static void __gnix_vc_pack_conn_req(char *sbuf,
				    struct gnix_address *target_addr,
				    struct gnix_address *src_addr,
				    int src_vc_id,
				    uint64_t src_vc_vaddr,
				    gni_smsg_attr_t *src_smsg_attr,
				    gni_mem_handle_t *src_irq_cq_mhdl,
				    uint64_t caps,
				    xpmem_segid_t my_segid,
				    uint8_t name_type,
				    uint8_t rx_ctx_cnt,
				    uint32_t key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = sbuf;
	uint8_t rtype = GNIX_VC_CONN_REQ;

	/*
	 * sanity checks
	 */

	assert(sbuf != NULL);

	/* Verify the serialized request fits in one CM datagram. */
	len = sizeof(rtype) +
		sizeof(struct gnix_address) * 2 +
		sizeof(int) +
		sizeof(uint64_t) * 2 +
		sizeof(gni_smsg_attr_t) +
		sizeof(gni_mem_handle_t) +
		sizeof(xpmem_segid_t) +
		sizeof(name_type) +
		sizeof(rx_ctx_cnt) +
		sizeof(key_offset);

	assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE);

	/* Append one field and advance the write cursor.  Field order
	 * must mirror __gnix_vc_unpack_conn_req exactly. */
#define PACK_FIELD(src, sz)			\
	do {					\
		memcpy(cptr, (src), (sz));	\
		cptr += (sz);			\
	} while (0)

	PACK_FIELD(&rtype, sizeof(rtype));
	PACK_FIELD(target_addr, sizeof(struct gnix_address));
	PACK_FIELD(src_addr, sizeof(struct gnix_address));
	PACK_FIELD(&src_vc_id, sizeof(int));
	PACK_FIELD(&src_vc_vaddr, sizeof(uint64_t));
	PACK_FIELD(src_smsg_attr, sizeof(gni_smsg_attr_t));
	PACK_FIELD(src_irq_cq_mhdl, sizeof(gni_mem_handle_t));
	PACK_FIELD(&caps, sizeof(uint64_t));
	PACK_FIELD(&my_segid, sizeof(xpmem_segid_t));
	PACK_FIELD(&name_type, sizeof(name_type));
	PACK_FIELD(&rx_ctx_cnt, sizeof(rx_ctx_cnt));
	PACK_FIELD(&key_offset, sizeof(key_offset));

#undef PACK_FIELD
}
373
374 /*
375 * unpack a connection request message
376 */
static void __gnix_vc_unpack_conn_req(char *rbuf,
				      struct gnix_address *target_addr,
				      struct gnix_address *src_addr,
				      int *src_vc_id,
				      uint64_t *src_vc_vaddr,
				      gni_smsg_attr_t *src_smsg_attr,
				      gni_mem_handle_t *src_irq_cq_mhndl,
				      uint64_t *caps,
				      xpmem_segid_t *peer_segid,
				      uint8_t *name_type,
				      uint8_t *rx_ctx_cnt,
				      uint32_t *key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = rbuf;

	/*
	 * sanity checks
	 */

	assert(rbuf);

	/* Skip the message-type byte; the dispatcher reads it separately
	 * via __gnix_vc_get_msg_type. */
	cptr += sizeof(uint8_t);

	/* Copy one field out of the buffer and advance the read cursor.
	 * Field order must mirror __gnix_vc_pack_conn_req exactly. */
#define UNPACK_FIELD(dst, sz)			\
	do {					\
		memcpy((dst), cptr, (sz));	\
		cptr += (sz);			\
	} while (0)

	UNPACK_FIELD(target_addr, sizeof(struct gnix_address));
	UNPACK_FIELD(src_addr, sizeof(struct gnix_address));
	UNPACK_FIELD(src_vc_id, sizeof(int));
	UNPACK_FIELD(src_vc_vaddr, sizeof(uint64_t));
	UNPACK_FIELD(src_smsg_attr, sizeof(gni_smsg_attr_t));
	UNPACK_FIELD(src_irq_cq_mhndl, sizeof(gni_mem_handle_t));
	UNPACK_FIELD(caps, sizeof(uint64_t));
	UNPACK_FIELD(peer_segid, sizeof(xpmem_segid_t));
	UNPACK_FIELD(name_type, sizeof(*name_type));
	UNPACK_FIELD(rx_ctx_cnt, sizeof(*rx_ctx_cnt));
	UNPACK_FIELD(key_offset, sizeof(*key_offset));

#undef UNPACK_FIELD
}
422
423 /*
424 * pack a connection response. Contents:
425 * - src_vc_vaddr (vaddr of the vc struct allocated at the originating
426 * EP to build this connection)
427 * - resp_vc_id (the vc id of the mbox the responding EP allocated to
428 * build this connection)
429 * - resp_smsg_attr (smsg attributes of the mbox allocated at the
430 * responding EP for this connection)
431 * - resp_irq_cq_mhndl (GNI memhndl for irq cq of responding EP)
432 */
433
static void __gnix_vc_pack_conn_resp(char *sbuf,
				     uint64_t src_vc_vaddr,
				     uint64_t resp_vc_vaddr,
				     int resp_vc_id,
				     gni_smsg_attr_t *resp_smsg_attr,
				     gni_mem_handle_t *resp_irq_cq_mhndl,
				     uint64_t caps,
				     xpmem_segid_t my_segid,
				     uint32_t key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = sbuf;
	uint8_t rtype = GNIX_VC_CONN_RESP;

	/*
	 * sanity checks
	 */

	assert(sbuf != NULL);

	/* Verify the serialized response fits in one CM datagram. */
	len = sizeof(rtype) +
		sizeof(uint64_t) * 3 +
		sizeof(int) +
		sizeof(gni_smsg_attr_t) +
		sizeof(gni_mem_handle_t) +
		sizeof(xpmem_segid_t) +
		sizeof(uint32_t);
	assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE);

	/* Append one field and advance the write cursor.  Field order
	 * must mirror __gnix_vc_unpack_resp exactly. */
#define PACK_FIELD(src, sz)			\
	do {					\
		memcpy(cptr, (src), (sz));	\
		cptr += (sz);			\
	} while (0)

	PACK_FIELD(&rtype, sizeof(rtype));
	PACK_FIELD(&src_vc_vaddr, sizeof(uint64_t));
	PACK_FIELD(&resp_vc_vaddr, sizeof(uint64_t));
	PACK_FIELD(&resp_vc_id, sizeof(int));
	PACK_FIELD(resp_smsg_attr, sizeof(gni_smsg_attr_t));
	PACK_FIELD(resp_irq_cq_mhndl, sizeof(gni_mem_handle_t));
	PACK_FIELD(&caps, sizeof(uint64_t));
	PACK_FIELD(&my_segid, sizeof(xpmem_segid_t));
	PACK_FIELD(&key_offset, sizeof(uint32_t));

#undef PACK_FIELD
}
481
482 /*
483 * unpack a connection request response
484 */
static void __gnix_vc_unpack_resp(char *rbuf,
				  uint64_t *src_vc_vaddr,
				  uint64_t *resp_vc_vaddr,
				  int *resp_vc_id,
				  gni_smsg_attr_t *resp_smsg_attr,
				  gni_mem_handle_t *resp_irq_cq_mhndl,
				  uint64_t *caps,
				  xpmem_segid_t *peer_segid,
				  uint32_t *key_offset)
{
	char *cptr = rbuf;

	/* Skip the message-type byte; the dispatcher reads it separately
	 * via __gnix_vc_get_msg_type. */
	cptr += sizeof(uint8_t);

	/* Copy one field out of the buffer and advance the read cursor.
	 * Field order must mirror __gnix_vc_pack_conn_resp exactly. */
#define UNPACK_FIELD(dst, sz)			\
	do {					\
		memcpy((dst), cptr, (sz));	\
		cptr += (sz);			\
	} while (0)

	UNPACK_FIELD(src_vc_vaddr, sizeof(uint64_t));
	UNPACK_FIELD(resp_vc_vaddr, sizeof(uint64_t));
	UNPACK_FIELD(resp_vc_id, sizeof(int));
	UNPACK_FIELD(resp_smsg_attr, sizeof(gni_smsg_attr_t));
	UNPACK_FIELD(resp_irq_cq_mhndl, sizeof(gni_mem_handle_t));
	UNPACK_FIELD(caps, sizeof(uint64_t));
	UNPACK_FIELD(peer_segid, sizeof(xpmem_segid_t));
	UNPACK_FIELD(key_offset, sizeof(uint32_t));

#undef UNPACK_FIELD
}
515
/* Extract the message type, which is always the first byte of any
 * CM message (written by the pack routines above). */
static void __gnix_vc_get_msg_type(char *rbuf,
				   uint8_t *rtype)
{
	assert(rtype);
	*rtype = *(uint8_t *)rbuf;
}
522
523 /*
524 * helper function to initialize an SMSG connection, plus
525 * a mem handle to use for delivering IRQs to peer when needed
526 */
int _gnix_vc_smsg_init(struct gnix_vc *vc, int peer_id,
		       gni_smsg_attr_t *peer_smsg_attr,
		       gni_mem_handle_t *peer_irq_mem_hndl)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep;
	struct gnix_fid_domain *dom;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t local_smsg_attr;
	gni_return_t __attribute__((unused)) status;
	ssize_t __attribute__((unused)) len;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	assert(vc);

	ep = vc->ep;
	assert(ep);

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	/* The VC's mailbox must already be allocated by the caller. */
	mbox = vc->smsg_mbox;
	assert (mbox);

	/* Describe the local side of the SMSG connection using this VC's
	 * mailbox; credit/size limits come from the domain tunables. */
	local_smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	local_smsg_attr.msg_buffer = mbox->base;
	local_smsg_attr.buff_size = vc->ep->nic->mem_per_mbox;
	local_smsg_attr.mem_hndl = *mbox->memory_handle;
	local_smsg_attr.mbox_offset = (uint64_t)mbox->offset;
	local_smsg_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	local_smsg_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 * now build the SMSG connection
	 */

	/* GNI EP create/bind/init must be serialized on the nic when it
	 * is shared between threads. */
	COND_ACQUIRE(ep->nic->requires_lock, &ep->nic->lock);

	status = GNI_EpCreate(ep->nic->gni_nic_hndl,
			      ep->nic->tx_cq,
			      &vc->gni_ep);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpCreate returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err;
	}

	status = GNI_EpBind(vc->gni_ep,
			    vc->peer_addr.device_addr,
			    vc->peer_addr.cdm_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpBind returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	status = GNI_SmsgInit(vc->gni_ep,
			      &local_smsg_attr,
			      peer_smsg_attr);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_SmsgInit returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	/* Tag CQ events from this GNI EP with the local and peer vc ids. */
	status = GNI_EpSetEventData(vc->gni_ep,
				    vc->vc_id,
				    peer_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpSetEventData returned %s\n",
			  gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	/* NULL is legal here; the connect-to-self path passes no peer
	 * irq handle (see __gnix_vc_connect_to_self). */
	if (peer_irq_mem_hndl != NULL)
		vc->peer_irq_mem_hndl = *peer_irq_mem_hndl;

	COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock);
	return ret;
err1:
	/* Tear down the partially constructed GNI EP before unlocking. */
	GNI_EpDestroy(vc->gni_ep);
err:
	COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock);
	return ret;
}
619
__gnix_vc_connect_to_self(struct gnix_vc * vc)620 static int __gnix_vc_connect_to_self(struct gnix_vc *vc)
621 {
622 int ret = FI_SUCCESS;
623 struct gnix_fid_domain *dom = NULL;
624 struct gnix_fid_ep *ep = NULL;
625 struct gnix_cm_nic *cm_nic = NULL;
626 struct gnix_mbox *mbox = NULL;
627 gni_smsg_attr_t smsg_mbox_attr;
628 xpmem_apid_t peer_apid;
629 xpmem_segid_t my_segid;
630
631 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
632
633 ep = vc->ep;
634 if (ep == NULL)
635 return -FI_EINVAL;
636
637 cm_nic = ep->cm_nic;
638 if (cm_nic == NULL)
639 return -FI_EINVAL;
640
641 dom = ep->domain;
642 if (dom == NULL)
643 return -FI_EINVAL;
644
645 assert(vc->conn_state == GNIX_VC_CONN_NONE);
646 vc->conn_state = GNIX_VC_CONNECTING;
647
648 assert(vc->smsg_mbox == NULL);
649
650 ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
651 if (ret != FI_SUCCESS) {
652 GNIX_WARN(FI_LOG_EP_DATA,
653 "_gnix_mbox_alloc returned %s\n",
654 fi_strerror(-ret));
655 return -FI_ENOSPC;
656 }
657 vc->smsg_mbox = mbox;
658
659 smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
660 smsg_mbox_attr.msg_buffer = mbox->base;
661 smsg_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox;
662 smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
663 smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
664 smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
665 smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;
666
667 ret = _gnix_vc_smsg_init(vc, vc->vc_id, &smsg_mbox_attr, NULL);
668 if (ret != FI_SUCCESS) {
669 GNIX_WARN(FI_LOG_EP_DATA,
670 "_gnix_vc_smsg_init returned %s\n",
671 fi_strerror(-ret));
672 goto err_mbox_init;
673 }
674
675 /* TODO: use special send-to-self mechanism to avoid overhead of XPMEM
676 * when just sending a message to oneself. */
677 ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl, &my_segid);
678 if (ret != FI_SUCCESS) {
679 GNIX_WARN(FI_LOG_EP_CTRL,
680 "_gni_xpmem_get_my_segid returned %s\n",
681 fi_strerror(-ret));
682 }
683
684 ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, my_segid, &peer_apid);
685 if (ret == FI_SUCCESS) {
686 vc->modes |= GNIX_VC_MODE_XPMEM;
687 vc->peer_apid = peer_apid;
688 } else {
689 GNIX_WARN(FI_LOG_EP_CTRL,
690 "_gni_xpmem_get_apiid returned %s\n",
691 fi_strerror(-ret));
692 }
693
694 vc->peer_id = vc->vc_id;
695 vc->peer_irq_mem_hndl = ep->nic->irq_mem_hndl;
696 vc->peer_caps = ep->caps;
697 vc->peer_key_offset = ep->auth_key->key_offset;
698 vc->conn_state = GNIX_VC_CONNECTED;
699
700 ret = _gnix_vc_sched_new_conn(vc);
701 if (ret != FI_SUCCESS)
702 GNIX_WARN(FI_LOG_EP_DATA,
703 "_gnix_vc_sched_new_conn returned %s\n",
704 fi_strerror(-ret));
705
706 GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n", vc);
707 return ret;
708
709 err_mbox_init:
710 _gnix_mbox_free(vc->smsg_mbox);
711 vc->smsg_mbox = NULL;
712
713 return ret;
714 }
715
716 /*******************************************************************************
717 * functions for handling incoming connection request/response messages
718 ******************************************************************************/
719
static int __gnix_vc_hndl_conn_resp(struct gnix_cm_nic *cm_nic,
				    char *msg_buffer,
				    struct gnix_address src_cm_nic_addr)
{
	int ret = FI_SUCCESS;
	int peer_id;
	struct gnix_vc *vc = NULL;
	uint64_t peer_vc_addr;
	struct gnix_fid_ep *ep;
	gni_smsg_attr_t peer_smsg_attr;
	gni_mem_handle_t tmp_mem_hndl;
	uint64_t peer_caps;
	xpmem_segid_t peer_segid;
	xpmem_apid_t peer_apid;
	uint32_t peer_key_offset;
	bool accessible;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * unpack the message
	 */

	/* The first unpacked field is the vaddr of OUR local VC struct,
	 * echoed back by the peer from the original connection request
	 * (src_vc_vaddr in __gnix_vc_pack_conn_req). */
	__gnix_vc_unpack_resp(msg_buffer,
			      (uint64_t *)&vc,
			      &peer_vc_addr,
			      &peer_id,
			      &peer_smsg_attr,
			      &tmp_mem_hndl,
			      &peer_caps,
			      &peer_segid,
			      &peer_key_offset);

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"resp rx: (From Aries 0x%x Id %d src vc %p peer vc addr 0x%lx)\n",
		src_cm_nic_addr.device_addr,
		src_cm_nic_addr.cdm_id,
		vc,
		peer_vc_addr);

	ep = vc->ep;
	assert(ep != NULL);

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/*
	 * at this point vc should be in connecting state
	 */
	if (vc->conn_state != GNIX_VC_CONNECTING) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "vc %p not in connecting state, rather %d\n",
			  vc, vc->conn_state);
		ret = -FI_EINVAL;
		goto err;
	}

	/*
	 * build the SMSG connection
	 */

	ret = _gnix_vc_smsg_init(vc, peer_id, &peer_smsg_attr,
				 &tmp_mem_hndl);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_vc_smsg_init returned %s\n",
			  fi_strerror(-ret));
		goto err;
	}

	/*
	 * see if we can do xpmem with this EP
	 */

	/* XPMEM is best-effort: on any failure here the connection simply
	 * proceeds without GNIX_VC_MODE_XPMEM. */
	ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible);
	if ((ret == FI_SUCCESS) && (accessible == true)) {
		ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
					   peer_segid,
					   &peer_apid);
		if (ret == FI_SUCCESS) {
			vc->modes |= GNIX_VC_MODE_XPMEM;
			vc->peer_apid = peer_apid;
		}
	}

	/*
	 * transition the VC to connected
	 * put in to the nic's work queue for
	 * further processing
	 */

	vc->peer_caps = peer_caps;
	vc->peer_key_offset = peer_key_offset;
	vc->peer_id = peer_id;
	vc->conn_state = GNIX_VC_CONNECTED;
	GNIX_DEBUG(FI_LOG_EP_CTRL,
		   " moving vc %p to state connected\n",vc);

	ret = _gnix_vc_sched_new_conn(vc);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_sched_new_conn returned %s\n",
			  fi_strerror(-ret));

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	return ret;
err:
	/* Mark the VC errored so later progress won't treat it as live. */
	vc->conn_state = GNIX_VC_CONN_ERROR;

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	return ret;
}
832
__gnix_vc_hndl_conn_req(struct gnix_cm_nic * cm_nic,char * msg_buffer,struct gnix_address src_cm_nic_addr)833 static int __gnix_vc_hndl_conn_req(struct gnix_cm_nic *cm_nic,
834 char *msg_buffer,
835 struct gnix_address src_cm_nic_addr)
836 {
837 int ret = FI_SUCCESS;
838 gni_return_t __attribute__((unused)) status;
839 struct gnix_fid_ep *ep = NULL;
840 gnix_ht_key_t key;
841 struct gnix_av_addr_entry entry;
842 struct gnix_address src_addr, target_addr;
843 struct gnix_vc *vc = NULL;
844 struct gnix_work_req *work_req;
845 int src_vc_id;
846 gni_smsg_attr_t src_smsg_attr;
847 uint64_t src_vc_ptr;
848 uint64_t peer_caps;
849 struct wq_hndl_conn_req *data = NULL;
850 gni_mem_handle_t tmp_mem_hndl;
851 int src_mapped = 0;
852 fi_addr_t fi_addr;
853 xpmem_segid_t peer_segid;
854 xpmem_apid_t peer_apid;
855 uint8_t name_type, rx_ctx_cnt;
856 bool accessible;
857 ssize_t __attribute__((unused)) len;
858 struct gnix_ep_name *error_data;
859 uint32_t key_offset;
860
861 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
862
863 /*
864 * unpack the message
865 */
866
867 __gnix_vc_unpack_conn_req(msg_buffer,
868 &target_addr,
869 &src_addr,
870 &src_vc_id,
871 &src_vc_ptr,
872 &src_smsg_attr,
873 &tmp_mem_hndl,
874 &peer_caps,
875 &peer_segid,
876 &name_type,
877 &rx_ctx_cnt,
878 &key_offset);
879
880 GNIX_DEBUG(FI_LOG_EP_CTRL,
881 "conn req rx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d src vc 0x%lx )\n",
882 src_addr.device_addr,
883 src_addr.cdm_id,
884 target_addr.device_addr,
885 target_addr.cdm_id,
886 src_vc_ptr);
887
888 /*
889 * lookup the ep from the addr_to_ep_ht using the target_addr
890 * in the datagram
891 */
892
893 __gnix_vc_set_ht_key(&target_addr, &key);
894
895 ep = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht,
896 key);
897 if (ep == NULL) {
898 GNIX_WARN(FI_LOG_EP_DATA,
899 "_gnix_ht_lookup addr_to_ep failed\n");
900 return -FI_ENOENT;
901 }
902
903 /*
904 * look to see if there is a VC already for the
905 * address of the connecting EP.
906 */
907
908 COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);
909
910 /* If we already have an AV bound, see if sender's address is already
911 * mapped. */
912 if (ep->av) {
913 ret = _gnix_av_reverse_lookup(ep->av, src_addr, &fi_addr);
914 if (ret == FI_SUCCESS) {
915 src_mapped = 1;
916 vc = _gnix_ep_vc_lookup(ep, fi_addr);
917 }
918 }
919
920 /*
921 * if there is no corresponding vc in the hash,
922 * or there is an entry and it's not in connecting state
923 * go down the conn req ack route.
924 */
925 if ((vc == NULL) ||
926 (vc->conn_state == GNIX_VC_CONN_NONE)) {
927 if (vc == NULL) {
928 entry.gnix_addr = src_addr;
929 entry.cm_nic_cdm_id = src_cm_nic_addr.cdm_id;
930 ret = _gnix_vc_alloc(ep,
931 &entry,
932 &vc);
933 if (ret != FI_SUCCESS) {
934 GNIX_WARN(FI_LOG_EP_CTRL,
935 "_gnix_vc_alloc returned %s\n",
936 fi_strerror(-ret));
937 goto err;
938 }
939
940 vc->conn_state = GNIX_VC_CONNECTING;
941 vc->peer_key_offset = key_offset;
942
943 if (src_mapped) {
944 /* We have an AV which maps the incoming
945 * address. Store the new VC in our VC lookup
946 * table. */
947 ret = _gnix_ep_vc_store(ep, vc, fi_addr);
948 if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
949 _gnix_vc_destroy(vc);
950 GNIX_WARN(FI_LOG_EP_DATA,
951 "_gnix_ep_vc_store returned %s\n",
952 fi_strerror(-ret));
953 goto err;
954 }
955 } else {
956 /* We lack an AV and/or the entry to map the
957 * incoming address. Keep VC in special table
958 * until it is mapped for a TX operation. */
959 GNIX_INFO(FI_LOG_EP_CTRL,
960 "Received conn. request from unmapped peer EP, vc: %p addr: 0x%lx\n",
961 vc, src_addr);
962
963 dlist_insert_tail(&vc->list, &ep->unmapped_vcs);
964
965 /*
966 * see issue 4521 for the error_data size allocated
967 */
968 if (vc->ep->caps & FI_SOURCE) {
969 error_data =
970 calloc(1, GNIX_CQ_MAX_ERR_DATA_SIZE);
971 if (error_data == NULL) {
972 ret = -FI_ENOMEM;
973 goto err;
974 }
975 vc->gnix_ep_name = (void *) error_data;
976
977 error_data->gnix_addr = src_addr;
978 error_data->name_type = name_type;
979
980 error_data->cm_nic_cdm_id =
981 cm_nic->my_name.cm_nic_cdm_id;
982 error_data->cookie =
983 cm_nic->my_name.cookie;
984
985 error_data->rx_ctx_cnt = rx_ctx_cnt;
986 }
987 }
988 } else {
989 vc->conn_state = GNIX_VC_CONNECTING;
990 }
991
992 vc->peer_caps = peer_caps;
993 vc->peer_key_offset = key_offset;
994 /*
995 * prepare a work request to
996 * initiate an request response
997 */
998
999 work_req = calloc(1, sizeof(*work_req));
1000 if (work_req == NULL) {
1001 ret = -FI_ENOMEM;
1002 goto err;
1003 }
1004
1005 data = calloc(1, sizeof(struct wq_hndl_conn_req));
1006 if (data == NULL) {
1007 ret = -FI_ENOMEM;
1008 goto err;
1009 }
1010 memcpy(&data->src_smsg_attr,
1011 &src_smsg_attr,
1012 sizeof(src_smsg_attr));
1013 data->vc = vc;
1014 data->src_vc_id = src_vc_id;
1015 data->src_vc_ptr = src_vc_ptr;
1016 data->irq_mem_hndl = tmp_mem_hndl;
1017 data->peer_segid = peer_segid;
1018
1019 work_req->progress_fn = __gnix_vc_conn_ack_prog_fn;
1020 work_req->data = data;
1021 work_req->completer_fn = __gnix_vc_conn_ack_comp_fn;
1022 work_req->completer_data = data;
1023
1024 /*
1025 * add the work request to the tail of the
1026 * cm_nic's work queue, progress the cm_nic.
1027 */
1028
1029 fastlock_acquire(&cm_nic->wq_lock);
1030 dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
1031 fastlock_release(&cm_nic->wq_lock);
1032 } else {
1033
1034 /*
1035 * we can only be in connecting state if we
1036 * reach here. We have all the informatinon,
1037 * and the other side will get the information
1038 * at some point, so go ahead and build SMSG connection.
1039 */
1040 if (vc->conn_state != GNIX_VC_CONNECTING) {
1041 GNIX_WARN(FI_LOG_EP_CTRL,
1042 "vc %p not in connecting state nor in cm wq\n",
1043 vc, vc->conn_state);
1044 ret = -FI_EINVAL;
1045 goto err;
1046 }
1047
1048 ret = _gnix_vc_smsg_init(vc, src_vc_id,
1049 &src_smsg_attr,
1050 &tmp_mem_hndl);
1051 if (ret != FI_SUCCESS) {
1052 GNIX_WARN(FI_LOG_EP_CTRL,
1053 "_gnix_vc_smsg_init returned %s\n",
1054 fi_strerror(-ret));
1055 goto err;
1056 }
1057
1058 ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible);
1059 if ((ret == FI_SUCCESS) && (accessible == true)) {
1060 ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
1061 peer_segid,
1062 &peer_apid);
1063 if (ret == FI_SUCCESS) {
1064 vc->modes |= GNIX_VC_MODE_XPMEM;
1065 vc->peer_apid = peer_apid;
1066 }
1067 }
1068
1069 vc->peer_caps = peer_caps;
1070 vc->peer_key_offset = key_offset;
1071 vc->peer_id = src_vc_id;
1072 vc->conn_state = GNIX_VC_CONNECTED;
1073 GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
1074 vc);
1075
1076 ret = _gnix_vc_sched_new_conn(vc);
1077 if (ret != FI_SUCCESS)
1078 GNIX_WARN(FI_LOG_EP_DATA,
1079 "_gnix_vc_sched_new_conn returned %s\n",
1080 fi_strerror(-ret));
1081 }
1082
1083 err:
1084 COND_RELEASE(ep->requires_lock, &ep->vc_lock);
1085
1086 return ret;
1087 }
1088
1089 /*
1090 * callback function to process incoming messages
1091 */
__gnix_vc_recv_fn(struct gnix_cm_nic * cm_nic,char * msg_buffer,struct gnix_address src_cm_nic_addr)1092 static int __gnix_vc_recv_fn(struct gnix_cm_nic *cm_nic,
1093 char *msg_buffer,
1094 struct gnix_address src_cm_nic_addr)
1095 {
1096 int ret = FI_SUCCESS;
1097 uint8_t mtype;
1098
1099 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1100
1101 __gnix_vc_get_msg_type(msg_buffer, &mtype);
1102
1103 GNIX_DEBUG(FI_LOG_EP_CTRL, "got a message of type %d\n", mtype);
1104
1105 switch (mtype) {
1106 case GNIX_VC_CONN_REQ:
1107 ret = __gnix_vc_hndl_conn_req(cm_nic,
1108 msg_buffer,
1109 src_cm_nic_addr);
1110 break;
1111 case GNIX_VC_CONN_RESP:
1112 ret = __gnix_vc_hndl_conn_resp(cm_nic,
1113 msg_buffer,
1114 src_cm_nic_addr);
1115 break;
1116 default:
1117 GNIX_FATAL(FI_LOG_EP_CTRL, "Invalid message type: %d\n",
1118 mtype);
1119 }
1120
1121 return ret;
1122 }
1123
1124 /*
1125 * progress function for progressing a connection
1126 * ACK.
1127 */
1128
__gnix_vc_conn_ack_prog_fn(void * data,int * complete_ptr)1129 static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr)
1130 {
1131 int ret = FI_SUCCESS;
1132 int complete = 0;
1133 struct wq_hndl_conn_req *work_req_data;
1134 struct gnix_vc *vc;
1135 struct gnix_mbox *mbox = NULL;
1136 gni_smsg_attr_t smsg_mbox_attr;
1137 struct gnix_fid_ep *ep = NULL;
1138 struct gnix_fid_domain *dom = NULL;
1139 struct gnix_cm_nic *cm_nic = NULL;
1140 xpmem_segid_t my_segid;
1141 char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0};
1142 xpmem_apid_t peer_apid;
1143 bool accessible;
1144
1145 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1146
1147
1148 work_req_data = (struct wq_hndl_conn_req *)data;
1149
1150 vc = work_req_data->vc;
1151 if (vc == NULL)
1152 return -FI_EINVAL;
1153
1154 ep = vc->ep;
1155 if (ep == NULL)
1156 return -FI_EINVAL;
1157
1158 dom = ep->domain;
1159 if (dom == NULL)
1160 return -FI_EINVAL;
1161
1162 cm_nic = ep->cm_nic;
1163 if (cm_nic == NULL)
1164 return -FI_EINVAL;
1165
1166 COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);
1167
1168 /*
1169 * we may have already been moved to connected or
1170 * the datagram from an earlier conn request for this
1171 * vc was posted to GNI datagram state machine. The
1172 * connection will be completed in the __gnix_vc_hndl_conn_resp
1173 * datagram callback in the latter case.
1174 */
1175 if ((vc->conn_state == GNIX_VC_CONNECTED) ||
1176 (vc->modes & GNIX_VC_MODE_DG_POSTED)) {
1177 complete = 1;
1178 goto exit;
1179 }
1180
1181 /*
1182 * first see if we still need a mailbox
1183 */
1184
1185 if (vc->smsg_mbox == NULL) {
1186 ret = _gnix_mbox_alloc(ep->nic->mbox_hndl,
1187 &mbox);
1188 if (ret == FI_SUCCESS)
1189 vc->smsg_mbox = mbox;
1190 else
1191 goto exit;
1192 }
1193
1194 mbox = vc->smsg_mbox;
1195
1196 /*
1197 * prep the smsg_mbox_attr
1198 */
1199
1200 smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
1201 smsg_mbox_attr.msg_buffer = mbox->base;
1202 smsg_mbox_attr.buff_size = ep->nic->mem_per_mbox;
1203 smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
1204 smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
1205 smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
1206 smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;
1207
1208 /*
1209 * serialize the resp message in the buffer
1210 */
1211
1212 ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl,
1213 &my_segid);
1214 if (ret != FI_SUCCESS) {
1215 GNIX_WARN(FI_LOG_EP_CTRL, "_gni_xpmem_get_my_segid returned %s\n",
1216 fi_strerror(-ret));
1217 }
1218
1219 __gnix_vc_pack_conn_resp(sbuf,
1220 work_req_data->src_vc_ptr,
1221 (uint64_t)vc,
1222 vc->vc_id,
1223 &smsg_mbox_attr,
1224 &ep->nic->irq_mem_hndl,
1225 ep->caps,
1226 my_segid,
1227 ep->auth_key->key_offset);
1228
1229 /*
1230 * try to send the message, if it succeeds,
1231 * initialize mailbox and move vc to connected
1232 * state.
1233 */
1234
1235 ret = _gnix_cm_nic_send(cm_nic,
1236 sbuf,
1237 GNIX_CM_NIC_MAX_MSG_SIZE,
1238 vc->peer_cm_nic_addr);
1239 if (ret == FI_SUCCESS) {
1240 ret = _gnix_vc_smsg_init(vc,
1241 work_req_data->src_vc_id,
1242 &work_req_data->src_smsg_attr,
1243 &work_req_data->irq_mem_hndl);
1244 if (ret != FI_SUCCESS) {
1245 GNIX_WARN(FI_LOG_EP_CTRL,
1246 "_gnix_vc_smsg_init returned %s\n",
1247 fi_strerror(-ret));
1248 goto exit;
1249 }
1250
1251 /*
1252 * TODO: xpmem setup here
1253 */
1254
1255 ret = _gnix_xpmem_accessible(ep, vc->peer_cm_nic_addr,
1256 &accessible);
1257 if ((ret == FI_SUCCESS) && (accessible == true)) {
1258 ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
1259 work_req_data->peer_segid,
1260 &peer_apid);
1261 if (ret == FI_SUCCESS) {
1262 vc->modes |= GNIX_VC_MODE_XPMEM;
1263 vc->peer_apid = peer_apid;
1264 }
1265 }
1266
1267 complete = 1;
1268 vc->conn_state = GNIX_VC_CONNECTED;
1269 vc->peer_id = work_req_data->src_vc_id;
1270 GNIX_DEBUG(FI_LOG_EP_CTRL,
1271 "moving vc %p to connected\n",vc);
1272 vc->modes |= GNIX_VC_MODE_DG_POSTED;
1273
1274 ret = _gnix_vc_sched_new_conn(vc);
1275 if (ret != FI_SUCCESS)
1276 GNIX_WARN(FI_LOG_EP_DATA,
1277 "_gnix_vc_sched_new_conn returned %s\n",
1278 fi_strerror(-ret));
1279 } else if (ret == -FI_EAGAIN) {
1280 ret = FI_SUCCESS;
1281 } else {
1282 GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n",
1283 fi_strerror(-ret));
1284 }
1285
1286 exit:
1287 COND_RELEASE(ep->requires_lock, &ep->vc_lock);
1288
1289 *complete_ptr = complete;
1290 return ret;
1291 }
1292
/*
 * Work-queue progress function for initiating a connection request to a
 * remote peer.  Allocates the local SMSG mailbox (if not already
 * present), packs a conn_req message describing it, and attempts to send
 * it via the CM NIC.  On a successful send the VC moves to CONNECTING;
 * -FI_EAGAIN leaves the work request queued for retry (*complete_ptr
 * stays 0); any other send failure is fatal.
 */
static int __gnix_vc_conn_req_prog_fn(void *data, int *complete_ptr)
{
	int ret = FI_SUCCESS;
	int complete = 0;
	struct gnix_vc *vc = (struct gnix_vc *)data;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t smsg_mbox_attr;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	xpmem_segid_t my_segid;
	char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0};
	struct gnix_auth_key *auth_key;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/* validate everything this function dereferences */
	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	auth_key = ep->auth_key;
	if (auth_key == NULL)
		return -FI_EINVAL;

	assert(auth_key->enabled);

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* another path (e.g. an inbound conn req) may have already begun
	 * or completed the connection; nothing to do then */
	if ((vc->conn_state == GNIX_VC_CONNECTING) ||
		(vc->conn_state == GNIX_VC_CONNECTED)) {
		complete = 1;
		goto err;
	}

	/*
	 * first see if we still need a mailbox
	 */

	if (vc->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl,
				       &mbox);
		if (ret == FI_SUCCESS)
			vc->smsg_mbox = mbox;
		else
			goto err;   /* likely -FI_EAGAIN; retried later */
	}

	mbox = vc->smsg_mbox;

	/*
	 * prep the smsg_mbox_attr
	 */

	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 * serialize the message in the buffer
	 */

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"conn req tx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d CM NIC Id %d vc %p)\n",
		 ep->src_addr.gnix_addr.device_addr,
		 ep->src_addr.gnix_addr.cdm_id,
		 vc->peer_addr.device_addr,
		 vc->peer_addr.cdm_id,
		 vc->peer_cm_nic_addr.cdm_id,
		 vc);

	ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl,
				       &my_segid);
	if (ret != FI_SUCCESS) {
		/* non-fatal: XPMEM is an optimization, keep going */
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_xpmem_get_my_segid returned %s\n",
			  fi_strerror(-ret));
	}

	__gnix_vc_pack_conn_req(sbuf,
				&vc->peer_addr,
				&ep->src_addr.gnix_addr,
				vc->vc_id,
				(uint64_t)vc,
				&smsg_mbox_attr,
				&ep->nic->irq_mem_hndl,
				ep->caps,
				my_segid,
				ep->src_addr.name_type,
				ep->src_addr.rx_ctx_cnt,
				auth_key->key_offset);

	/*
	 * try to send the message, if -FI_EAGAIN is returned, okay,
	 * just don't mark complete.
	 */

	ret = _gnix_cm_nic_send(cm_nic,
				sbuf,
				GNIX_CM_NIC_MAX_MSG_SIZE,
				vc->peer_cm_nic_addr);
	if (ret == FI_SUCCESS) {
		complete = 1;
		vc->conn_state = GNIX_VC_CONNECTING;
		GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connecting\n",
			   vc);
		vc->modes |= GNIX_VC_MODE_DG_POSTED;
	} else if (ret == -FI_EAGAIN) {
		ret = FI_SUCCESS;
	} else {
		GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n",
			   fi_strerror(-ret));
	}

err:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	*complete_ptr = complete;
	return ret;
}
1423
1424 /*
1425 * conn ack completer function for work queue element,
1426 * free the previously allocated wq_hndl_conn_req
1427 * data struct
1428 */
/*
 * Completer for the conn-ack work request: releases the
 * wq_hndl_conn_req allocated when the request was queued.
 */
static int __gnix_vc_conn_ack_comp_fn(void *data)
{
	free(data);
	return FI_SUCCESS;
}
1434
1435 /*
1436 * connect completer function for work queue element,
1437 * sort of a NO-OP for now.
1438 */
/*
 * Completer for the conn-req work request.  The work data is the VC
 * itself (owned elsewhere), so there is nothing to free.
 */
static int __gnix_vc_conn_req_comp_fn(void *data)
{
	return FI_SUCCESS;
}
1443
1444 /*******************************************************************************
1445 * Internal API functions
1446 ******************************************************************************/
_gnix_vc_alloc(struct gnix_fid_ep * ep_priv,struct gnix_av_addr_entry * entry,struct gnix_vc ** vc)1447 int _gnix_vc_alloc(struct gnix_fid_ep *ep_priv,
1448 struct gnix_av_addr_entry *entry, struct gnix_vc **vc)
1449
1450 {
1451 int ret = FI_SUCCESS;
1452 int remote_id;
1453 struct gnix_vc *vc_ptr = NULL;
1454 struct gnix_nic *nic = NULL;
1455 struct dlist_entry *de = NULL;
1456
1457 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1458
1459 nic = ep_priv->nic;
1460 if (nic == NULL)
1461 return -FI_EINVAL;
1462
1463 /*
1464 * allocate VC from domain's vc_freelist
1465 */
1466
1467 ret = _gnix_fl_alloc(&de, &nic->vc_freelist);
1468 while (ret == -FI_EAGAIN)
1469 ret = _gnix_fl_alloc(&de, &nic->vc_freelist);
1470 if (ret == FI_SUCCESS) {
1471 vc_ptr = container_of(de, struct gnix_vc, fr_list);
1472 } else
1473 return ret;
1474
1475 vc_ptr->conn_state = GNIX_VC_CONN_NONE;
1476 if (entry) {
1477 memcpy(&vc_ptr->peer_addr,
1478 &entry->gnix_addr,
1479 sizeof(struct gnix_address));
1480 vc_ptr->peer_cm_nic_addr.device_addr =
1481 entry->gnix_addr.device_addr;
1482 vc_ptr->peer_cm_nic_addr.cdm_id =
1483 entry->cm_nic_cdm_id;
1484 } else {
1485 vc_ptr->peer_addr.device_addr = -1;
1486 vc_ptr->peer_addr.cdm_id = -1;
1487 vc_ptr->peer_cm_nic_addr.device_addr = -1;
1488 vc_ptr->peer_cm_nic_addr.cdm_id = -1;
1489 }
1490 vc_ptr->ep = ep_priv;
1491
1492 dlist_init(&vc_ptr->prog_list);
1493 dlist_init(&vc_ptr->work_queue);
1494 dlist_init(&vc_ptr->tx_queue);
1495
1496 vc_ptr->peer_fi_addr = FI_ADDR_NOTAVAIL;
1497
1498 dlist_init(&vc_ptr->list);
1499
1500 ofi_atomic_initialize32(&vc_ptr->outstanding_tx_reqs, 0);
1501 ret = _gnix_alloc_bitmap(&vc_ptr->flags, 1, NULL);
1502 assert(!ret);
1503
1504 /*
1505 * we need an id for the vc to allow for quick lookup
1506 * based on GNI_CQ_GET_INST_ID
1507 */
1508
1509 ret = _gnix_nic_get_rem_id(nic, &remote_id, vc_ptr);
1510 if (ret != FI_SUCCESS)
1511 goto err;
1512 vc_ptr->vc_id = remote_id;
1513 vc_ptr->gnix_ep_name = NULL;
1514
1515 *vc = vc_ptr;
1516
1517 return ret;
1518
1519 err:
1520 if (vc_ptr)
1521 free(vc_ptr);
1522 return ret;
1523 }
1524
__gnix_vc_cancel(struct gnix_vc * vc)1525 static void __gnix_vc_cancel(struct gnix_vc *vc)
1526 {
1527 struct gnix_nic *nic = vc->ep->nic;
1528
1529 COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
1530 if (!dlist_empty(&vc->prog_list))
1531 dlist_remove_init(&vc->prog_list);
1532 COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
1533 }
1534
1535 /* Destroy an unconnected VC. More Support is needed to shutdown and destroy
1536 * an active VC. */
/* Destroy an unconnected VC. More Support is needed to shutdown and destroy
 * an active VC.
 *
 * Teardown sequence (order matters):
 *  1. mark the VC terminating,
 *  2. drain/unbind and destroy the gni_ep, progressing the NIC while
 *     GNI reports outstanding transactions (GNI_RC_NOT_DONE),
 *  3. remove the VC from the NIC progress list,
 *  4. release the SMSG mailbox, remote id, flags bitmap and cached
 *     ep name,
 *  5. return the VC to the NIC's freelist.
 *
 * Returns FI_SUCCESS or the last non-fatal resource-release error.
 */
int _gnix_vc_destroy(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_nic *nic = NULL;
	gni_return_t status = GNI_RC_NOT_DONE;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (vc->ep == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n");
		return -FI_EINVAL;
	}

	nic = vc->ep->nic;
	if (nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc);
		return -FI_EINVAL;
	}

	/*
	 * move vc state to terminating
	 */

	vc->conn_state = GNIX_VC_CONN_TERMINATING;

	/*
	 * try to unbind the gni_ep if non-NULL.
	 * If there are SMSG or PostFMA/RDMA outstanding
	 * wait here for them to complete
	 */

	if (vc->gni_ep != NULL) {
		while (status == GNI_RC_NOT_DONE) {

			COND_ACQUIRE(nic->requires_lock, &nic->lock);
			status = GNI_EpUnbind(vc->gni_ep);
			COND_RELEASE(nic->requires_lock, &nic->lock);

			if ((status != GNI_RC_NOT_DONE) &&
				(status != GNI_RC_SUCCESS)) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					"GNI_EpUnBind returned %s\n",
					  gni_err_str[status]);
				break;
			}

			/* progress the NIC so pending transactions can
			 * retire and the unbind can succeed */
			if (status == GNI_RC_NOT_DONE)
				_gnix_nic_progress(nic);
		}
		COND_ACQUIRE(nic->requires_lock, &nic->lock);
		status = GNI_EpDestroy(vc->gni_ep);
		COND_RELEASE(nic->requires_lock, &nic->lock);
		if (status != GNI_RC_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "GNI_EpDestroy returned %s\n",
				  gni_err_str[status]);
	}

	/*
	 * if the vc is in a nic's work queue, remove it
	 */
	__gnix_vc_cancel(vc);

	/*
	 * We may eventually want to check the state of the VC, if we
	 * implement true VC shutdown.

	if ((vc->conn_state != GNIX_VC_CONN_NONE)
		&& (vc->conn_state != GNIX_VC_CONN_TERMINATED)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			      "vc conn state  %d\n",
			       vc->conn_state);
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n");
		return -FI_EBUSY;
	}
	 */

	/*
	 * if send_q not empty, return -FI_EBUSY
	 * Note for FI_EP_MSG type eps, this behavior
	 * may not be correct for handling fi_shutdown.
	 */

	if (!dlist_empty(&vc->tx_queue))
		GNIX_FATAL(FI_LOG_EP_CTRL, "VC TX queue not empty\n");

	if (ofi_atomic_get32(&vc->outstanding_tx_reqs))
		GNIX_FATAL(FI_LOG_EP_CTRL,
			   "VC outstanding_tx_reqs out of sync: %d\n",
			   ofi_atomic_get32(&vc->outstanding_tx_reqs));

	if (vc->smsg_mbox != NULL) {
		ret = _gnix_mbox_free(vc->smsg_mbox);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_mbox_free returned %s\n",
			      fi_strerror(-ret));
		vc->smsg_mbox = NULL;
	}

	ret = _gnix_nic_free_rem_id(nic, vc->vc_id);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
		      "__gnix_vc_free_id returned %s\n",
		      fi_strerror(-ret));

	_gnix_free_bitmap(&vc->flags);

	if (vc->gnix_ep_name != NULL) {
		free(vc->gnix_ep_name);
		vc->gnix_ep_name = NULL;
	}

	/*
	 * put VC back on the freelist
	 */

	vc->conn_state = GNIX_VC_CONN_NONE;
	_gnix_fl_free(&vc->fr_list, &nic->vc_freelist);

	return ret;
}
1659
/*
 * Start connecting a VC to its peer.  A no-op if the VC is already
 * connecting/connected.  Loopback (EP connecting to itself) is handled
 * synchronously; otherwise a conn-req work request is queued on the CM
 * NIC's work queue and progressed asynchronously.
 *
 * Only FI_EP_RDM / FI_EP_DGRAM endpoints may use this path.
 */
int _gnix_vc_connect(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	struct gnix_work_req *work_req;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * can happen that we are already connecting, or
	 * are connected
	 */

	if ((vc->conn_state == GNIX_VC_CONNECTING) ||
		(vc->conn_state == GNIX_VC_CONNECTED)) {
		return FI_SUCCESS;
	}

	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	/*
	 * only endpoints of type FI_EP_RDM use this
	 * connection method
	 */
	if (!GNIX_EP_RDM_DGM(ep->type))
		return -FI_EINVAL;

	/*
	 * check if this EP is connecting to itself
	 */

	if (GNIX_ADDR_EQUAL(ep->src_addr.gnix_addr, vc->peer_addr)) {
		return __gnix_vc_connect_to_self(vc);
	}

	/*
	 * allocate a work request and put it
	 * on the cm_nic work queue.
	 */

	work_req = calloc(1, sizeof(*work_req));
	if (work_req == NULL)
		return -FI_ENOMEM;

	/* progress fn sends the conn req; completer is a no-op since the
	 * work data is the VC itself */
	work_req->progress_fn = __gnix_vc_conn_req_prog_fn;
	work_req->data = vc;
	work_req->completer_fn = __gnix_vc_conn_req_comp_fn;
	work_req->completer_data = vc;

	/*
	 * add the work request to the tail of the
	 * cm_nic's work queue, progress the cm_nic.
	 */

	fastlock_acquire(&cm_nic->wq_lock);
	dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
	fastlock_release(&cm_nic->wq_lock);

	return ret;
}
1727
1728 /******************************************************************************
1729 *
1730 * VC RX progress
1731 *
1732 *****************************************************************************/
1733
1734 /* Process a VC's SMSG mailbox.
1735 *
1736 * Note: EP must be locked. */
_gnix_vc_dequeue_smsg(struct gnix_vc * vc)1737 int _gnix_vc_dequeue_smsg(struct gnix_vc *vc)
1738 {
1739 int ret = FI_SUCCESS;
1740 struct gnix_nic *nic;
1741 gni_return_t status;
1742 void *msg_ptr;
1743 uint8_t tag;
1744
1745 GNIX_TRACE(FI_LOG_EP_DATA, "\n");
1746
1747 nic = vc->ep->nic;
1748 assert(nic != NULL);
1749
1750 do {
1751 tag = GNI_SMSG_ANY_TAG;
1752 status = GNI_SmsgGetNextWTag(vc->gni_ep,
1753 &msg_ptr,
1754 &tag);
1755
1756 if (status == GNI_RC_SUCCESS) {
1757 GNIX_DEBUG(FI_LOG_EP_DATA, "Found RX (%p)\n", vc);
1758 ret = nic->smsg_callbacks[tag](vc, msg_ptr);
1759 if (ret != FI_SUCCESS) {
1760 /* Stalled, reschedule */
1761 break;
1762 }
1763 } else if (status == GNI_RC_NOT_DONE) {
1764 /* No more work. */
1765 ret = FI_SUCCESS;
1766 break;
1767 } else {
1768 GNIX_WARN(FI_LOG_EP_DATA,
1769 "GNI_SmsgGetNextWTag returned %s\n",
1770 gni_err_str[status]);
1771 ret = gnixu_to_fi_errno(status);
1772 break;
1773 }
1774 } while (1);
1775
1776 return ret;
1777 }
1778
1779 /* Progress VC RXs. Reschedule VC if more there is more work.
1780 *
1781 * Note: EP must be locked. */
__gnix_vc_rx_progress(struct gnix_vc * vc)1782 static int __gnix_vc_rx_progress(struct gnix_vc *vc)
1783 {
1784 int ret;
1785
1786 /* Process pending RXs */
1787 COND_ACQUIRE(vc->ep->nic->requires_lock, &vc->ep->nic->lock);
1788 ret = _gnix_vc_dequeue_smsg(vc);
1789 COND_RELEASE(vc->ep->nic->requires_lock, &vc->ep->nic->lock);
1790
1791 if (ret != FI_SUCCESS) {
1792 /* We didn't finish processing RXs. Low memory likely.
1793 * Try again later. Return error to abort processing
1794 * other VCs. */
1795 _gnix_vc_rx_schedule(vc);
1796 return -FI_EAGAIN;
1797 }
1798
1799 /* Return success to continue processing other VCs */
1800 return FI_SUCCESS;
1801 }
1802
1803 /******************************************************************************
1804 *
1805 * VC work progress
1806 *
1807 *****************************************************************************/
1808
1809 /* Schedule deferred request processing. Usually used in RX completers.
1810 *
1811 * Note: EP must be locked. */
_gnix_vc_queue_work_req(struct gnix_fab_req * req)1812 int _gnix_vc_queue_work_req(struct gnix_fab_req *req)
1813 {
1814 struct gnix_vc *vc = req->vc;
1815
1816 dlist_insert_tail(&req->dlist, &vc->work_queue);
1817 __gnix_vc_work_schedule(vc);
1818
1819 return FI_SUCCESS;
1820 }
1821
1822 /* Schedule deferred request processing. Used in TX completers where VC lock is
1823 * not yet held. */
_gnix_vc_requeue_work_req(struct gnix_fab_req * req)1824 int _gnix_vc_requeue_work_req(struct gnix_fab_req *req)
1825 {
1826 int ret;
1827
1828 COND_ACQUIRE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock);
1829 ret = _gnix_vc_queue_work_req(req);
1830 COND_RELEASE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock);
1831
1832 return ret;
1833 }
1834
1835 /* Process deferred request work on the VC.
1836 *
1837 * Note: EP must be locked. */
__gnix_vc_push_work_reqs(struct gnix_vc * vc)1838 static int __gnix_vc_push_work_reqs(struct gnix_vc *vc)
1839 {
1840 int ret, fi_rc = FI_SUCCESS;
1841 struct gnix_fab_req *req;
1842
1843 while (1) {
1844 req = dlist_first_entry(&vc->work_queue,
1845 struct gnix_fab_req,
1846 dlist);
1847 if (!req)
1848 break;
1849
1850 dlist_remove_init(&req->dlist);
1851
1852 ret = req->work_fn(req);
1853 if (ret != FI_SUCCESS) {
1854 /* Re-schedule failed work. */
1855 _gnix_vc_queue_work_req(req);
1856
1857 /* FI_ENOSPC is reserved to indicate a lack of
1858 * TXDs, which are shared by all VCs on the
1859 * NIC. The other likely error is FI_EAGAIN
1860 * due to a lack of SMSG credits. */
1861 if ((ret != -FI_ENOSPC) &&
1862 (ret != -FI_EAGAIN)) {
1863 /*
1864 * TODO: Report error (via CQ err?)
1865 * Note: This error can't be reported here.
1866 */
1867 GNIX_FATAL(FI_LOG_EP_DATA,
1868 "Failed to push request %p: %s\n",
1869 req, fi_strerror(-ret));
1870 }
1871
1872 fi_rc = -FI_EAGAIN;
1873 break;
1874 } else {
1875 GNIX_INFO(FI_LOG_EP_DATA,
1876 "Request processed: %p\n", req);
1877 }
1878 }
1879
1880 return fi_rc;
1881 }
1882
1883 /******************************************************************************
1884 *
1885 * VC TX progress
1886 *
1887 *****************************************************************************/
1888
1889 /* Attempt to initiate a TX request. If the TX queue is blocked (due to low
1890 * resources or a FI_FENCE request), schedule the request to be sent later.
1891 *
1892 * Note: EP must be locked. */
/* Attempt to initiate a TX request. If the TX queue is blocked (due to low
 * resources or a FI_FENCE request), schedule the request to be sent later.
 *
 * Handles three special request classes before the plain path:
 *  - FI_MORE: a non-FI_MORE request flushes any chains previously
 *    accumulated on the EP's more_write/more_read lists by recursively
 *    queueing their head requests,
 *  - FI_TRIGGER: the request may be parked with the trigger machinery
 *    until its condition fires,
 *  - FI_FENCE: queued until all outstanding TXs on the VC complete.
 *
 * Always returns FI_SUCCESS; a request that cannot run now is placed on
 * the VC's tx_queue and the VC is scheduled for TX progress.
 *
 * Note: EP must be locked. */
int _gnix_vc_queue_tx_req(struct gnix_fab_req *req)
{
	int rc = FI_SUCCESS, queue_tx = 0;
	struct gnix_vc *vc = req->vc;
	struct gnix_fid_ep *ep = req->gnix_ep;
	struct gnix_fab_req *more_req;
	int connected;
	struct slist_entry *sle;

	/* Check if there is an outstanding fi_more chain to initiate */
	if ((!(req->flags & FI_MORE)) && (!(slist_empty(&ep->more_write)) ||
	     !(slist_empty(&ep->more_read)))) {
		if (!slist_empty(&ep->more_write)) {
			sle = ep->more_write.head;
			more_req = container_of(sle, struct gnix_fab_req,
						rma.sle);
			GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request "
					"from more_write. Queuing Request\n");
			_gnix_vc_queue_tx_req(more_req);
			slist_init(&ep->more_write);
		}
		if (!slist_empty(&ep->more_read)) {
			sle = ep->more_read.head;
			more_req = container_of(sle, struct gnix_fab_req,
						rma.sle);
			GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request "
					"from more_read. Queuing Request\n");
			_gnix_vc_queue_tx_req(more_req);
			slist_init(&ep->more_read);
		}
	}

	if (req->flags & FI_TRIGGER) {
		rc = _gnix_trigger_queue_req(req);

		/* FI_SUCCESS means the request was queued to wait for the
		 * trigger condition. */
		if (rc == FI_SUCCESS)
			return FI_SUCCESS;
	}

	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	if ((req->flags & FI_FENCE) && ofi_atomic_get32(&vc->outstanding_tx_reqs)) {
		/* Fence request must be queued until all outstanding TX
		 * requests are completed.  Subsequent requests will be queued
		 * due to non-empty tx_queue. */
		queue_tx = 1;
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Queued FI_FENCE request (%p) on VC\n",
			  req);
	} else if (connected && dlist_empty(&vc->tx_queue)) {
		ofi_atomic_inc32(&vc->outstanding_tx_reqs);

		/* try to initiate request */
		rc = req->work_fn(req);
		if (rc == FI_SUCCESS) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request processed: %p (OTX: %d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
		} else if (rc != -FI_ECANCELED) {
			/* work_fn could not run now; undo the count and
			 * fall back to queueing */
			ofi_atomic_dec32(&vc->outstanding_tx_reqs);
			queue_tx = 1;
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "Queued request (%p) on full VC\n",
				  req);
		}
	} else {
		/* not connected yet, or older TXs are already queued */
		queue_tx = 1;
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Queued request (%p) on busy VC\n",
			  req);
	}

	if (OFI_UNLIKELY(queue_tx)) {
		dlist_insert_tail(&req->dlist, &vc->tx_queue);
		_gnix_vc_tx_schedule(vc);
	}

	return FI_SUCCESS;
}
1974
1975 /* Push TX requests queued on the VC.
1976 *
1977 * Note: EP must be locked. */
/* Push TX requests queued on the VC.
 *
 * Runs queued requests in order until the queue drains, a FI_FENCE
 * request must wait for outstanding TXs (returns FI_SUCCESS so other
 * VCs keep progressing), or a request fails and is put back at the head
 * (returns -FI_EAGAIN).
 *
 * Note: EP must be locked. */
static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc)
{
	int ret, fi_rc = FI_SUCCESS;
	struct gnix_fab_req *req;

	req = dlist_first_entry(&vc->tx_queue, struct gnix_fab_req, dlist);
	while (req) {
		if ((req->flags & FI_FENCE) &&
		    ofi_atomic_get32(&vc->outstanding_tx_reqs)) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request queue stalled on FI_FENCE request: %p (%d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
			/* Success is returned to allow processing of more VCs.
			 * This VC will be rescheduled when the fence request
			 * is completed. */
			break;
		}

		ofi_atomic_inc32(&vc->outstanding_tx_reqs);
		dlist_remove_init(&req->dlist);

		ret = req->work_fn(req);
		if (ret == FI_SUCCESS) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request processed: %p (OTX: %d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
		} else if (ret != -FI_ECANCELED) {
			/* Work failed.  Reschedule to put this VC
			 * back on the end of the list and return
			 * -FI_EAGAIN. */

			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "Failed to push TX request %p: %s\n",
				  req, fi_strerror(-ret));
			fi_rc = -FI_EAGAIN;

			/* FI_ENOSPC is reserved to indicate a lack of
			 * TXDs, which are shared by all VCs on the
			 * NIC. The other likely error is FI_EAGAIN
			 * due to a lack of SMSG credits. */

			if ((ret != -FI_ENOSPC) && (ret != -FI_EAGAIN)) {
				/* TODO report error? */
				GNIX_WARN(FI_LOG_EP_DATA,
					  "Failed to push TX request %p: %s\n",
					  req, fi_strerror(-ret));
			}

			dlist_insert_head(&req->dlist, &vc->tx_queue);
			ofi_atomic_dec32(&vc->outstanding_tx_reqs);

			/* _gnix_vc_tx_schedule() must come after the request
			 * is inserted into the VC's tx_queue. */
			_gnix_vc_tx_schedule(vc);
			break;

		}

		req = dlist_first_entry(&vc->tx_queue,
					struct gnix_fab_req,
					dlist);
	}

	return fi_rc;
}
2043
2044 /* Return next VC needing progress on the NIC. */
__gnix_nic_next_pending_vc(struct gnix_nic * nic)2045 static struct gnix_vc *__gnix_nic_next_pending_vc(struct gnix_nic *nic)
2046 {
2047 struct gnix_vc *vc = NULL;
2048
2049 COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
2050 vc = dlist_first_entry(&nic->prog_vcs, struct gnix_vc, prog_list);
2051 if (vc)
2052 dlist_remove_init(&vc->prog_list);
2053 COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
2054
2055 if (vc) {
2056 GNIX_INFO(FI_LOG_EP_CTRL, "Dequeued progress VC (%p)\n", vc);
2057 _gnix_clear_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED);
2058 }
2059
2060 return vc;
2061 }
2062
_gnix_vc_progress(struct gnix_vc * vc)2063 int _gnix_vc_progress(struct gnix_vc *vc)
2064 {
2065 int ret, ret_tx;
2066
2067 ret = __gnix_vc_rx_progress(vc);
2068 if (ret != FI_SUCCESS)
2069 GNIX_DEBUG(FI_LOG_EP_CTRL,
2070 "__gnix_vc_rx_progress failed: %d\n", ret);
2071
2072 ret = __gnix_vc_push_work_reqs(vc);
2073 if (ret != FI_SUCCESS)
2074 GNIX_DEBUG(FI_LOG_EP_CTRL,
2075 "__gnix_vc_push_work_reqs failed: %d\n", ret);
2076
2077 ret_tx = __gnix_vc_push_tx_reqs(vc);
2078 if (ret != FI_SUCCESS)
2079 GNIX_DEBUG(FI_LOG_EP_CTRL,
2080 "__gnix_vc_push_tx_reqs failed: %d\n", ret);
2081
2082 return ret_tx;
2083 }
2084
2085 /* Progress all NIC VCs needing work. */
_gnix_vc_nic_progress(struct gnix_nic * nic)2086 int _gnix_vc_nic_progress(struct gnix_nic *nic)
2087 {
2088 struct gnix_vc *vc;
2089 int ret;
2090
2091 /*
2092 * we can't just spin and spin in this loop because
2093 * none of the functions invoked below end up dequeuing
2094 * GNI CQE's and subsequently freeing up TX descriptors.
2095 * So, if the tx reqs routine returns -FI_EAGAIN, break out.
2096 */
2097 while ((vc = __gnix_nic_next_pending_vc(nic))) {
2098 COND_ACQUIRE(vc->ep->requires_lock, &vc->ep->vc_lock);
2099
2100 if (vc->conn_state == GNIX_VC_CONNECTED) {
2101 ret = _gnix_vc_progress(vc);
2102 }
2103
2104 COND_RELEASE(vc->ep->requires_lock, &vc->ep->vc_lock);
2105
2106 if (ret != FI_SUCCESS)
2107 break;
2108 }
2109
2110 return FI_SUCCESS;
2111 }
2112
2113 /* Schedule VC for progress.
2114 *
2115 * Note: EP must be locked.
2116 * TODO: Better implementation for rx/work/tx VC scheduling. */
_gnix_vc_schedule(struct gnix_vc * vc)2117 int _gnix_vc_schedule(struct gnix_vc *vc)
2118 {
2119 struct gnix_nic *nic = vc->ep->nic;
2120
2121 if (!_gnix_test_and_set_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED)) {
2122 COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
2123 dlist_insert_tail(&vc->prog_list, &nic->prog_vcs);
2124 COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
2125 GNIX_DEBUG(FI_LOG_EP_CTRL, "Scheduled VC (%p)\n", vc);
2126 }
2127
2128 return FI_SUCCESS;
2129 }
2130
2131 /* Schedule the VC for RX progress. */
/* Schedule the VC for RX progress.  Currently a thin alias for
 * _gnix_vc_schedule(); kept separate for future per-direction
 * scheduling (see TODO on _gnix_vc_schedule). */
int _gnix_vc_rx_schedule(struct gnix_vc *vc)
{
	return _gnix_vc_schedule(vc);
}
2136
2137 /* Schedule the VC for work progress. */
/* Schedule the VC for deferred-work progress.  Thin alias for
 * _gnix_vc_schedule(); kept separate for future per-direction
 * scheduling. */
static int __gnix_vc_work_schedule(struct gnix_vc *vc)
{
	return _gnix_vc_schedule(vc);
}
2142
2143 /* Schedule the VC for TX progress. */
/* Schedule the VC for TX progress.  Thin alias for _gnix_vc_schedule();
 * kept separate for future per-direction scheduling. */
int _gnix_vc_tx_schedule(struct gnix_vc *vc)
{
	return _gnix_vc_schedule(vc);
}
2148
2149 /* For a newly scheduled VC. Do any queued work now that the connection is
2150 * complete.
2151 *
2152 * Note: EP must be locked. */
/* For a newly scheduled VC. Do any queued work now that the connection is
 * complete.  Marks the VC scheduled, then immediately progresses it so
 * work queued while connecting is not left waiting.
 *
 * Note: EP must be locked. */
int _gnix_vc_sched_new_conn(struct gnix_vc *vc)
{
	_gnix_vc_schedule(vc);
	return _gnix_vc_progress(vc);
}
2158
2159 /* Look up an EP's VC using fi_addr_t.
2160 *
2161 * Note: EP must be locked. */
/* Look up an EP's VC using fi_addr_t.
 *
 * RDM/DGRAM endpoints resolve the VC through the AV mapping; MSG
 * endpoints have a single VC, available only once connected.  Stores
 * the VC in *vc_ptr and returns FI_SUCCESS, or -FI_EINVAL / the lookup
 * error on failure.
 *
 * Note: EP must be locked. */
int _gnix_vc_ep_get_vc(struct gnix_fid_ep *ep, fi_addr_t dest_addr,
		       struct gnix_vc **vc_ptr)
{
	int rc;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (GNIX_EP_RDM_DGM(ep->type)) {
		rc = __gnix_vc_get_vc_by_fi_addr(ep, dest_addr, vc_ptr);
		if (OFI_UNLIKELY(rc != FI_SUCCESS)) {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "__gnix_vc_get_vc_by_fi_addr returned %s\n",
				  fi_strerror(-rc));
			return rc;
		}
		return FI_SUCCESS;
	}

	if (ep->type == FI_EP_MSG) {
		if (!GNIX_EP_CONNECTED(ep))
			return -FI_EINVAL;
		*vc_ptr = ep->vc;
		return FI_SUCCESS;
	}

	GNIX_WARN(FI_LOG_EP_DATA, "Invalid endpoint type: %d\n",
		  ep->type);
	return -FI_EINVAL;
}
2191
_gnix_vc_peer_fi_addr(struct gnix_vc * vc)2192 fi_addr_t _gnix_vc_peer_fi_addr(struct gnix_vc *vc)
2193 {
2194 int rc;
2195
2196 /* If FI_SOURCE capability was requested, do a reverse lookup of a VC's
2197 * FI address once. Skip translation on connected EPs (no AV). */
2198 if (vc->ep->av && vc->peer_fi_addr == FI_ADDR_NOTAVAIL) {
2199 rc = _gnix_av_reverse_lookup(vc->ep->av,
2200 vc->peer_addr,
2201 &vc->peer_fi_addr);
2202 if (rc != FI_SUCCESS)
2203 GNIX_WARN(FI_LOG_EP_DATA,
2204 "_gnix_av_reverse_lookup() failed: %d\n",
2205 rc);
2206 }
2207
2208 return vc->peer_fi_addr;
2209 }
2210
/*
 * Register __gnix_vc_recv_fn as the CM NIC's receive callback so
 * incoming conn req/resp messages are dispatched to the VC handlers.
 * A warning is issued if a different callback was already installed.
 */
int _gnix_vc_cm_init(struct gnix_cm_nic *cm_nic)
{
	int ret = FI_SUCCESS;
	gnix_cm_nic_rcv_cb_func *ofunc = NULL;
	struct gnix_nic *nic = NULL;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	nic = cm_nic->nic;
	assert(nic != NULL);

	COND_ACQUIRE(nic->requires_lock, &nic->lock);
	ret = _gnix_cm_nic_reg_recv_fn(cm_nic,
					__gnix_vc_recv_fn,
					&ofunc);
	/* NOTE(review): this warns on a pre-existing *different* callback,
	 * but formats fi_strerror(-ret) which may be "Success" here — the
	 * message and condition look mismatched; verify intent. */
	if ((ofunc != NULL) &&
	    (ofunc != __gnix_vc_recv_fn)) {
		GNIX_WARN(FI_LOG_EP_DATA, "callback reg failed: %s\n",
			  fi_strerror(-ret));
	}

	COND_RELEASE(nic->requires_lock, &nic->lock);

	return ret;
}
2236
2237