1 /*
2  * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
3  * Copyright (c) 2015-2018 Los Alamos National Security, LLC.
4  *                         All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 /*
36  * code for managing VC's
37  */
38 
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "gnix.h"
44 #include "gnix_vc.h"
45 #include "gnix_util.h"
46 #include "gnix_datagram.h"
47 #include "gnix_cm_nic.h"
48 #include "gnix_nic.h"
49 #include "gnix_ep.h"
50 #include "gnix_mbox_allocator.h"
51 #include "gnix_hashtable.h"
52 #include "gnix_av.h"
53 #include "gnix_trigger.h"
54 #include "gnix_vector.h"
55 #include "gnix_xpmem.h"
56 #include "gnix_cq.h"
57 
58 /*
59  * forward declarations and local struct defs.
60  */
61 
/* Work-queue payload carried while acknowledging an incoming connection
 * request: everything unpacked from the requester's message that the
 * response path needs (see __gnix_vc_hndl_conn_req / conn_ack fns). */
struct wq_hndl_conn_req {
	gni_smsg_attr_t src_smsg_attr;	/* requester's SMSG mbox attributes */
	int src_vc_id;			/* vc id allocated by the requester */
	struct gnix_vc *vc;		/* local (responding) VC */
	uint64_t src_vc_ptr;		/* vaddr of requester's VC struct */
	gni_mem_handle_t irq_mem_hndl;	/* requester's irq cq mem handle */
	xpmem_segid_t peer_segid;	/* requester's XPMEM segment id */
};
70 
71 static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr);
72 static int __gnix_vc_conn_ack_comp_fn(void *data);
73 static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc);
74 
75 static int __gnix_vc_work_schedule(struct gnix_vc *vc);
76 static int _gnix_vc_sched_new_conn(struct gnix_vc *vc);
77 
78 /*******************************************************************************
79  * Helper functions
80  ******************************************************************************/
81 
82 /**
83  * Set key to the given gnix_addr.
84  *
85  * NOTE: If struct gnix_address is ever bit packed or packed by
86  * the compiler this assignment may not set key to the correct
87  * bytes.
88  */
/**
 * Derive a hash-table key from a gnix address by reinterpreting its
 * leading bytes as a gnix_ht_key_t.
 *
 * NOTE: If struct gnix_address is ever bit packed or packed by
 * the compiler this reinterpretation may not set key to the correct
 * bytes.
 */
static inline void __gnix_vc_set_ht_key(void *gnix_addr,
					gnix_ht_key_t *key)
{
	gnix_ht_key_t *addr_as_key = (gnix_ht_key_t *)gnix_addr;

	*key = *addr_as_key;
}
94 
_gnix_ep_vc_lookup(struct gnix_fid_ep * ep,uint64_t key)95 static struct gnix_vc *_gnix_ep_vc_lookup(struct gnix_fid_ep *ep, uint64_t key)
96 {
97 	struct gnix_vc *vc = NULL;
98 	int ret;
99 	int i;
100 
101 	assert(ep->av);
102 
103 
104 	for (i = 0; i < GNIX_ADDR_CACHE_SIZE; i++)
105 	{
106 		if (ep->addr_cache[i].addr == key && ep->addr_cache[i].vc != NULL)
107 			return ep->addr_cache[i].vc;
108 	}
109 
110 	if (ep->av->type == FI_AV_TABLE) {
111 		ret = _gnix_vec_at(ep->vc_table, (void **)&vc, key);
112 		if (ret != FI_SUCCESS) {
113 			vc = NULL;
114 		}
115 	} else {
116 		vc = (struct gnix_vc *)_gnix_ht_lookup(ep->vc_ht, key);
117 	}
118 
119 	if (vc) {
120 		ep->addr_cache[ep->last_cached].addr = key;
121 		ep->addr_cache[ep->last_cached].vc = vc;
122 		ep->last_cached = (ep->last_cached + 1) % 5;
123 	}
124 
125 	return vc;
126 }
127 
_gnix_ep_vc_store(struct gnix_fid_ep * ep,struct gnix_vc * vc,uint64_t key)128 static int _gnix_ep_vc_store(struct gnix_fid_ep *ep, struct gnix_vc *vc,
129 			     uint64_t key)
130 {
131 	int ret;
132 
133 	assert(ep->av);
134 
135 	if (ep->av->type == FI_AV_TABLE) {
136 		ret = _gnix_vec_insert_at(ep->vc_table, (void *)vc, key);
137 	} else {
138 		ret = _gnix_ht_insert(ep->vc_ht, key, vc);
139 	}
140 
141 	return ret;
142 }
143 
__gnix_vc_gnix_addr_equal(struct dlist_entry * item,const void * arg)144 static int __gnix_vc_gnix_addr_equal(struct dlist_entry *item, const void *arg)
145 {
146 	struct gnix_vc *vc = dlist_entry(item, struct gnix_vc, list);
147 
148 	return GNIX_ADDR_EQUAL(vc->peer_addr, *(struct gnix_address *)arg);
149 }
150 
151 /* Find an unmapped VC that matches 'dest_addr' and map it into the EP's VC
152  * look up table.
153  *
154  * Note: EP must be locked. */
/* Find an unmapped VC that matches 'dest_addr' and map it into the EP's VC
 * look up table.
 *
 * Note: EP must be locked. */
static struct gnix_vc *__gnix_vc_lookup_unmapped(struct gnix_fid_ep *ep,
						 fi_addr_t dest_addr)
{
	struct gnix_av_addr_entry av_entry;
	struct dlist_entry *match;
	struct gnix_vc *unmapped_vc;
	int rc;

	/* Determine if the fi_addr now exists in the AV. */
	rc = _gnix_av_lookup(ep->av, dest_addr, &av_entry);
	if (rc != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_av_lookup for addr 0x%lx returned %s\n",
			  dest_addr, fi_strerror(-rc));
		return NULL;
	}

	/* Search the unmapped-VC list for a pre-existing VC whose
	 * gnix_address matches the one mapped by dest_addr. */
	match = dlist_remove_first_match(&ep->unmapped_vcs,
					 __gnix_vc_gnix_addr_equal,
					 (void *)&av_entry.gnix_addr);
	if (!match)
		return NULL;

	/* Found a matching, unmapped VC.  Map dest_addr to the VC in
	 * the EP's VC look up table. */
	unmapped_vc = dlist_entry(match, struct gnix_vc, list);
	GNIX_INFO(FI_LOG_EP_CTRL,
		  "Found unmapped VC: %p gnix_addr: 0x%lx fi_addr: 0x%lx\n",
		  unmapped_vc, unmapped_vc->peer_addr, unmapped_vc->peer_fi_addr);

	rc = _gnix_ep_vc_store(ep, unmapped_vc, dest_addr);
	if (OFI_UNLIKELY(rc != FI_SUCCESS)) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ep_vc_store returned %s\n",
			  fi_strerror(-rc));
		/* Put the VC back on the unmapped list so a later
		 * attempt can still map it. */
		dlist_insert_tail(&unmapped_vc->list, &ep->unmapped_vcs);
		return NULL;
	}

	return unmapped_vc;
}
199 
200 /**
201  * Look up the vc by fi_addr_t, if it's found just return it,
202  * otherwise allocate a new vc, insert it into the hashtable,
203  * and vector for FI_AV_TABLE AV type, and start connection setup.
204  *
205  * assumptions: ep is non-null;
206  * dest_addr is valid;
207  * vc_ptr is non-null.
208  *
209  * Note: EP must be locked.
210  */
static int __gnix_vc_get_vc_by_fi_addr(struct gnix_fid_ep *ep, fi_addr_t dest_addr,
				       struct gnix_vc **vc_ptr)
{
	struct gnix_fid_av *av;
	int ret = FI_SUCCESS;
	struct gnix_av_addr_entry av_entry;
	/* NULL-initialized so the err_w_lock path can safely decide
	 * whether a newly allocated VC needs to be destroyed. */
	struct gnix_vc *vc = NULL;

	GNIX_DBG_TRACE(FI_LOG_EP_CTRL, "\n");

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		   "ep->vc_table = %p, ep->vc_table->vector = %p\n",
		   ep->vc_table, ep->vc_table->vector);

	av = ep->av;
	if (OFI_UNLIKELY(av == NULL)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "av field NULL for ep %p\n", ep);
		return -FI_EINVAL;
	}

	/* Use FI address to lookup in EP VC table. */
	vc = _gnix_ep_vc_lookup(ep, dest_addr);
	if (vc) {
		*vc_ptr = vc;
		return FI_SUCCESS;
	}

	/* VC is not mapped yet.  We can receive a connection request from a
	 * remote peer before the target EP has bound to an AV or before the
	 * remote peer has had it's address inserted into the target EP's AV.
	 * Those requests will result in a connection as usual, but the VC will
	 * not be mapped into an EP's AV until the EP attempts to send to the
	 * remote peer.  Check the 'unmapped VC' list to see if such a VC
	 * exists and map it into the AV here. */
	vc = __gnix_vc_lookup_unmapped(ep, dest_addr);
	if (vc) {
		*vc_ptr = vc;
		return FI_SUCCESS;
	}

	/* No VC exists for the peer yet.  Look up full AV entry for the
	 * destination address. */
	ret = _gnix_av_lookup(av, dest_addr, &av_entry);
	if (ret != FI_SUCCESS) {
		/* Fix: use %lx for fi_addr_t (uint64_t), matching the
		 * other warnings in this file; %llx mismatched the
		 * argument type. */
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_av_lookup for addr 0x%lx returned %s \n",
			  dest_addr, fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Allocate new VC with AV entry. */
	ret = _gnix_vc_alloc(ep, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Map new VC through the EP connection table. */
	ret = _gnix_ep_vc_store(ep, vc, dest_addr);
	if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_ep_vc_store returned %s\n",
			  fi_strerror(-ret));
		goto err_w_lock;
	}

	/* Initiate new VC connection. */
	ret = _gnix_vc_connect(vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_connect returned %s\n",
			  fi_strerror(-ret));
		/* NOTE(review): at this point the VC is already stored
		 * in the EP's lookup table; destroying it below leaves
		 * a stale entry — confirm callers never hit this path
		 * with a live table entry. */
		goto err_w_lock;
	}

	*vc_ptr = vc;
	return ret;

err_w_lock:
	if (vc != NULL)
		_gnix_vc_destroy(vc);
	return ret;
}
296 
297 /*******************************************************************************
298  * connection request /response message pack/unpack functions
299  ******************************************************************************/
300 
301 /*
302  * pack a connection request. Contents:
303  * - target_addr (the addr of the targeted EP for the conn req)
304  * - src_addr (the address of the EP originating the conn req)
305  * - src_vc_id (the vc id the mbox the originating EP allocated to
306  *              build this connection)
307  * - src_vc_vaddr (virt. address of the vc struct allocated at the originating
308  *                 EP to build this connection)
309  * - src_smsg_attr (smsg attributes of the mbox allocated at the
310  *                  originating EP for this connection)
311  * - src_irq_cq_mhdl (GNI memory handle for irq cq for originating EP)
312  */
static void __gnix_vc_pack_conn_req(char *sbuf,
				    struct gnix_address *target_addr,
				    struct gnix_address *src_addr,
				    int src_vc_id,
				    uint64_t src_vc_vaddr,
				    gni_smsg_attr_t *src_smsg_attr,
				    gni_mem_handle_t *src_irq_cq_mhdl,
				    uint64_t caps,
				    xpmem_segid_t my_segid,
				    uint8_t name_type,
				    uint8_t rx_ctx_cnt,
					uint32_t key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = sbuf;
	uint8_t rtype = GNIX_VC_CONN_REQ;

	/*
	 * sanity checks
	 */

	assert(sbuf != NULL);

	/*
	 * total wire size of the request; the sizeof(uint64_t) * 2
	 * term covers src_vc_vaddr and caps.  Only used to assert the
	 * message fits in a single CM NIC message.
	 */
	len = sizeof(rtype) +
	      sizeof(struct gnix_address) * 2 +
	      sizeof(int) +
	      sizeof(uint64_t) * 2 +
	      sizeof(gni_smsg_attr_t) +
	      sizeof(gni_mem_handle_t) +
	      sizeof(xpmem_segid_t) +
	      sizeof(name_type) +
	      sizeof(rx_ctx_cnt) +
		  sizeof(key_offset);

	assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE);

	/*
	 * pack fields in wire order; __gnix_vc_unpack_conn_req must
	 * consume them in exactly the same order.
	 */
	memcpy(cptr, &rtype, sizeof(rtype));
	cptr += sizeof(rtype);
	memcpy(cptr, target_addr, sizeof(struct gnix_address));
	cptr += sizeof(struct gnix_address);
	memcpy(cptr, src_addr, sizeof(struct gnix_address));
	cptr += sizeof(struct gnix_address);
	memcpy(cptr, &src_vc_id, sizeof(int));
	cptr += sizeof(int);
	memcpy(cptr, &src_vc_vaddr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(cptr, src_smsg_attr, sizeof(gni_smsg_attr_t));
	cptr += sizeof(gni_smsg_attr_t);
	memcpy(cptr, src_irq_cq_mhdl, sizeof(gni_mem_handle_t));
	cptr += sizeof(gni_mem_handle_t);
	memcpy(cptr, &caps, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(cptr, &my_segid, sizeof(xpmem_segid_t));
	cptr += sizeof(xpmem_segid_t);
	memcpy(cptr, &name_type, sizeof(name_type));
	cptr += sizeof(name_type);
	memcpy(cptr, &rx_ctx_cnt, sizeof(rx_ctx_cnt));
	cptr += sizeof(rx_ctx_cnt);
	memcpy(cptr, &key_offset, sizeof(key_offset));
}
373 
374 /*
375  * unpack a connection request message
376  */
static void __gnix_vc_unpack_conn_req(char *rbuf,
				      struct gnix_address *target_addr,
				      struct gnix_address *src_addr,
				      int *src_vc_id,
				      uint64_t *src_vc_vaddr,
				      gni_smsg_attr_t *src_smsg_attr,
				      gni_mem_handle_t *src_irq_cq_mhndl,
				      uint64_t *caps,
				      xpmem_segid_t *peer_segid,
				      uint8_t *name_type,
				      uint8_t *rx_ctx_cnt,
					  uint32_t *key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = rbuf;

	/*
	 * sanity checks
	 */

	assert(rbuf);

	/* skip the leading message-type byte (already examined via
	 * __gnix_vc_get_msg_type); remaining fields must be consumed
	 * in the exact order __gnix_vc_pack_conn_req wrote them */
	cptr += sizeof(uint8_t);
	memcpy(target_addr, cptr, sizeof(struct gnix_address));
	cptr += sizeof(struct gnix_address);
	memcpy(src_addr, cptr, sizeof(struct gnix_address));
	cptr += sizeof(struct gnix_address);
	memcpy(src_vc_id, cptr, sizeof(int));
	cptr += sizeof(int);
	memcpy(src_vc_vaddr, cptr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(src_smsg_attr, cptr, sizeof(gni_smsg_attr_t));
	cptr += sizeof(gni_smsg_attr_t);
	memcpy(src_irq_cq_mhndl, cptr, sizeof(gni_mem_handle_t));
	cptr += sizeof(gni_mem_handle_t);
	memcpy(caps, cptr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(peer_segid, cptr, sizeof(xpmem_segid_t));
	cptr += sizeof(xpmem_segid_t);
	memcpy(name_type, cptr, sizeof(*name_type));
	cptr += sizeof(*name_type);
	memcpy(rx_ctx_cnt, cptr, sizeof(*rx_ctx_cnt));
	cptr += sizeof(*rx_ctx_cnt);
	memcpy(key_offset, cptr, sizeof(*key_offset));
}
422 
423 /*
424  * pack a connection response. Contents:
425  * - src_vc_vaddr (vaddr of the vc struct allocated at the originating
426  *                EP to build this connection)
427  * - resp_vc_id (the vc id of the mbox the responding EP allocated to
428  *          build this connection)
429  * - resp_smsg_attr (smsg attributes of the mbox allocated at the
430  *                   responding EP for this connection)
431  * - resp_irq_cq_mhndl (GNI memhndl for irq cq of responding EP)
432  */
433 
static void __gnix_vc_pack_conn_resp(char *sbuf,
				     uint64_t src_vc_vaddr,
				     uint64_t resp_vc_vaddr,
				     int resp_vc_id,
				     gni_smsg_attr_t *resp_smsg_attr,
				     gni_mem_handle_t *resp_irq_cq_mhndl,
				     uint64_t caps,
				     xpmem_segid_t my_segid,
					 uint32_t key_offset)
{
	size_t __attribute__((unused)) len;
	char *cptr = sbuf;
	uint8_t rtype = GNIX_VC_CONN_RESP;

	/*
	 * sanity checks
	 */

	assert(sbuf != NULL);

	/*
	 * total wire size; the sizeof(uint64_t) * 3 term covers
	 * src_vc_vaddr, resp_vc_vaddr and caps.  Only used to assert
	 * the message fits in a single CM NIC message.
	 */
	len = sizeof(rtype) +
	      sizeof(uint64_t) * 3 +
	      sizeof(int) +
	      sizeof(gni_smsg_attr_t) +
	      sizeof(gni_mem_handle_t) +
	      sizeof(xpmem_segid_t) +
		  sizeof(uint32_t);
	assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE);

	/*
	 * pack fields in wire order; __gnix_vc_unpack_resp must
	 * consume them in exactly the same order.
	 */
	memcpy(cptr, &rtype, sizeof(rtype));
	cptr += sizeof(rtype);
	memcpy(cptr, &src_vc_vaddr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(cptr, &resp_vc_vaddr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(cptr, &resp_vc_id, sizeof(int));
	cptr += sizeof(int);
	memcpy(cptr, resp_smsg_attr, sizeof(gni_smsg_attr_t));
	cptr += sizeof(gni_smsg_attr_t);
	memcpy(cptr, resp_irq_cq_mhndl, sizeof(gni_mem_handle_t));
	cptr += sizeof(gni_mem_handle_t);
	memcpy(cptr, &caps, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(cptr, &my_segid, sizeof(xpmem_segid_t));
	cptr += sizeof(xpmem_segid_t);
	memcpy(cptr, &key_offset, sizeof(uint32_t));
}
481 
482 /*
483  * unpack a connection request response
484  */
static void __gnix_vc_unpack_resp(char *rbuf,
				  uint64_t *src_vc_vaddr,
				  uint64_t *resp_vc_vaddr,
				  int *resp_vc_id,
				  gni_smsg_attr_t *resp_smsg_attr,
				  gni_mem_handle_t *resp_irq_cq_mhndl,
				  uint64_t *caps,
				  xpmem_segid_t *peer_segid,
				  uint32_t *key_offset)
{
	char *cptr = rbuf;

	/* skip the leading message-type byte; remaining fields must be
	 * consumed in the exact order __gnix_vc_pack_conn_resp wrote
	 * them */
	cptr += sizeof(uint8_t);

	memcpy(src_vc_vaddr, cptr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(resp_vc_vaddr, cptr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(resp_vc_id, cptr, sizeof(int));
	cptr += sizeof(int);
	memcpy(resp_smsg_attr, cptr, sizeof(gni_smsg_attr_t));
	cptr += sizeof(gni_smsg_attr_t);
	memcpy(resp_irq_cq_mhndl, cptr, sizeof(gni_mem_handle_t));
	cptr += sizeof(gni_mem_handle_t);
	memcpy(caps, cptr, sizeof(uint64_t));
	cptr += sizeof(uint64_t);
	memcpy(peer_segid, cptr, sizeof(xpmem_segid_t));
	cptr += sizeof(xpmem_segid_t);
	memcpy(key_offset, cptr, sizeof(uint32_t));
}
515 
/* Extract the message type, which is always the first byte of a packed
 * CM message (see the pack routines above), into *rtype. */
static void __gnix_vc_get_msg_type(char *rbuf,
				  uint8_t *rtype)
{
	assert(rtype);
	*rtype = *(uint8_t *)rbuf;
}
522 
523 /*
524  * helper function to initialize an SMSG connection, plus
525  * a mem handle to use for delivering IRQs to peer when needed
526  */
/*
 * Build the GNI SMSG channel for 'vc': create a GNI EP on the VC's nic,
 * bind it to the peer's device address / cdm id, initialize SMSG with
 * the VC's mailbox as the local side and 'peer_smsg_attr' as the remote
 * side, and set event data so CQ events carry both VC ids.  If
 * 'peer_irq_mem_hndl' is non-NULL it is recorded on the VC for later
 * IRQ delivery to the peer.
 *
 * Returns FI_SUCCESS, or a negative fi_errno translated from the
 * failing GNI call (the GNI EP is destroyed on partial failure).
 */
int _gnix_vc_smsg_init(struct gnix_vc *vc, int peer_id,
		       gni_smsg_attr_t *peer_smsg_attr,
		       gni_mem_handle_t *peer_irq_mem_hndl)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep;
	struct gnix_fid_domain *dom;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t local_smsg_attr;
	gni_return_t __attribute__((unused)) status;
	ssize_t __attribute__((unused)) len;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	assert(vc);

	ep = vc->ep;
	assert(ep);

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	mbox = vc->smsg_mbox;
	assert (mbox);

	/* describe our side of the connection using the VC's mailbox */
	local_smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	local_smsg_attr.msg_buffer = mbox->base;
	local_smsg_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	local_smsg_attr.mem_hndl = *mbox->memory_handle;
	local_smsg_attr.mbox_offset = (uint64_t)mbox->offset;
	local_smsg_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	local_smsg_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 *  now build the SMSG connection
	 */

	/* serialize GNI EP manipulation on the nic */
	COND_ACQUIRE(ep->nic->requires_lock, &ep->nic->lock);

	status = GNI_EpCreate(ep->nic->gni_nic_hndl,
			      ep->nic->tx_cq,
			      &vc->gni_ep);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"GNI_EpCreate returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err;
	}

	status = GNI_EpBind(vc->gni_ep,
			    vc->peer_addr.device_addr,
			    vc->peer_addr.cdm_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpBind returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	status = GNI_SmsgInit(vc->gni_ep,
			      &local_smsg_attr,
			      peer_smsg_attr);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"GNI_SmsgInit returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	/* tag CQ events with local vc id / peer's vc id */
	status = GNI_EpSetEventData(vc->gni_ep,
				    vc->vc_id,
				    peer_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpSetEventData returned %s\n",
			   gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err1;
	}

	if (peer_irq_mem_hndl != NULL)
		vc->peer_irq_mem_hndl = *peer_irq_mem_hndl;

	COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock);
	return ret;
err1:
	/* unwind the GNI EP created above */
	GNI_EpDestroy(vc->gni_ep);
err:
	COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock);
	return ret;
}
619 
/*
 * Build a loop-back connection (an EP sending to itself): allocate an
 * SMSG mailbox, initialize SMSG using this VC's own id/attributes for
 * both sides, try to enable XPMEM for self delivery (connection still
 * proceeds without it), then mark the VC connected and schedule it.
 */
static int __gnix_vc_connect_to_self(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t smsg_mbox_attr;
	xpmem_apid_t peer_apid;
	xpmem_segid_t my_segid;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	assert(vc->conn_state == GNIX_VC_CONN_NONE);
	vc->conn_state = GNIX_VC_CONNECTING;

	assert(vc->smsg_mbox == NULL);

	ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_mbox_alloc returned %s\n",
			  fi_strerror(-ret));
		/* NOTE(review): the underlying error from
		 * _gnix_mbox_alloc is discarded here in favor of
		 * -FI_ENOSPC — confirm this mapping is intended. */
		return -FI_ENOSPC;
	}
	vc->smsg_mbox = mbox;

	/* both sides of the SMSG connection use the same mailbox */
	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/* peer id is our own vc id since we connect to ourselves */
	ret = _gnix_vc_smsg_init(vc, vc->vc_id, &smsg_mbox_attr, NULL);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_smsg_init returned %s\n",
			  fi_strerror(-ret));
		goto err_mbox_init;
	}

	/* TODO: use special send-to-self mechanism to avoid overhead of XPMEM
	 * when just sending a message to oneself. */
	ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl, &my_segid);
	if (ret != FI_SUCCESS) {
		/* non-fatal: fall through and attach without XPMEM */
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gni_xpmem_get_my_segid returned %s\n",
			  fi_strerror(-ret));
	}

	ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, my_segid, &peer_apid);
	if (ret == FI_SUCCESS) {
		vc->modes |= GNIX_VC_MODE_XPMEM;
		vc->peer_apid = peer_apid;
	} else {
		/* non-fatal: connection works without XPMEM mode */
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gni_xpmem_get_apiid returned %s\n",
			  fi_strerror(-ret));
	}

	/* peer attributes are simply our own for a self connection */
	vc->peer_id = vc->vc_id;
	vc->peer_irq_mem_hndl = ep->nic->irq_mem_hndl;
	vc->peer_caps = ep->caps;
	vc->peer_key_offset = ep->auth_key->key_offset;
	vc->conn_state = GNIX_VC_CONNECTED;

	ret = _gnix_vc_sched_new_conn(vc);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_sched_new_conn returned %s\n",
			  fi_strerror(-ret));

	GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n", vc);
	return ret;

err_mbox_init:
	_gnix_mbox_free(vc->smsg_mbox);
	vc->smsg_mbox = NULL;

	return ret;
}
715 
716 /*******************************************************************************
717  * functions for handling incoming connection request/response messages
718  ******************************************************************************/
719 
/*
 * Handle an incoming connection response message: recover the local VC
 * whose request this answers, finish SMSG setup with the responder's
 * attributes, optionally enable XPMEM, and transition the VC to the
 * connected state.
 */
static int __gnix_vc_hndl_conn_resp(struct gnix_cm_nic *cm_nic,
				    char *msg_buffer,
				    struct gnix_address src_cm_nic_addr)
{
	int ret = FI_SUCCESS;
	int peer_id;
	struct gnix_vc *vc = NULL;
	uint64_t peer_vc_addr;
	struct gnix_fid_ep *ep;
	gni_smsg_attr_t peer_smsg_attr;
	gni_mem_handle_t tmp_mem_hndl;
	uint64_t peer_caps;
	xpmem_segid_t peer_segid;
	xpmem_apid_t peer_apid;
	uint32_t peer_key_offset;
	bool accessible;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * unpack the message
	 */

	/* the response echoes back the vaddr of our VC struct (packed
	 * as src_vc_vaddr in the original request), so the pointer is
	 * recovered directly from the message */
	__gnix_vc_unpack_resp(msg_buffer,
			      (uint64_t *)&vc,
			      &peer_vc_addr,
			      &peer_id,
			      &peer_smsg_attr,
			      &tmp_mem_hndl,
			      &peer_caps,
			      &peer_segid,
				  &peer_key_offset);

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"resp rx: (From Aries 0x%x Id %d src vc %p peer vc addr 0x%lx)\n",
		 src_cm_nic_addr.device_addr,
		 src_cm_nic_addr.cdm_id,
		 vc,
		 peer_vc_addr);

	ep = vc->ep;
	assert(ep != NULL);

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/*
	 * at this point vc should be in connecting state
	 */
	if (vc->conn_state != GNIX_VC_CONNECTING) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "vc %p not in connecting state, rather %d\n",
			  vc, vc->conn_state);
		ret = -FI_EINVAL;
		goto err;
	}

	/*
	 * build the SMSG connection
	 */

	ret = _gnix_vc_smsg_init(vc, peer_id, &peer_smsg_attr,
				 &tmp_mem_hndl);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"_gnix_vc_smsg_init returned %s\n",
			fi_strerror(-ret));
		goto err;
	}

	/*
	 * see if we can do xpmem with this EP
	 */

	/* XPMEM failures are non-fatal; the VC simply stays in
	 * non-XPMEM mode */
	ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible);
	if ((ret == FI_SUCCESS) && (accessible == true)) {
		ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
					   peer_segid,
					   &peer_apid);
		if (ret == FI_SUCCESS) {
			vc->modes |= GNIX_VC_MODE_XPMEM;
			vc->peer_apid = peer_apid;
		}
	}

	/*
	 * transition the VC to connected
	 * put in to the nic's work queue for
	 * further processing
	 */

	vc->peer_caps = peer_caps;
	vc->peer_key_offset = peer_key_offset;
	vc->peer_id = peer_id;
	vc->conn_state = GNIX_VC_CONNECTED;
	GNIX_DEBUG(FI_LOG_EP_CTRL,
		   " moving vc %p to state connected\n",vc);

	ret = _gnix_vc_sched_new_conn(vc);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_vc_sched_new_conn returned %s\n",
			  fi_strerror(-ret));

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	return ret;
err:
	/* flag the VC so later progress can tear it down */
	vc->conn_state = GNIX_VC_CONN_ERROR;

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	return ret;
}
832 
__gnix_vc_hndl_conn_req(struct gnix_cm_nic * cm_nic,char * msg_buffer,struct gnix_address src_cm_nic_addr)833 static int __gnix_vc_hndl_conn_req(struct gnix_cm_nic *cm_nic,
834 				   char *msg_buffer,
835 				   struct gnix_address src_cm_nic_addr)
836 {
837 	int ret = FI_SUCCESS;
838 	gni_return_t __attribute__((unused)) status;
839 	struct gnix_fid_ep *ep = NULL;
840 	gnix_ht_key_t key;
841 	struct gnix_av_addr_entry entry;
842 	struct gnix_address src_addr, target_addr;
843 	struct gnix_vc *vc = NULL;
844 	struct gnix_work_req *work_req;
845 	int src_vc_id;
846 	gni_smsg_attr_t src_smsg_attr;
847 	uint64_t src_vc_ptr;
848 	uint64_t peer_caps;
849 	struct wq_hndl_conn_req *data = NULL;
850 	gni_mem_handle_t tmp_mem_hndl;
851 	int src_mapped = 0;
852 	fi_addr_t fi_addr;
853 	xpmem_segid_t peer_segid;
854 	xpmem_apid_t peer_apid;
855 	uint8_t name_type, rx_ctx_cnt;
856 	bool accessible;
857 	ssize_t __attribute__((unused)) len;
858 	struct gnix_ep_name *error_data;
859 	uint32_t key_offset;
860 
861 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
862 
863 	/*
864 	 * unpack the message
865 	 */
866 
867 	__gnix_vc_unpack_conn_req(msg_buffer,
868 				  &target_addr,
869 				  &src_addr,
870 				  &src_vc_id,
871 				  &src_vc_ptr,
872 				  &src_smsg_attr,
873 				  &tmp_mem_hndl,
874 				  &peer_caps,
875 				  &peer_segid,
876 				  &name_type,
877 				  &rx_ctx_cnt,
878 				  &key_offset);
879 
880 	GNIX_DEBUG(FI_LOG_EP_CTRL,
881 		"conn req rx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d src vc 0x%lx )\n",
882 		 src_addr.device_addr,
883 		 src_addr.cdm_id,
884 		 target_addr.device_addr,
885 		 target_addr.cdm_id,
886 		 src_vc_ptr);
887 
888 	/*
889 	 * lookup the ep from the addr_to_ep_ht using the target_addr
890 	 * in the datagram
891 	 */
892 
893 	__gnix_vc_set_ht_key(&target_addr, &key);
894 
895 	ep = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht,
896 						   key);
897 	if (ep == NULL) {
898 		GNIX_WARN(FI_LOG_EP_DATA,
899 			  "_gnix_ht_lookup addr_to_ep failed\n");
900 		return -FI_ENOENT;
901 	}
902 
903 	/*
904 	 * look to see if there is a VC already for the
905 	 * address of the connecting EP.
906 	 */
907 
908 	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);
909 
910 	/* If we already have an AV bound, see if sender's address is already
911 	 * mapped. */
912 	if (ep->av) {
913 		ret = _gnix_av_reverse_lookup(ep->av, src_addr, &fi_addr);
914 		if (ret == FI_SUCCESS) {
915 			src_mapped = 1;
916 			vc = _gnix_ep_vc_lookup(ep, fi_addr);
917 		}
918 	}
919 
920 	/*
921 	 * if there is no corresponding vc in the hash,
922 	 * or there is an entry and it's not in connecting state
923 	 * go down the conn req ack route.
924 	 */
925 	if ((vc == NULL)  ||
926 	    (vc->conn_state == GNIX_VC_CONN_NONE)) {
927 		if (vc == NULL) {
928 			entry.gnix_addr = src_addr;
929 			entry.cm_nic_cdm_id = src_cm_nic_addr.cdm_id;
930 			ret = _gnix_vc_alloc(ep,
931 					     &entry,
932 					     &vc);
933 			if (ret != FI_SUCCESS) {
934 				GNIX_WARN(FI_LOG_EP_CTRL,
935 					  "_gnix_vc_alloc returned %s\n",
936 					  fi_strerror(-ret));
937 				goto err;
938 			}
939 
940 			vc->conn_state = GNIX_VC_CONNECTING;
941 			vc->peer_key_offset = key_offset;
942 
943 			if (src_mapped) {
944 				/* We have an AV which maps the incoming
945 				 * address.  Store the new VC in our VC lookup
946 				 * table. */
947 				ret = _gnix_ep_vc_store(ep, vc, fi_addr);
948 				if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
949 					_gnix_vc_destroy(vc);
950 					GNIX_WARN(FI_LOG_EP_DATA,
951 						  "_gnix_ep_vc_store returned %s\n",
952 						  fi_strerror(-ret));
953 					goto err;
954 				}
955 			} else {
956 				/* We lack an AV and/or the entry to map the
957 				 * incoming address.  Keep VC in special table
958 				 * until it is mapped for a TX operation. */
959 				GNIX_INFO(FI_LOG_EP_CTRL,
960 					  "Received conn. request from unmapped peer EP, vc: %p addr: 0x%lx\n",
961 					  vc, src_addr);
962 
963 				dlist_insert_tail(&vc->list, &ep->unmapped_vcs);
964 
965 				/*
966 				 * see issue 4521 for the error_data size allocated
967 				 */
968 				if (vc->ep->caps & FI_SOURCE) {
969 					error_data =
970 						calloc(1, GNIX_CQ_MAX_ERR_DATA_SIZE);
971 					if (error_data == NULL) {
972 						ret = -FI_ENOMEM;
973 						goto err;
974 					}
975 					vc->gnix_ep_name = (void *) error_data;
976 
977 					error_data->gnix_addr = src_addr;
978 					error_data->name_type = name_type;
979 
980 					error_data->cm_nic_cdm_id =
981 						cm_nic->my_name.cm_nic_cdm_id;
982 					error_data->cookie =
983 						cm_nic->my_name.cookie;
984 
985 					error_data->rx_ctx_cnt = rx_ctx_cnt;
986 				}
987 			}
988 		} else {
989 			vc->conn_state = GNIX_VC_CONNECTING;
990 		}
991 
992 		vc->peer_caps = peer_caps;
993 		vc->peer_key_offset = key_offset;
994 		/*
995 		 * prepare a work request to
996 		 * initiate an request response
997 		 */
998 
999 		work_req = calloc(1, sizeof(*work_req));
1000 		if (work_req == NULL) {
1001 			ret = -FI_ENOMEM;
1002 			goto err;
1003 		}
1004 
1005 		data = calloc(1, sizeof(struct wq_hndl_conn_req));
1006 		if (data == NULL) {
1007 			ret = -FI_ENOMEM;
1008 			goto err;
1009 		}
1010 		memcpy(&data->src_smsg_attr,
1011 		       &src_smsg_attr,
1012 		       sizeof(src_smsg_attr));
1013 		data->vc = vc;
1014 		data->src_vc_id = src_vc_id;
1015 		data->src_vc_ptr = src_vc_ptr;
1016 		data->irq_mem_hndl = tmp_mem_hndl;
1017 		data->peer_segid = peer_segid;
1018 
1019 		work_req->progress_fn = __gnix_vc_conn_ack_prog_fn;
1020 		work_req->data = data;
1021 		work_req->completer_fn = __gnix_vc_conn_ack_comp_fn;
1022 		work_req->completer_data = data;
1023 
1024 		/*
1025 		 * add the work request to the tail of the
1026 		 * cm_nic's work queue, progress the cm_nic.
1027 		 */
1028 
1029 		fastlock_acquire(&cm_nic->wq_lock);
1030 		dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
1031 		fastlock_release(&cm_nic->wq_lock);
1032 	} else {
1033 
1034 		/*
1035 		 * we can only be in connecting state if we
1036 		 * reach here.  We have all the informatinon,
1037 		 * and the other side will get the information
1038 		 * at some point, so go ahead and build SMSG connection.
1039 		 */
1040 		if (vc->conn_state != GNIX_VC_CONNECTING) {
1041 			GNIX_WARN(FI_LOG_EP_CTRL,
1042 				 "vc %p not in connecting state nor in cm wq\n",
1043 				  vc, vc->conn_state);
1044 			ret = -FI_EINVAL;
1045 			goto err;
1046 		}
1047 
1048 		ret = _gnix_vc_smsg_init(vc, src_vc_id,
1049 					 &src_smsg_attr,
1050 					 &tmp_mem_hndl);
1051 		if (ret != FI_SUCCESS) {
1052 			GNIX_WARN(FI_LOG_EP_CTRL,
1053 				  "_gnix_vc_smsg_init returned %s\n",
1054 				  fi_strerror(-ret));
1055 			goto err;
1056 		}
1057 
1058 		ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible);
1059 		if ((ret == FI_SUCCESS) && (accessible == true)) {
1060 			ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
1061 						   peer_segid,
1062 						   &peer_apid);
1063 			if (ret == FI_SUCCESS) {
1064 				vc->modes |= GNIX_VC_MODE_XPMEM;
1065 				vc->peer_apid = peer_apid;
1066 			}
1067 		}
1068 
1069 		vc->peer_caps = peer_caps;
1070 		vc->peer_key_offset = key_offset;
1071 		vc->peer_id = src_vc_id;
1072 		vc->conn_state = GNIX_VC_CONNECTED;
1073 		GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n",
1074 			vc);
1075 
1076 		ret = _gnix_vc_sched_new_conn(vc);
1077 		if (ret != FI_SUCCESS)
1078 			GNIX_WARN(FI_LOG_EP_DATA,
1079 				  "_gnix_vc_sched_new_conn returned %s\n",
1080 				  fi_strerror(-ret));
1081 	}
1082 
1083 err:
1084 	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
1085 
1086 	return ret;
1087 }
1088 
1089 /*
1090  * callback function to process incoming messages
1091  */
__gnix_vc_recv_fn(struct gnix_cm_nic * cm_nic,char * msg_buffer,struct gnix_address src_cm_nic_addr)1092 static int __gnix_vc_recv_fn(struct gnix_cm_nic *cm_nic,
1093 		      char *msg_buffer,
1094 		      struct gnix_address src_cm_nic_addr)
1095 {
1096 	int ret = FI_SUCCESS;
1097 	uint8_t mtype;
1098 
1099 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1100 
1101 	__gnix_vc_get_msg_type(msg_buffer, &mtype);
1102 
1103 	GNIX_DEBUG(FI_LOG_EP_CTRL, "got a message of type %d\n", mtype);
1104 
1105 	switch (mtype) {
1106 	case GNIX_VC_CONN_REQ:
1107 		ret = __gnix_vc_hndl_conn_req(cm_nic,
1108 					      msg_buffer,
1109 					      src_cm_nic_addr);
1110 		break;
1111 	case GNIX_VC_CONN_RESP:
1112 		ret = __gnix_vc_hndl_conn_resp(cm_nic,
1113 					       msg_buffer,
1114 					       src_cm_nic_addr);
1115 		break;
1116 	default:
1117 		GNIX_FATAL(FI_LOG_EP_CTRL, "Invalid message type: %d\n",
1118 			   mtype);
1119 	}
1120 
1121 	return ret;
1122 }
1123 
1124 /*
1125  * progress function for progressing a connection
1126  * ACK.
1127  */
1128 
/*
 * Work-queue progress function that builds and sends the connection
 * response (ACK) back to the peer which initiated a connection request.
 *
 * @param data          wq_hndl_conn_req allocated when the request was
 *                      received; carries the peer's SMSG attributes,
 *                      vc ids, irq mem handle and segid.
 * @param complete_ptr  [out] set non-zero when this work item is done
 *                      and may be removed from the cm_nic work queue.
 *
 * @return FI_SUCCESS (also used for the -FI_EAGAIN retry case, with
 *         *complete_ptr left 0) or negative fi_errno value on failure.
 */
static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr)
{
	int ret = FI_SUCCESS;
	int complete = 0;
	struct wq_hndl_conn_req *work_req_data;
	struct gnix_vc *vc;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t smsg_mbox_attr;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	xpmem_segid_t my_segid;
	char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0};
	xpmem_apid_t peer_apid;
	bool accessible;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");


	work_req_data = (struct wq_hndl_conn_req *)data;

	/* Sanity-check the object chain before touching any state. */
	vc = work_req_data->vc;
	if (vc == NULL)
		return -FI_EINVAL;

	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/*
	 * we may have already been moved to connected or
	 * the datagram from an earlier conn request for this
	 * vc was posted to GNI datagram state machine.  The
	 * connection will be completed in the __gnix_vc_hndl_conn_resp
	 * datagram callback in the latter case.
	 */
	if ((vc->conn_state == GNIX_VC_CONNECTED) ||
		(vc->modes & GNIX_VC_MODE_DG_POSTED)) {
		complete = 1;
		goto exit;
	}

	/*
	 * first see if we still need a mailbox
	 */

	if (vc->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(ep->nic->mbox_hndl,
				       &mbox);
		if (ret == FI_SUCCESS)
			vc->smsg_mbox = mbox;
		else
			/* no mailbox available yet; leave work item
			 * incomplete so it is retried */
			goto exit;
	}

	mbox = vc->smsg_mbox;

	/*
	 * prep the smsg_mbox_attr
	 */

	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 * serialize the resp message in the buffer
	 */

	ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl,
				       &my_segid);
	if (ret != FI_SUCCESS) {
		/* not fatal: continue building the response; XPMEM
		 * simply won't be usable with this peer */
		GNIX_WARN(FI_LOG_EP_CTRL, "_gni_xpmem_get_my_segid returned %s\n",
			  fi_strerror(-ret));
	}

	__gnix_vc_pack_conn_resp(sbuf,
				 work_req_data->src_vc_ptr,
				 (uint64_t)vc,
				 vc->vc_id,
				 &smsg_mbox_attr,
				 &ep->nic->irq_mem_hndl,
				 ep->caps,
				 my_segid,
				 ep->auth_key->key_offset);

	/*
	 * try to send the message, if it succeeds,
	 * initialize mailbox and move vc to connected
	 * state.
	 */

	ret = _gnix_cm_nic_send(cm_nic,
				sbuf,
				GNIX_CM_NIC_MAX_MSG_SIZE,
				vc->peer_cm_nic_addr);
	if (ret == FI_SUCCESS) {
		ret = _gnix_vc_smsg_init(vc,
					 work_req_data->src_vc_id,
					 &work_req_data->src_smsg_attr,
					 &work_req_data->irq_mem_hndl);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_vc_smsg_init returned %s\n",
				  fi_strerror(-ret));
			goto exit;
		}

		/*
		 * TODO: xpmem setup here
		 */

		ret = _gnix_xpmem_accessible(ep, vc->peer_cm_nic_addr,
					     &accessible);
		if ((ret == FI_SUCCESS) && (accessible == true)) {
			ret = _gnix_xpmem_get_apid(ep->xpmem_hndl,
						   work_req_data->peer_segid,
						   &peer_apid);
			if (ret == FI_SUCCESS) {
				vc->modes |= GNIX_VC_MODE_XPMEM;
				vc->peer_apid = peer_apid;
			}
		}

		complete = 1;
		vc->conn_state = GNIX_VC_CONNECTED;
		vc->peer_id = work_req_data->src_vc_id;
		GNIX_DEBUG(FI_LOG_EP_CTRL,
			   "moving vc %p to connected\n",vc);
		vc->modes |= GNIX_VC_MODE_DG_POSTED;

		ret = _gnix_vc_sched_new_conn(vc);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_vc_sched_new_conn returned %s\n",
				  fi_strerror(-ret));
	} else if (ret == -FI_EAGAIN) {
		/* CM NIC could not take the message now; report success
		 * with complete == 0 so the work request stays queued. */
		ret = FI_SUCCESS;
	} else {
		GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n",
			   fi_strerror(-ret));
	}

exit:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	*complete_ptr = complete;
	return ret;
}
1292 
/*
 * Work-queue progress function that builds and sends a connection
 * request to the VC's peer.
 *
 * @param data          the gnix_vc initiating the connection.
 * @param complete_ptr  [out] set non-zero once the request has been
 *                      handed to the CM NIC (or the VC is already
 *                      connecting/connected).
 *
 * @return FI_SUCCESS (including the -FI_EAGAIN retry case, with
 *         *complete_ptr left 0) or negative fi_errno value on failure.
 */
static int __gnix_vc_conn_req_prog_fn(void *data, int *complete_ptr)
{
	int ret = FI_SUCCESS;
	int complete = 0;
	struct gnix_vc *vc = (struct gnix_vc *)data;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t smsg_mbox_attr;
	struct gnix_fid_ep *ep = NULL;
	struct gnix_fid_domain *dom = NULL;
	struct gnix_cm_nic *cm_nic = NULL;
	xpmem_segid_t my_segid;
	char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0};
	struct gnix_auth_key *auth_key;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/* Sanity-check the object chain before touching any state. */
	ep = vc->ep;
	if (ep == NULL)
		return -FI_EINVAL;

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	cm_nic = ep->cm_nic;
	if (cm_nic == NULL)
		return -FI_EINVAL;

	auth_key = ep->auth_key;
	if (auth_key == NULL)
		return -FI_EINVAL;

	assert(auth_key->enabled);

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* A connection may already be in flight or established (e.g. the
	 * peer's request crossed ours); nothing more to do here. */
	if ((vc->conn_state == GNIX_VC_CONNECTING) ||
		(vc->conn_state == GNIX_VC_CONNECTED)) {
			complete = 1;
			goto err;
	}

	/*
	 * first see if we still need a mailbox
	 */

	if (vc->smsg_mbox == NULL) {
		ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl,
				       &mbox);
		if (ret == FI_SUCCESS)
			vc->smsg_mbox = mbox;
		else
			/* no mailbox available yet; leave work item
			 * incomplete so it is retried */
			goto err;
	}

	mbox = vc->smsg_mbox;

	/*
	 * prep the smsg_mbox_attr
	 */

	smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	smsg_mbox_attr.msg_buffer = mbox->base;
	smsg_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	smsg_mbox_attr.mem_hndl = *mbox->memory_handle;
	smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/*
	 * serialize the message in the buffer
	 */

	GNIX_DEBUG(FI_LOG_EP_CTRL,
		"conn req tx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d CM NIC Id %d vc %p)\n",
		 ep->src_addr.gnix_addr.device_addr,
		 ep->src_addr.gnix_addr.cdm_id,
		 vc->peer_addr.device_addr,
		 vc->peer_addr.cdm_id,
		 vc->peer_cm_nic_addr.cdm_id,
		 vc);

        ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl,
				       &my_segid);
	if (ret != FI_SUCCESS) {
		/* not fatal: continue building the request; XPMEM simply
		 * won't be usable with this peer */
		GNIX_WARN(FI_LOG_EP_CTRL,
			"_gnix_xpmem_get_my_segid returned %s\n",
			fi_strerror(-ret));
	}

	__gnix_vc_pack_conn_req(sbuf,
				&vc->peer_addr,
				&ep->src_addr.gnix_addr,
				vc->vc_id,
				(uint64_t)vc,
				&smsg_mbox_attr,
				&ep->nic->irq_mem_hndl,
				ep->caps,
				my_segid,
				ep->src_addr.name_type,
				ep->src_addr.rx_ctx_cnt,
				auth_key->key_offset);

	/*
	 * try to send the message, if -FI_EAGAIN is returned, okay,
	 * just don't mark complete.
	 */

	ret = _gnix_cm_nic_send(cm_nic,
				sbuf,
				GNIX_CM_NIC_MAX_MSG_SIZE,
				vc->peer_cm_nic_addr);
	if (ret == FI_SUCCESS) {
		complete = 1;
		vc->conn_state = GNIX_VC_CONNECTING;
		GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connecting\n",
			vc);
		vc->modes |= GNIX_VC_MODE_DG_POSTED;
	} else if (ret == -FI_EAGAIN) {
		ret = FI_SUCCESS;
	} else {
		GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n",
			   fi_strerror(-ret));
	}

err:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
	*complete_ptr = complete;
	return ret;
}
1423 
1424 /*
1425  * conn ack completer function for work queue element,
1426  * free the previously allocated wq_hndl_conn_req
1427  * data struct
1428  */
static int __gnix_vc_conn_ack_comp_fn(void *data)
{
	/* data is the wq_hndl_conn_req allocated when the ACK work
	 * request was queued; ownership ends here. */
	free(data);
	return FI_SUCCESS;
}
1434 
1435 /*
1436  * connect completer function for work queue element,
1437  * sort of a NO-OP for now.
1438  */
static int __gnix_vc_conn_req_comp_fn(void *data)
{
	/* completer_data is the VC itself, still owned by the endpoint;
	 * nothing to release here. */
	return FI_SUCCESS;
}
1443 
1444 /*******************************************************************************
1445  * Internal API functions
1446  ******************************************************************************/
_gnix_vc_alloc(struct gnix_fid_ep * ep_priv,struct gnix_av_addr_entry * entry,struct gnix_vc ** vc)1447 int _gnix_vc_alloc(struct gnix_fid_ep *ep_priv,
1448 		   struct gnix_av_addr_entry *entry, struct gnix_vc **vc)
1449 
1450 {
1451 	int ret = FI_SUCCESS;
1452 	int remote_id;
1453 	struct gnix_vc *vc_ptr = NULL;
1454 	struct gnix_nic *nic = NULL;
1455 	struct dlist_entry *de = NULL;
1456 
1457 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1458 
1459 	nic = ep_priv->nic;
1460 	if (nic == NULL)
1461 		return -FI_EINVAL;
1462 
1463 	/*
1464 	 * allocate VC from domain's vc_freelist
1465 	 */
1466 
1467 	ret = _gnix_fl_alloc(&de, &nic->vc_freelist);
1468 	while (ret == -FI_EAGAIN)
1469 		ret = _gnix_fl_alloc(&de, &nic->vc_freelist);
1470 	if (ret == FI_SUCCESS) {
1471 		vc_ptr = container_of(de, struct gnix_vc, fr_list);
1472 	} else
1473 		return ret;
1474 
1475 	vc_ptr->conn_state = GNIX_VC_CONN_NONE;
1476 	if (entry) {
1477 		memcpy(&vc_ptr->peer_addr,
1478 			&entry->gnix_addr,
1479 			sizeof(struct gnix_address));
1480 		vc_ptr->peer_cm_nic_addr.device_addr =
1481 			entry->gnix_addr.device_addr;
1482 		vc_ptr->peer_cm_nic_addr.cdm_id =
1483 			entry->cm_nic_cdm_id;
1484 	} else {
1485 		vc_ptr->peer_addr.device_addr = -1;
1486 		vc_ptr->peer_addr.cdm_id = -1;
1487 		vc_ptr->peer_cm_nic_addr.device_addr = -1;
1488 		vc_ptr->peer_cm_nic_addr.cdm_id = -1;
1489 	}
1490 	vc_ptr->ep = ep_priv;
1491 
1492 	dlist_init(&vc_ptr->prog_list);
1493 	dlist_init(&vc_ptr->work_queue);
1494 	dlist_init(&vc_ptr->tx_queue);
1495 
1496 	vc_ptr->peer_fi_addr = FI_ADDR_NOTAVAIL;
1497 
1498 	dlist_init(&vc_ptr->list);
1499 
1500 	ofi_atomic_initialize32(&vc_ptr->outstanding_tx_reqs, 0);
1501 	ret = _gnix_alloc_bitmap(&vc_ptr->flags, 1, NULL);
1502 	assert(!ret);
1503 
1504 	/*
1505 	 * we need an id for the vc to allow for quick lookup
1506 	 * based on GNI_CQ_GET_INST_ID
1507 	 */
1508 
1509 	ret = _gnix_nic_get_rem_id(nic, &remote_id, vc_ptr);
1510 	if (ret != FI_SUCCESS)
1511 		goto err;
1512 	vc_ptr->vc_id = remote_id;
1513 	vc_ptr->gnix_ep_name = NULL;
1514 
1515 	*vc = vc_ptr;
1516 
1517 	return ret;
1518 
1519 err:
1520 	if (vc_ptr)
1521 		free(vc_ptr);
1522 	return ret;
1523 }
1524 
__gnix_vc_cancel(struct gnix_vc * vc)1525 static void __gnix_vc_cancel(struct gnix_vc *vc)
1526 {
1527 	struct gnix_nic *nic = vc->ep->nic;
1528 
1529 	COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
1530 	if (!dlist_empty(&vc->prog_list))
1531 		dlist_remove_init(&vc->prog_list);
1532 	COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
1533 }
1534 
1535 /* Destroy an unconnected VC.  More Support is needed to shutdown and destroy
1536  * an active VC. */
int _gnix_vc_destroy(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_nic *nic = NULL;
	/* NOT_DONE primes the unbind retry loop below */
	gni_return_t status = GNI_RC_NOT_DONE;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (vc->ep == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n");
		return -FI_EINVAL;
	}

	nic = vc->ep->nic;
	if (nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc);
		return -FI_EINVAL;
	}

	/*
	 * move vc state to terminating
	 */

	vc->conn_state = GNIX_VC_CONN_TERMINATING;

	/*
	 * try to unbind the gni_ep if non-NULL.
	 * If there are SMSG or PostFMA/RDMA outstanding
	 * wait here for them to complete
	 */

	if (vc->gni_ep != NULL) {
		while (status == GNI_RC_NOT_DONE) {

			/* nic lock serializes access to the GNI handle */
			COND_ACQUIRE(nic->requires_lock, &nic->lock);
			status = GNI_EpUnbind(vc->gni_ep);
			COND_RELEASE(nic->requires_lock, &nic->lock);

			if ((status != GNI_RC_NOT_DONE) &&
				(status != GNI_RC_SUCCESS)) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					"GNI_EpUnBind returned %s\n",
					  gni_err_str[status]);
				break;
			}

			/* drain outstanding transactions so the unbind
			 * can eventually succeed */
			if (status == GNI_RC_NOT_DONE)
				_gnix_nic_progress(nic);
		}
		COND_ACQUIRE(nic->requires_lock, &nic->lock);
		status = GNI_EpDestroy(vc->gni_ep);
		COND_RELEASE(nic->requires_lock, &nic->lock);
		if (status != GNI_RC_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"GNI_EpDestroy returned %s\n",
				  gni_err_str[status]);
	}

	/*
	 * if the vc is in a nic's work queue, remove it
	 */
	__gnix_vc_cancel(vc);

	/*
	 * We may eventually want to check the state of the VC, if we
	 * implement true VC shutdown.

	if ((vc->conn_state != GNIX_VC_CONN_NONE)
		&& (vc->conn_state != GNIX_VC_CONN_TERMINATED)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			      "vc conn state  %d\n",
			       vc->conn_state);
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n");
		return -FI_EBUSY;
	}
	 */

	/*
	 * if send_q not empty, return -FI_EBUSY
	 * Note for FI_EP_MSG type eps, this behavior
	 * may not be correct for handling fi_shutdown.
	 */

	if (!dlist_empty(&vc->tx_queue))
		GNIX_FATAL(FI_LOG_EP_CTRL, "VC TX queue not empty\n");

	if (ofi_atomic_get32(&vc->outstanding_tx_reqs))
		GNIX_FATAL(FI_LOG_EP_CTRL,
			   "VC outstanding_tx_reqs out of sync: %d\n",
			   ofi_atomic_get32(&vc->outstanding_tx_reqs));

	/* release the SMSG mailbox, if one was ever attached */
	if (vc->smsg_mbox != NULL) {
		ret = _gnix_mbox_free(vc->smsg_mbox);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_mbox_free returned %s\n",
			      fi_strerror(-ret));
		vc->smsg_mbox = NULL;
	}

	/* give back the id used for GNI_CQ_GET_INST_ID lookups */
	ret = _gnix_nic_free_rem_id(nic, vc->vc_id);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
		      "__gnix_vc_free_id returned %s\n",
		      fi_strerror(-ret));

	_gnix_free_bitmap(&vc->flags);

	/* error_data buffer allocated for FI_SOURCE conn requests */
	if (vc->gnix_ep_name != NULL) {
		free(vc->gnix_ep_name);
		vc->gnix_ep_name = NULL;
	}

	/*
	 * put VC back on the freelist
	 */

	vc->conn_state = GNIX_VC_CONN_NONE;
	_gnix_fl_free(&vc->fr_list, &nic->vc_freelist);

	return ret;
}
1659 
_gnix_vc_connect(struct gnix_vc * vc)1660 int _gnix_vc_connect(struct gnix_vc *vc)
1661 {
1662 	int ret = FI_SUCCESS;
1663 	struct gnix_fid_ep *ep = NULL;
1664 	struct gnix_cm_nic *cm_nic = NULL;
1665 	struct gnix_work_req *work_req;
1666 
1667 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
1668 
1669 	/*
1670 	 * can happen that we are already connecting, or
1671 	 * are connected
1672 	 */
1673 
1674 	if ((vc->conn_state == GNIX_VC_CONNECTING) ||
1675 		(vc->conn_state == GNIX_VC_CONNECTED)) {
1676 		return FI_SUCCESS;
1677 	}
1678 
1679 	ep = vc->ep;
1680 	if (ep == NULL)
1681 		return -FI_EINVAL;
1682 
1683 	cm_nic = ep->cm_nic;
1684 	if (cm_nic == NULL)
1685 		return -FI_EINVAL;
1686 
1687 	/*
1688 	 * only endpoints of type FI_EP_RDM use this
1689 	 * connection method
1690 	 */
1691 	if (!GNIX_EP_RDM_DGM(ep->type))
1692 		return -FI_EINVAL;
1693 
1694 	/*
1695 	 * check if this EP is connecting to itself
1696 	 */
1697 
1698 	if (GNIX_ADDR_EQUAL(ep->src_addr.gnix_addr, vc->peer_addr)) {
1699 		return __gnix_vc_connect_to_self(vc);
1700 	}
1701 
1702 	/*
1703 	 * allocate a work request and put it
1704 	 * on the cm_nic work queue.
1705 	 */
1706 
1707 	work_req = calloc(1, sizeof(*work_req));
1708 	if (work_req == NULL)
1709 		return -FI_ENOMEM;
1710 
1711 	work_req->progress_fn = __gnix_vc_conn_req_prog_fn;
1712 	work_req->data = vc;
1713 	work_req->completer_fn = __gnix_vc_conn_req_comp_fn;
1714 	work_req->completer_data = vc;
1715 
1716 	/*
1717 	 * add the work request to the tail of the
1718 	 * cm_nic's work queue, progress the cm_nic.
1719 	 */
1720 
1721 	fastlock_acquire(&cm_nic->wq_lock);
1722 	dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq);
1723 	fastlock_release(&cm_nic->wq_lock);
1724 
1725 	return ret;
1726 }
1727 
1728 /******************************************************************************
1729  *
1730  * VC RX progress
1731  *
1732  *****************************************************************************/
1733 
1734 /* Process a VC's SMSG mailbox.
1735  *
1736  * Note: EP must be locked. */
_gnix_vc_dequeue_smsg(struct gnix_vc * vc)1737 int _gnix_vc_dequeue_smsg(struct gnix_vc *vc)
1738 {
1739 	int ret = FI_SUCCESS;
1740 	struct gnix_nic *nic;
1741 	gni_return_t status;
1742 	void *msg_ptr;
1743 	uint8_t tag;
1744 
1745 	GNIX_TRACE(FI_LOG_EP_DATA, "\n");
1746 
1747 	nic = vc->ep->nic;
1748 	assert(nic != NULL);
1749 
1750 	do {
1751 		tag = GNI_SMSG_ANY_TAG;
1752 		status = GNI_SmsgGetNextWTag(vc->gni_ep,
1753 					     &msg_ptr,
1754 					     &tag);
1755 
1756 		if (status == GNI_RC_SUCCESS) {
1757 			GNIX_DEBUG(FI_LOG_EP_DATA, "Found RX (%p)\n", vc);
1758 			ret = nic->smsg_callbacks[tag](vc, msg_ptr);
1759 			if (ret != FI_SUCCESS) {
1760 				/* Stalled, reschedule */
1761 				break;
1762 			}
1763 		} else if (status == GNI_RC_NOT_DONE) {
1764 			/* No more work. */
1765 			ret = FI_SUCCESS;
1766 			break;
1767 		} else {
1768 			GNIX_WARN(FI_LOG_EP_DATA,
1769 				"GNI_SmsgGetNextWTag returned %s\n",
1770 				gni_err_str[status]);
1771 			ret = gnixu_to_fi_errno(status);
1772 			break;
1773 		}
1774 	} while (1);
1775 
1776 	return ret;
1777 }
1778 
1779 /* Progress VC RXs.  Reschedule VC if more there is more work.
1780  *
1781  * Note: EP must be locked. */
__gnix_vc_rx_progress(struct gnix_vc * vc)1782 static int __gnix_vc_rx_progress(struct gnix_vc *vc)
1783 {
1784 	int ret;
1785 
1786 	/* Process pending RXs */
1787 	COND_ACQUIRE(vc->ep->nic->requires_lock, &vc->ep->nic->lock);
1788 	ret = _gnix_vc_dequeue_smsg(vc);
1789 	COND_RELEASE(vc->ep->nic->requires_lock, &vc->ep->nic->lock);
1790 
1791 	if (ret != FI_SUCCESS) {
1792 		/* We didn't finish processing RXs.  Low memory likely.
1793 		 * Try again later.  Return error to abort processing
1794 		 * other VCs. */
1795 		_gnix_vc_rx_schedule(vc);
1796 		return -FI_EAGAIN;
1797 	}
1798 
1799 	/* Return success to continue processing other VCs */
1800 	return FI_SUCCESS;
1801 }
1802 
1803 /******************************************************************************
1804  *
1805  * VC work progress
1806  *
1807  *****************************************************************************/
1808 
1809 /* Schedule deferred request processing.  Usually used in RX completers.
1810  *
1811  * Note: EP must be locked. */
_gnix_vc_queue_work_req(struct gnix_fab_req * req)1812 int _gnix_vc_queue_work_req(struct gnix_fab_req *req)
1813 {
1814 	struct gnix_vc *vc = req->vc;
1815 
1816 	dlist_insert_tail(&req->dlist, &vc->work_queue);
1817 	__gnix_vc_work_schedule(vc);
1818 
1819 	return FI_SUCCESS;
1820 }
1821 
1822 /* Schedule deferred request processing.  Used in TX completers where VC lock is
1823  * not yet held. */
_gnix_vc_requeue_work_req(struct gnix_fab_req * req)1824 int _gnix_vc_requeue_work_req(struct gnix_fab_req *req)
1825 {
1826 	int ret;
1827 
1828 	COND_ACQUIRE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock);
1829 	ret = _gnix_vc_queue_work_req(req);
1830 	COND_RELEASE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock);
1831 
1832 	return ret;
1833 }
1834 
1835 /* Process deferred request work on the VC.
1836  *
1837  * Note: EP must be locked. */
__gnix_vc_push_work_reqs(struct gnix_vc * vc)1838 static int __gnix_vc_push_work_reqs(struct gnix_vc *vc)
1839 {
1840 	int ret, fi_rc = FI_SUCCESS;
1841 	struct gnix_fab_req *req;
1842 
1843 	while (1) {
1844 		req = dlist_first_entry(&vc->work_queue,
1845 					struct gnix_fab_req,
1846 					dlist);
1847 		if (!req)
1848 			break;
1849 
1850 		dlist_remove_init(&req->dlist);
1851 
1852 		ret = req->work_fn(req);
1853 		if (ret != FI_SUCCESS) {
1854 			/* Re-schedule failed work. */
1855 			_gnix_vc_queue_work_req(req);
1856 
1857 			/* FI_ENOSPC is reserved to indicate a lack of
1858 			 * TXDs, which are shared by all VCs on the
1859 			 * NIC.  The other likely error is FI_EAGAIN
1860 			 * due to a lack of SMSG credits. */
1861 			if ((ret != -FI_ENOSPC) &&
1862 			    (ret != -FI_EAGAIN)) {
1863 				/*
1864 				 * TODO: Report error (via CQ err?)
1865 				 * Note: This error can't be reported here.
1866 				 */
1867 				GNIX_FATAL(FI_LOG_EP_DATA,
1868 					   "Failed to push request %p: %s\n",
1869 					   req, fi_strerror(-ret));
1870 			}
1871 
1872 			fi_rc = -FI_EAGAIN;
1873 			break;
1874 		} else {
1875 			GNIX_INFO(FI_LOG_EP_DATA,
1876 				  "Request processed: %p\n", req);
1877 		}
1878 	}
1879 
1880 	return fi_rc;
1881 }
1882 
1883 /******************************************************************************
1884  *
1885  * VC TX progress
1886  *
1887  *****************************************************************************/
1888 
1889 /* Attempt to initiate a TX request.  If the TX queue is blocked (due to low
1890  * resources or a FI_FENCE request), schedule the request to be sent later.
1891  *
1892  * Note: EP must be locked. */
int _gnix_vc_queue_tx_req(struct gnix_fab_req *req)
{
	int rc = FI_SUCCESS, queue_tx = 0;
	struct gnix_vc *vc = req->vc;
	struct gnix_fid_ep *ep = req->gnix_ep;
	struct gnix_fab_req *more_req;
	int connected;
	struct slist_entry *sle;

	/* Check if there is an outstanding fi_more chain to initiate.
	 * A request without FI_MORE terminates any chain the EP has been
	 * accumulating; recursively queue the heads of the pending
	 * write/read chains and reset the lists. */
	if ((!(req->flags & FI_MORE)) && (!(slist_empty(&ep->more_write)) ||
		!(slist_empty(&ep->more_read)))) {
		if (!slist_empty(&ep->more_write)) {
			sle = ep->more_write.head;
			more_req = container_of(sle, struct gnix_fab_req,
						rma.sle);
			GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request "
					"from more_write. Queuing Request\n");
			_gnix_vc_queue_tx_req(more_req);
			slist_init(&ep->more_write);
		}
		if (!slist_empty(&ep->more_read)) {
			sle = ep->more_read.head;
			more_req = container_of(sle, struct gnix_fab_req,
						rma.sle);
			GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request "
					"from more_read. Queuing Request\n");
			_gnix_vc_queue_tx_req(more_req);
			slist_init(&ep->more_read);
		}
	}

	if (req->flags & FI_TRIGGER) {
		rc = _gnix_trigger_queue_req(req);

		/* FI_SUCCESS means the request was queued to wait for the
		 * trigger condition. */
		if (rc == FI_SUCCESS)
			return FI_SUCCESS;
	}

	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	if ((req->flags & FI_FENCE) && ofi_atomic_get32(&vc->outstanding_tx_reqs)) {
		/* Fence request must be queued until all outstanding TX
		 * requests are completed.  Subsequent requests will be queued
		 * due to non-empty tx_queue. */
		queue_tx = 1;
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Queued FI_FENCE request (%p) on VC\n",
			  req);
	} else if (connected && dlist_empty(&vc->tx_queue)) {
		ofi_atomic_inc32(&vc->outstanding_tx_reqs);

		/* try to initiate request */
		rc = req->work_fn(req);
		if (rc == FI_SUCCESS) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request processed: %p (OTX: %d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
		} else if (rc != -FI_ECANCELED) {
			/* work_fn could not make progress; undo the
			 * outstanding count and fall back to queuing */
			ofi_atomic_dec32(&vc->outstanding_tx_reqs);
			queue_tx = 1;
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "Queued request (%p) on full VC\n",
				  req);
		}
	} else {
		/* VC not connected yet, or TXs already queued ahead of
		 * this one; preserve ordering by queuing */
		queue_tx = 1;
		GNIX_DEBUG(FI_LOG_EP_DATA,
			  "Queued request (%p) on busy VC\n",
			  req);
	}

	if (OFI_UNLIKELY(queue_tx)) {
		dlist_insert_tail(&req->dlist, &vc->tx_queue);
		_gnix_vc_tx_schedule(vc);
	}

	return FI_SUCCESS;
}
1974 
1975 /* Push TX requests queued on the VC.
1976  *
1977  * Note: EP must be locked. */
static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc)
{
	int ret, fi_rc = FI_SUCCESS;
	struct gnix_fab_req *req;

	req = dlist_first_entry(&vc->tx_queue, struct gnix_fab_req, dlist);
	while (req) {
		/* FI_FENCE requests may only start once all previously
		 * initiated TXs on this VC have completed. */
		if ((req->flags & FI_FENCE) &&
		    ofi_atomic_get32(&vc->outstanding_tx_reqs)) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request queue stalled on FI_FENCE request: %p (%d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
			/* Success is returned to allow processing of more VCs.
			 * This VC will be rescheduled when the fence request
			 * is completed. */
			break;
		}

		ofi_atomic_inc32(&vc->outstanding_tx_reqs);
		dlist_remove_init(&req->dlist);

		ret = req->work_fn(req);
		if (ret == FI_SUCCESS) {
			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "TX request processed: %p (OTX: %d)\n",
				  req, ofi_atomic_get32(&vc->outstanding_tx_reqs));
		} else if (ret != -FI_ECANCELED) {
			/* Work failed.  Reschedule to put this VC
			 * back on the end of the list and return
			 * -FI_EAGAIN. */

			GNIX_DEBUG(FI_LOG_EP_DATA,
				  "Failed to push TX request %p: %s\n",
				  req, fi_strerror(-ret));
			fi_rc = -FI_EAGAIN;

			/* FI_ENOSPC is reserved to indicate a lack of
			 * TXDs, which are shared by all VCs on the
			 * NIC.  The other likely error is FI_EAGAIN
			 * due to a lack of SMSG credits. */

			if ((ret != -FI_ENOSPC) && (ret != -FI_EAGAIN)) {
				/* TODO report error? */
				GNIX_WARN(FI_LOG_EP_DATA,
					  "Failed to push TX request %p: %s\n",
					  req, fi_strerror(-ret));
			}

			/* return the request to the head of the queue so
			 * ordering is preserved */
			dlist_insert_head(&req->dlist, &vc->tx_queue);
			ofi_atomic_dec32(&vc->outstanding_tx_reqs);

			/* _gnix_vc_tx_schedule() must come after the request
			 * is inserted into the VC's tx_queue. */
			_gnix_vc_tx_schedule(vc);
			break;

		}

		req = dlist_first_entry(&vc->tx_queue,
					struct gnix_fab_req,
					dlist);
	}

	return fi_rc;
}
2043 
2044 /* Return next VC needing progress on the NIC. */
__gnix_nic_next_pending_vc(struct gnix_nic * nic)2045 static struct gnix_vc *__gnix_nic_next_pending_vc(struct gnix_nic *nic)
2046 {
2047 	struct gnix_vc *vc = NULL;
2048 
2049 	COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
2050 	vc = dlist_first_entry(&nic->prog_vcs, struct gnix_vc, prog_list);
2051 	if (vc)
2052 		dlist_remove_init(&vc->prog_list);
2053 	COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
2054 
2055 	if (vc) {
2056 		GNIX_INFO(FI_LOG_EP_CTRL, "Dequeued progress VC (%p)\n", vc);
2057 		_gnix_clear_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED);
2058 	}
2059 
2060 	return vc;
2061 }
2062 
_gnix_vc_progress(struct gnix_vc * vc)2063 int _gnix_vc_progress(struct gnix_vc *vc)
2064 {
2065 	int ret, ret_tx;
2066 
2067 	ret = __gnix_vc_rx_progress(vc);
2068 	if (ret != FI_SUCCESS)
2069 		GNIX_DEBUG(FI_LOG_EP_CTRL,
2070 			   "__gnix_vc_rx_progress failed: %d\n", ret);
2071 
2072 	ret = __gnix_vc_push_work_reqs(vc);
2073 	if (ret != FI_SUCCESS)
2074 		GNIX_DEBUG(FI_LOG_EP_CTRL,
2075 			   "__gnix_vc_push_work_reqs failed: %d\n", ret);
2076 
2077 	ret_tx = __gnix_vc_push_tx_reqs(vc);
2078 	if (ret != FI_SUCCESS)
2079 		GNIX_DEBUG(FI_LOG_EP_CTRL,
2080 			   "__gnix_vc_push_tx_reqs failed: %d\n", ret);
2081 
2082 	return ret_tx;
2083 }
2084 
2085 /* Progress all NIC VCs needing work. */
_gnix_vc_nic_progress(struct gnix_nic * nic)2086 int _gnix_vc_nic_progress(struct gnix_nic *nic)
2087 {
2088 	struct gnix_vc *vc;
2089 	int ret;
2090 
2091 	/*
2092 	 * we can't just spin and spin in this loop because
2093 	 * none of the functions invoked below end up dequeuing
2094 	 * GNI CQE's and subsequently freeing up TX descriptors.
2095 	 * So, if the tx reqs routine returns -FI_EAGAIN, break out.
2096 	 */
2097 	while ((vc = __gnix_nic_next_pending_vc(nic))) {
2098 		COND_ACQUIRE(vc->ep->requires_lock, &vc->ep->vc_lock);
2099 
2100 		if (vc->conn_state == GNIX_VC_CONNECTED) {
2101 			ret = _gnix_vc_progress(vc);
2102 		}
2103 
2104 		COND_RELEASE(vc->ep->requires_lock, &vc->ep->vc_lock);
2105 
2106 		if (ret != FI_SUCCESS)
2107 			break;
2108 	}
2109 
2110 	return FI_SUCCESS;
2111 }
2112 
2113 /* Schedule VC for progress.
2114  *
2115  * Note: EP must be locked.
2116  * TODO: Better implementation for rx/work/tx VC scheduling. */
_gnix_vc_schedule(struct gnix_vc * vc)2117 int _gnix_vc_schedule(struct gnix_vc *vc)
2118 {
2119 	struct gnix_nic *nic = vc->ep->nic;
2120 
2121 	if (!_gnix_test_and_set_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED)) {
2122 		COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock);
2123 		dlist_insert_tail(&vc->prog_list, &nic->prog_vcs);
2124 		COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock);
2125 		GNIX_DEBUG(FI_LOG_EP_CTRL, "Scheduled VC (%p)\n", vc);
2126 	}
2127 
2128 	return FI_SUCCESS;
2129 }
2130 
2131 /* Schedule the VC for RX progress. */
_gnix_vc_rx_schedule(struct gnix_vc * vc)2132 int _gnix_vc_rx_schedule(struct gnix_vc *vc)
2133 {
2134 	return _gnix_vc_schedule(vc);
2135 }
2136 
2137 /* Schedule the VC for work progress. */
__gnix_vc_work_schedule(struct gnix_vc * vc)2138 static int __gnix_vc_work_schedule(struct gnix_vc *vc)
2139 {
2140 	return _gnix_vc_schedule(vc);
2141 }
2142 
2143 /* Schedule the VC for TX progress. */
_gnix_vc_tx_schedule(struct gnix_vc * vc)2144 int _gnix_vc_tx_schedule(struct gnix_vc *vc)
2145 {
2146 	return _gnix_vc_schedule(vc);
2147 }
2148 
2149 /* For a newly scheduled VC.  Do any queued work now that the connection is
2150  * complete.
2151  *
2152  * Note: EP must be locked. */
_gnix_vc_sched_new_conn(struct gnix_vc * vc)2153 int _gnix_vc_sched_new_conn(struct gnix_vc *vc)
2154 {
2155 	_gnix_vc_schedule(vc);
2156 	return _gnix_vc_progress(vc);
2157 }
2158 
2159 /* Look up an EP's VC using fi_addr_t.
2160  *
2161  * Note: EP must be locked. */
_gnix_vc_ep_get_vc(struct gnix_fid_ep * ep,fi_addr_t dest_addr,struct gnix_vc ** vc_ptr)2162 int _gnix_vc_ep_get_vc(struct gnix_fid_ep *ep, fi_addr_t dest_addr,
2163 		       struct gnix_vc **vc_ptr)
2164 {
2165 	int ret;
2166 
2167 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
2168 
2169 	if (GNIX_EP_RDM_DGM(ep->type)) {
2170 		ret = __gnix_vc_get_vc_by_fi_addr(ep, dest_addr, vc_ptr);
2171 		if (OFI_UNLIKELY(ret != FI_SUCCESS)) {
2172 			GNIX_WARN(FI_LOG_EP_DATA,
2173 				  "__gnix_vc_get_vc_by_fi_addr returned %s\n",
2174 				   fi_strerror(-ret));
2175 			return ret;
2176 		}
2177 	} else if (ep->type == FI_EP_MSG) {
2178 		if (GNIX_EP_CONNECTED(ep)) {
2179 			*vc_ptr = ep->vc;
2180 		} else {
2181 			return -FI_EINVAL;
2182 		}
2183 	} else {
2184 		GNIX_WARN(FI_LOG_EP_DATA, "Invalid endpoint type: %d\n",
2185 			  ep->type);
2186 		return -FI_EINVAL;
2187 	}
2188 
2189 	return FI_SUCCESS;
2190 }
2191 
_gnix_vc_peer_fi_addr(struct gnix_vc * vc)2192 fi_addr_t _gnix_vc_peer_fi_addr(struct gnix_vc *vc)
2193 {
2194 	int rc;
2195 
2196 	/* If FI_SOURCE capability was requested, do a reverse lookup of a VC's
2197 	 * FI address once.  Skip translation on connected EPs (no AV). */
2198 	if (vc->ep->av && vc->peer_fi_addr == FI_ADDR_NOTAVAIL) {
2199 		rc = _gnix_av_reverse_lookup(vc->ep->av,
2200 					     vc->peer_addr,
2201 					     &vc->peer_fi_addr);
2202 		if (rc != FI_SUCCESS)
2203 			GNIX_WARN(FI_LOG_EP_DATA,
2204 				  "_gnix_av_reverse_lookup() failed: %d\n",
2205 				  rc);
2206 	}
2207 
2208 	return vc->peer_fi_addr;
2209 }
2210 
/* Register __gnix_vc_recv_fn as the receive callback on the CM NIC
 * backing this NIC, under the NIC lock.
 *
 * Returns the result of _gnix_cm_nic_reg_recv_fn.  A warning is logged
 * if a different callback was already registered. */
int _gnix_vc_cm_init(struct gnix_cm_nic *cm_nic)
{
	int ret = FI_SUCCESS;
	gnix_cm_nic_rcv_cb_func *ofunc = NULL;
	struct gnix_nic *nic = NULL;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	nic = cm_nic->nic;
	assert(nic != NULL);

	COND_ACQUIRE(nic->requires_lock, &nic->lock);
	ret = _gnix_cm_nic_reg_recv_fn(cm_nic,
					__gnix_vc_recv_fn,
					&ofunc);
	/* ofunc is the previously registered callback; anything other than
	 * NULL or our own function indicates a conflicting registration.
	 * NOTE(review): the message formats fi_strerror(-ret), but ret may
	 * be FI_SUCCESS on this path, so the logged string may not reflect
	 * the actual conflict — confirm intent. */
	if ((ofunc != NULL) &&
	    (ofunc != __gnix_vc_recv_fn)) {
		GNIX_WARN(FI_LOG_EP_DATA, "callback reg failed: %s\n",
			  fi_strerror(-ret));
	}

	COND_RELEASE(nic->requires_lock, &nic->lock);

	return ret;
}
2236 
2237