1 /*
2  * Copyright (c) 2015-2017 Cray Inc.  All rights reserved.
3  * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #ifndef _GNIX_NIC_H_
35 #define _GNIX_NIC_H_
36 
37 #if HAVE_CONFIG_H
38 #include <config.h>
39 #endif /* HAVE_CONFIG_H */
40 #include <ofi_list.h>
41 #include <assert.h>
42 
43 #include "gnix.h"
44 #include "gnix_bitmap.h"
45 #include "gnix_mbox_allocator.h"
46 #include "gnix_util.h"
47 
/*
 * NOTE(review): presumably the default value for gnix_max_nics_per_ptag
 * declared below - confirm against the definition in gnix_nic.c.
 */
#define GNIX_DEF_MAX_NICS_PER_PTAG	4

/*
 * globals
 *
 * These are declarations only; the objects are defined elsewhere.
 * struct gnix_nic links into gnix_nic_list through its gnix_nic_list
 * member and into a per-PTAG list through its ptag_nic_list member.
 * NOTE(review): gnix_nic_list_ptag is presumably indexed by ptag and
 * gnix_nic_list_lock presumably guards both lists - confirm in gnix_nic.c.
 */

extern uint32_t gnix_max_nics_per_ptag;
extern struct dlist_entry gnix_nic_list_ptag[];
extern struct dlist_entry gnix_nic_list;
extern pthread_mutex_t gnix_nic_list_lock;
58 
/*
 * allocation flags for cleaning up GNI resources
 * when closing a gnix_nic - needed since these
 * can be dup'd from another structure.
 *
 * Each flag is a distinct bit; bit 0 is not used by any flag here.
 * NOTE(review): presumably recorded in gnix_nic.allocd_gni_res, which is
 * only 32 bits wide although the constants are unsigned long long - confirm.
 */

#define GNIX_NIC_CDM_ALLOCD	(1ULL << 1)
#define GNIX_NIC_TX_CQ_ALLOCD	(1ULL << 2)
#define GNIX_NIC_RX_CQ_ALLOCD	(1ULL << 3)
68 
/*
 * typedef for callbacks for handling
 * receipt of SMSG messages at the target.
 *
 * A callback takes two untyped pointers (ptr, msg) and returns an int.
 * struct gnix_nic refers to a table of these through its smsg_callbacks
 * member; gnix_ep_smsg_callbacks is one such table, defined elsewhere.
 */
typedef int (*smsg_callback_fn_t)(void  *ptr, void *msg);

extern smsg_callback_fn_t gnix_ep_smsg_callbacks[];

/*
 * typedef for completer functions invoked
 * at initiator when local CQE (tx) is processed.
 *
 * Same signature as the completer_fn member of struct gnix_tx_descriptor:
 * an untyped descriptor pointer plus a gni_return_t, returning an int.
 */
typedef int (*smsg_completer_fn_t)(void  *desc, gni_return_t);
82 
/**
 * Set of attributes that can be passed to gnix_nic_alloc.
 *
 * @var gni_cdm_hndl         optional previously allocated gni_cdm_hndl to
 *                           use for allocating GNI resources (GNI CQs) for
 *                           this nic.
 * @var gni_cdm_modes        The mode bits gni_cdm_hndl was created with.
 * @var gni_nic_hndl         optional previously allocated gni_nic_hndl to
 *                           use for allocating GNI resources (GNI CQs) for
 *                           this nic
 * @var use_cdm_id           NOTE(review): presumably selects whether cdm_id
 *                           is honored - confirm against gnix_nic_alloc
 * @var cdm_id               NOTE(review): presumably the CDM id to use when
 *                           use_cdm_id is set - confirm
 * @var must_alloc           NOTE(review): presumably forces allocation of a
 *                           new nic instead of reusing an existing one -
 *                           confirm
 * @var auth_key             pointer to a struct gnix_auth_key; how it is
 *                           consumed is not visible in this header
 */
struct gnix_nic_attr {
	gni_cdm_handle_t gni_cdm_hndl;
	uint32_t	 gni_cdm_modes;
	gni_nic_handle_t gni_nic_hndl;
	bool use_cdm_id;
	uint32_t cdm_id;
	bool must_alloc;
	struct gnix_auth_key *auth_key;
};
104 
/**
 * GNIX nic struct
 *
 * @var gnix_nic_list        list element used for global NIC list
 * @var ptag_nic_list        list element used for NIC linked list associated
 *                           with a given PTAG.
 * @var gnix_nic_prog_list   list element for a temporary list used during
 *                           nic progression
 * @var lock                 lock used for serializing access to
 *                           gni_nic_hndl, rx_cq, and tx_cq
 * @var allocd_gni_res       NOTE(review): presumably a mask of the
 *                           GNIX_NIC_*_ALLOCD flags - confirm in gnix_nic.c
 * @var gni_cdm_hndl         handle for the GNI communication domain (CDM)
 *                           this nic is bound to.
 * @var gni_cdm_modes        The mode bits gni_cdm_hndl was created with.
 * @var gni_nic_hndl         handle for the GNI nic to which this GNIX nic is bound
 * @var rx_cq                GNI rx cq (non-blocking) bound to this nic
 * @var rx_cq_blk            GNI rx cq (blocking) bound to this nic
 * @var tx_cq                GNI tx cq (non-blocking) bound to this nic
 * @var tx_cq_blk            GNI tx cq (blocking) bound to this nic
 * @var progress_thread      thread id of progress thread for this nic
 * @var tx_desc_lock         lock taken by _gnix_nic_tx_alloc and
 *                           _gnix_nic_tx_free around updates of
 *                           tx_desc_active_list and tx_desc_free_list
 * @var tx_desc_active_list  linked list of active tx descriptors associated
 *                           with this nic
 * @var tx_desc_free_list    linked list of free tx descriptors associated
 *                           with this nic
 * @var tx_desc_base         base address for the block of memory from which
 *                           tx descriptors were allocated
 * @var prog_vcs_lock        lock for prog_vcs
 * @var prog_vcs             list of VCs needing progress
 * @var vc_freelist          free list (see the inline note on the member)
 * @var ptag                 ptag of the GNI CDM this nic is bound to
 * @var cookie               cookie of the GNI CDM this nic is bound to
 * @var device_id            device id of the GNI nic this nic is bound to (always 0,
 *                           unless ever need to support multiple GNI nics/node)
 * @var device_addr          address (L2) of the GNI nic to which this nic is bound
 * @var max_tx_desc_id       max tx descriptor id for this nic
 * @var vc_id_lock           lock for serializing access to the vc_id_table for
 *                           this nic
 * @var vc_id_table          base address of the nic's vc_id_table
 * @var vc_id_table_capacity current capacity of the nic's vc_id_table
 * @var vc_id_table_count    current location of the next unoccupied entry in the
 *                           vc_id_table - note there may be unused entries
 *                           below this entry.
 * @var vc_id_bitmap         bitmap indicating which entries in the vc_id_table are
 *                           currently in use (1 - used, 0 - unused)
 * @var mem_per_mbox         number of bytes consumed per GNI SMSG mailbox associated
 *                           with this nic's vc_id_table
 * @var mbox_hndl            handle for the mailbox allocator bound to this nic
 * @var s_rdma_buf_hndl      handle for send side rdma buffer allocator bound to this nic
 * @var r_rdma_buf_hndl      handle for recv side rdma buffer allocator bound to this nic
 * @var ref_cnt              ref cnt for this nic
 * @var smsg_callbacks       pointer to table of GNI SMSG callback functions used
 *                           by this nic for processing incoming GNI SMSG
 *                           messages
 * @var err_txds             slist of error'd tx descriptors
 * @var irq_mem_hndl         gni_mem_handle_t for mmap region registered with
 *                           gni hw cq handle used for GNI_PostCqWrite
 * @var irq_mmap_addr        base address of mmap associated with irq_mem_hndl
 * @var irq_mmap_len         length of the mmap in bytes
 * @var requires_lock        condition argument handed to COND_ACQUIRE and
 *                           COND_RELEASE for tx_desc_lock and vc_id_lock
 * @var mdd_resources_set    flag to indicate whether GNI_SetMDDResources has
 *                           been called yet to reserve MDD resources
 * @var using_vmdh           denotes whether nic is associated with a domain
 *                           that is utilizing VMDH
 */
struct gnix_nic {
	struct dlist_entry gnix_nic_list; /* global NIC list */
	struct dlist_entry ptag_nic_list; /* global PTAG NIC list */
	struct dlist_entry gnix_nic_prog_list; /* temporary list for nic progression */
	fastlock_t lock;
	uint32_t allocd_gni_res;
	gni_cdm_handle_t gni_cdm_hndl;
	uint32_t	 gni_cdm_modes;
	gni_nic_handle_t gni_nic_hndl;
	gni_cq_handle_t rx_cq;
	gni_cq_handle_t rx_cq_blk;
	gni_cq_handle_t tx_cq;
	gni_cq_handle_t tx_cq_blk;
	pthread_t progress_thread;
	fastlock_t tx_desc_lock;
	struct dlist_entry tx_desc_active_list;
	struct dlist_entry tx_desc_free_list;
	struct gnix_tx_descriptor *tx_desc_base;
	fastlock_t prog_vcs_lock;
	struct dlist_entry prog_vcs;
	/* note this free list will be initialized to be thread safe */
	struct gnix_freelist vc_freelist;
	uint8_t ptag;
	uint32_t cookie;
	uint32_t device_id;
	uint32_t device_addr;
	int max_tx_desc_id;
	fastlock_t vc_id_lock;
	void **vc_id_table;
	int vc_id_table_capacity;
	int vc_id_table_count;
	gnix_bitmap_t vc_id_bitmap;
	uint32_t mem_per_mbox;
	struct gnix_mbox_alloc_handle *mbox_hndl;
	/* TODO: gnix_buddy_alloc_handle_t *alloc_handle */
	struct gnix_mbox_alloc_handle *s_rdma_buf_hndl;
	struct gnix_mbox_alloc_handle *r_rdma_buf_hndl;
	struct gnix_reference ref_cnt;
	smsg_callback_fn_t const *smsg_callbacks;
	struct slist err_txds;
	gni_mem_handle_t irq_mem_hndl;
	void *irq_mmap_addr;
	size_t irq_mmap_len;
	int requires_lock;
	int mdd_resources_set;
	int using_vmdh;
};
217 
218 
/**
 * gnix_smsg_eager_hdr  - first part of an eager send SMSG message
 *
 * Embedded in struct gnix_tx_descriptor as the eager_hdr union member.
 *
 * @var flags      flag bits from send side that are needed at
 *                 rcv side (e.g. FI_REMOTE_CQ_DATA)
 * @var imm        immediate data associated with this message
 * @var msg_tag    libfabric tag associated with this message
 * @var len        length in bytes of the incoming message
 */
struct gnix_smsg_eager_hdr {
	uint64_t flags;
	uint64_t imm;
	uint64_t msg_tag;
	size_t len;
};
234 
/**
 * gnix_smsg_rndzv_start_hdr  - first part of a rendezvous send start SMSG
 *                              message
 *
 * Embedded in struct gnix_tx_descriptor as the rndzv_start_hdr union member.
 *
 * @var flags      flag bits from send side that are needed at
 *                 rcv side (e.g. FI_REMOTE_CQ_DATA)
 * @var imm        immediate data associated with this message
 * @var msg_tag    libfabric tag associated with this message
 * @var mdh        MDH (GNI memory handle) for the rendezvous send buffer
 * @var addr       address of the rendezvous send buffer
 * @var len        length in bytes of the send buffer
 * @var req_addr   local request address
 * @var head       unaligned data at the head of a rendezvous send
 * @var tail       unaligned data at the tail of a rendezvous send
 */
struct gnix_smsg_rndzv_start_hdr {
	uint64_t flags;
	uint64_t imm;
	uint64_t msg_tag;
	gni_mem_handle_t mdh;
	uint64_t addr;
	size_t len;
	uint64_t req_addr;
	uint32_t head;
	uint32_t tail;
};
261 
/**
 * gnix_smsg_rndzv_iov_start_hdr  - first part of an iovec rendezvous send
 *                                  start SMSG message
 *
 * Embedded in struct gnix_tx_descriptor as the rndzv_iov_start_hdr union
 * member.  This header carries no memory handle member of its own.
 *
 * @var flags         the sender's flags needed on the receive side.
 * @var imm           the immediate data associated with this message.
 * @var msg_tag       the tag associated with this message.
 * @var req_addr      the sender's fabric request address.
 * @var iov_cnt       the length of the scatter/gather vector.
 * @var send_len      the cumulative size (in bytes) of the client's
 *                    iov base buffers.
 *
 * @note the actual iov base addresses and lengths are placed in the
 * data section of the start message.
 */
struct gnix_smsg_rndzv_iov_start_hdr {
	uint64_t flags;
	uint64_t imm;
	uint64_t msg_tag;
	uint64_t req_addr;
	size_t   iov_cnt;
	uint64_t send_len;
};
285 
/**
 * gnix_smsg_rndzv_fin_hdr  - first part of a rendezvous send fin SMSG message
 *
 * Embedded in struct gnix_tx_descriptor as the rndzv_fin_hdr union member.
 *
 * @var req_addr   returned local request address
 * @var status     NOTE(review): presumably the completion status reported
 *                 back for the rendezvous transfer - confirm against the
 *                 code that fills in and consumes this header
 */
struct gnix_smsg_rndzv_fin_hdr {
	uint64_t req_addr;
	int status;
};
295 
/**
 * gnix_smsg_rma_data_hdr  - RMA remote data message
 *
 * Embedded in struct gnix_tx_descriptor as the rma_data_hdr union member.
 *
 * @var flags       control flags
 * @var user_flags  remote CQ user flags
 * @var user_data   remote CQ user immediate data
 */
struct gnix_smsg_rma_data_hdr {
	uint64_t flags;
	uint64_t user_flags;
	uint64_t user_data;
};
308 
/**
 * gnix_smsg_amo_cntr_hdr  - AMO remote counter message
 *
 * Embedded in struct gnix_tx_descriptor as the amo_cntr_hdr union member.
 *
 * @var flags  control flags
 */
struct gnix_smsg_amo_cntr_hdr {
	uint64_t flags;
};
317 
/**
 * gnix_tx_descriptor - full tx descriptor used to track GNI SMSG
 *                      and Post operations
 *
 * The GNI post descriptors and the SMSG headers share storage through an
 * anonymous union, so only one of them is meaningful at a time.
 *
 * @var list                list element; links the descriptor into a nic's
 *                          tx_desc_free_list or tx_desc_active_list
 * @var gni_desc            embedded GNI post descriptor
 * @var gni_ct_descs        embedded GNI post descriptors for concatenated
 *                          gets used for unaligned gets
 * @var gni_more_ct_descs   embedded GNI post descriptors for concatenated
 *                          puts or gets for FI_MORE.
 * @var eager_hdr           embedded header for SMSG eager protocol
 * @var rndzv_start_hdr     embedded header for rendezvous protocol
 * @var rndzv_iov_start_hdr embedded header for iovec rndzv protocol
 * @var rndzv_fin_hdr       embedded header for rendezvous protocol
 * @var rma_data_hdr        embedded header for remote notification for
 *                          rma operations
 * @var amo_cntr_hdr        embedded header for AMO remote counter events.
 * @var req                 pointer to fab request associated with this
 *                          descriptor
 * @var completer_fn        call back to invoke when associated GNI CQE's are
 *                          returned.
 * @var id                  the id of this descriptor - the value returned
 *                          from GNI_CQ_MSG_ID
 * @var err_list            Error TXD list entry
 */
struct gnix_tx_descriptor {
	struct dlist_entry          list;
	union {
		struct {
			gni_post_descriptor_t        gni_desc;
			gni_ct_get_post_descriptor_t gni_ct_descs[2];
			void			     *gni_more_ct_descs;
		};
		struct gnix_smsg_eager_hdr           eager_hdr;
		struct gnix_smsg_rndzv_start_hdr     rndzv_start_hdr;
		struct gnix_smsg_rndzv_iov_start_hdr rndzv_iov_start_hdr;
		struct gnix_smsg_rndzv_fin_hdr       rndzv_fin_hdr;
		struct gnix_smsg_rma_data_hdr        rma_data_hdr;
		struct gnix_smsg_amo_cntr_hdr	     amo_cntr_hdr;
	};
	struct gnix_fab_req *req;
	int  (*completer_fn)(void *, gni_return_t);
	int id;
	struct slist_entry err_list;
};
363 
364 /*
365  * prototypes
366  */
367 
368 /**
369  * @brief allocate a tx descriptor to use for GNI Post, SMSG ops
370  *
371  * @param[in] nic      pointer to previously allocated gnix_nic struct
372  * @param[in] tdesc    pointer to address where allocated tx descriptor
373  *                     is to be stored
374  * @return             FI_SUCCESS on success, -FI_ENOSPC no free tx descriptors
375  */
_gnix_nic_tx_alloc(struct gnix_nic * nic,struct gnix_tx_descriptor ** desc)376 static inline int _gnix_nic_tx_alloc(struct gnix_nic *nic,
377                struct gnix_tx_descriptor **desc)
378 {
379     struct dlist_entry *entry;
380 
381     COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock);
382     if (dlist_empty(&nic->tx_desc_free_list)) {
383         COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
384         return -FI_ENOSPC;
385     }
386 
387     entry = nic->tx_desc_free_list.next;
388     dlist_remove_init(entry);
389     dlist_insert_head(entry, &nic->tx_desc_active_list);
390     *desc = dlist_entry(entry, struct gnix_tx_descriptor, list);
391     COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
392 
393     return FI_SUCCESS;
394 }
395 
396 
397 /**
398  * @brief frees a previously allocated tx descriptor
399  *
400  * @param[in] nic      pointer to previously allocated gnix_nic struct used
401  *                     when allocating the tx descriptor to be freed
402  * @param[in] tdesc    pointer to previously allocated tx descriptor
403  * @return             FI_SUCCESS on success
404  */
_gnix_nic_tx_free(struct gnix_nic * nic,struct gnix_tx_descriptor * desc)405 static inline int _gnix_nic_tx_free(struct gnix_nic *nic,
406                                 struct gnix_tx_descriptor *desc)
407 {
408     COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock);
409     dlist_remove_init(&desc->list);
410     dlist_insert_head(&desc->list, &nic->tx_desc_free_list);
411     COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
412 
413     return FI_SUCCESS;
414 }
415 
416 
/**
 * @brief allocate a gnix_nic struct
 *
 * @param[in] domain   pointer to previously allocated gnix_fid_domain struct
 * @param[in] attrs    optional pointer to an attributes argument.  NULL
 *                     can be supplied if no attributes are required
 * @param[out] nic_ptr pointer to address where address of allocated nic is
 *                     to be returned
 * @return             FI_SUCCESS on success, -FI_ENOMEM if insufficient memory
 *                     to allocate nic struct, -FI_EINVAL if an invalid domain
 *                     struct was supplied, -FI_EBUSY if insufficient resources
 *                     are available to allocate the nic struct, -FI_EACCES
 *                     if a permissions access error occurred while allocating
 *                     the nic struct, -FI_EAGAIN if an invalid state
 *                     prevents creation or an interrupt was received while
 *                     allocating kernel related resources for the nic.
 */
int gnix_nic_alloc(struct gnix_fid_domain *domain,
		   struct gnix_nic_attr *attrs,
		   struct gnix_nic **nic_ptr);
437 
/**
 * @brief frees a previously allocated gnix_nic struct
 *
 * @param[in] nic      pointer to previously allocated gnix_nic struct
 * @return             FI_SUCCESS on success.
 *                     NOTE(review): this comment previously listed
 *                     "-FI_ENOSPC no free tx descriptors", which reads as
 *                     copied from the tx descriptor allocator - confirm the
 *                     actual error values against the implementation.
 */
int _gnix_nic_free(struct gnix_nic *nic);
445 
/**
 * @brief progresses control/data operations associated with the nic
 *
 * @param[in] arg      pointer to previously allocated gnix_nic struct,
 *                     passed as an untyped pointer
 * @return             FI_SUCCESS on success, -FI_EINVAL if an invalid
 *                     nic struct was supplied. TODO: a lot more error
 *                     values can be returned.
 */
int _gnix_nic_progress(void *arg);
455 
/**
 * @brief allocate a remote id for an object, used for looking up an object
 *        in O(1) based on returned value of GNI_CQ_INST_ID applied to a GNI
 *        CQE
 *
 * @param[in] nic             pointer to previously allocated gnix_nic struct
 * @param[out] remote_id      address where the allocated remote_id is returned
 * @param[in] entry           pointer to object to be associated with the
 *                            remote id
 *
 * @return             FI_SUCCESS on success, -FI_ENOMEM if insufficient
 *                     memory to allocate remote_id
 */
int _gnix_nic_get_rem_id(struct gnix_nic *nic, int *remote_id, void *entry);
470 
/**
 * @brief release a previously allocated remote_id
 *
 * @param[in] nic            pointer to previously allocated gnix_nic struct
 * @param[in] remote_id      previously allocated remote_id
 *
 * @return             FI_SUCCESS on success, -FI_EINVAL if an invalid
 *                     argument was provided.
 */
int _gnix_nic_free_rem_id(struct gnix_nic *nic, int remote_id);
481 
482 /**
483  * @brief Look up an element by id
484  *
485  * @param[in] nic    pointer to gni nic with which the vc is associated
486  * @param[in] rem_id rem_id of the object being looked up
487  *                   rem_id comes from GNI_CQ_GET_INST_ID on a GNI CQE
488  *
489  * @return           pointer to vc with the given vc_id
490  *
491  * This function is only here because its used for criterion tests,
492  * otherwise it would be a static function within gnix_nic.c
493  */
__gnix_nic_elem_by_rem_id(struct gnix_nic * nic,int rem_id)494 static inline void *__gnix_nic_elem_by_rem_id(struct gnix_nic *nic, int rem_id)
495 {
496 	void *elem;
497 
498 	assert(nic);
499 
500 	COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock);
501 
502 	assert(rem_id <= nic->vc_id_table_count);
503 	elem = nic->vc_id_table[rem_id];
504 
505 	COND_RELEASE(nic->requires_lock, &nic->vc_id_lock);
506 
507 	return elem;
508 }
509 
/**
 * @brief error injection hook for a tx descriptor
 *
 * NOTE(review): the implementation is not visible in this header.
 * Presumably it queues txd on the nic's err_txds list through the
 * descriptor's err_list entry - confirm in gnix_nic.c.
 *
 * @param[in] nic      pointer to previously allocated gnix_nic struct
 * @param[in] txd      pointer to the tx descriptor to inject an error for
 */
void _gnix_nic_txd_err_inject(struct gnix_nic *nic,
			      struct gnix_tx_descriptor *txd);
512 
/**
 * @brief Initialize global NIC data.
 *
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably sets up the global NIC lists declared at the top
 * of this header (gnix_nic_list, gnix_nic_list_ptag) - confirm in gnix_nic.c.
 */
void _gnix_nic_init(void);
517 
518 #endif /* _GNIX_NIC_H_ */
519