1 /*
2 * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
3 * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #ifndef _GNIX_NIC_H_
35 #define _GNIX_NIC_H_
36
37 #if HAVE_CONFIG_H
38 #include <config.h>
39 #endif /* HAVE_CONFIG_H */
40 #include <ofi_list.h>
41 #include <assert.h>
42
43 #include "gnix.h"
44 #include "gnix_bitmap.h"
45 #include "gnix_mbox_allocator.h"
46 #include "gnix_util.h"
47
/*
 * Default number of NICs per ptag.
 * NOTE(review): presumably the initial value of gnix_max_nics_per_ptag -
 * confirm where that global is defined.
 */
48 #define GNIX_DEF_MAX_NICS_PER_PTAG 4
49
50 /*
51 * globals - declared here, defined in another translation unit
52 */
53
54 extern uint32_t gnix_max_nics_per_ptag; /* NOTE(review): presumably the per-ptag NIC limit in effect - confirm */
55 extern struct dlist_entry gnix_nic_list_ptag[]; /* NOTE(review): presumably per-ptag list heads; nics link in via gnix_nic.ptag_nic_list - confirm */
56 extern struct dlist_entry gnix_nic_list; /* NOTE(review): presumably head of the global NIC list; nics link in via gnix_nic.gnix_nic_list - confirm */
57 extern pthread_mutex_t gnix_nic_list_lock; /* NOTE(review): presumably guards the NIC lists above - confirm */
58
59 /*
60 * allocation flags for cleaning up GNI resources
61 * when closing a gnix_nic - needed since these
62 * can be dup'd from another structure.
 *
 * Note that the flag values start at bit 1; bit 0 is not used by any
 * flag defined here.
 * NOTE(review): presumably stored in gnix_nic.allocd_gni_res - confirm.
63 */
64
65 #define GNIX_NIC_CDM_ALLOCD (1ULL << 1)
66 #define GNIX_NIC_TX_CQ_ALLOCD (1ULL << 2)
67 #define GNIX_NIC_RX_CQ_ALLOCD (1ULL << 3)
68
69 /*
70 * typedefs for callbacks for handling
71 * receipt of SMSG messages at the target
 *
 * A callback takes two untyped pointers (ptr, msg) and returns an int.
 * NOTE(review): the meaning of ptr and of the return value is not
 * visible in this header - confirm against the callback implementations.
72 */
73 typedef int (*smsg_callback_fn_t)(void *ptr, void *msg);
74
/* table of SMSG callbacks, defined in another translation unit */
75 extern smsg_callback_fn_t gnix_ep_smsg_callbacks[];
76
77 /*
78 * typedef for completer functions invoked
79 * at initiator when local CQE (tx) is processed
 *
 * Same signature as the completer_fn member of struct gnix_tx_descriptor.
80 */
81 typedef int (*smsg_completer_fn_t)(void *desc, gni_return_t);
82
83 /**
84 * Set of attributes that can be passed to gnix_nic_alloc().
85 *
86 * @var gni_cdm_hndl optional previously allocated gni_cdm_hndl to
87 * use for allocating GNI resources (GNI CQs) for
88 * this nic.
89 * @var gni_nic_hndl optional previously allocated gni_nic_hndl to
90 * use for allocating GNI resources (GNI CQs) for
91 * this nic
92 *
93 * @var gni_cdm_modes The mode bits gni_cdm_hndl was created with.
 * @var use_cdm_id NOTE(review): presumably selects whether cdm_id is
 * honored - confirm against gnix_nic_alloc()
 * @var cdm_id NOTE(review): presumably the CDM id to use when
 * use_cdm_id is set - confirm
 * @var must_alloc NOTE(review): presumably forces allocation of a new
 * nic instead of reusing an existing one - confirm
 * @var auth_key pointer to a struct gnix_auth_key; how it is used
 * is not visible in this header
94 */
95 struct gnix_nic_attr {
96 gni_cdm_handle_t gni_cdm_hndl;
97 uint32_t gni_cdm_modes;
98 gni_nic_handle_t gni_nic_hndl;
99 bool use_cdm_id;
100 uint32_t cdm_id;
101 bool must_alloc;
102 struct gnix_auth_key *auth_key;
103 };
104
105 /**
106 * GNIX nic struct
107 *
108 * @var gnix_nic_list list element used for global NIC list
109 * @var ptag_nic_list list element used for NIC linked list associated
110 * with a given PTAG.
 * @var gnix_nic_prog_list list element used for the temporary list built
 * for nic progression
111 * @var lock lock used for serializing access to
112 * gni_nic_hndl, rx_cq, and tx_cq
 * @var allocd_gni_res NOTE(review): presumably a mask of the
 * GNIX_NIC_*_ALLOCD flags recording which GNI resources
 * this nic allocated itself - confirm
113 * @var gni_cdm_hndl handle for the GNI communication domain (CDM)
114 * this nic is bound to.
115 * @var gni_cdm_modes The mode bits gni_cdm_hndl was created with.
116 * @var gni_nic_hndl handle for the GNI nic to which this GNIX nic is bound
117 * @var rx_cq GNI rx cq (non-blocking) bound to this nic
118 * @var rx_cq_blk GNI rx cq (blocking) bound to this nic
119 * @var tx_cq GNI tx cq (non-blocking) bound to this nic
120 * @var tx_cq_blk GNI tx cq (blocking) bound to this nic
121 * @var progress_thread thread id of progress thread for this nic
 * @var tx_desc_lock lock taken (conditionally on requires_lock) by
 * _gnix_nic_tx_alloc/_gnix_nic_tx_free around updates
 * of tx_desc_active_list and tx_desc_free_list
122 * @var tx_desc_active_list linked list of active tx descriptors associated
123 * with this nic
124 * @var tx_desc_free_list linked list of free tx descriptors associated
125 * with this nic
126 * @var tx_desc_base base address for the block of memory from which
127 * tx descriptors were allocated
128 * @var prog_vcs_lock lock for prog_vcs
129 * @var prog_vcs list of VCs needing progress
 * @var vc_freelist free list for VCs; initialized as thread safe
130 * @var wq_lock lock for serializing access to the nic's work queue
131 * @var nic_wq head of linked list of work queue elements
132 * associated with this nic
 * (NOTE(review): wq_lock and nic_wq are documented here
 * but are not members of the struct below)
133 * @var ptag ptag of the GNI CDM this nic is bound to
134 * @var cookie cookie of the GNI CDM this nic is bound to
136 * @var device_id device id of the GNI nic this nic is bound to (always 0,
137 * unless ever need to support multiple GNI nics/node)
138 * @var device_addr address (L2) of the GNI nic to which this nic is bound
139 * @var max_tx_desc_id max tx descriptor id for this nic
140 * @var vc_id_lock lock for serializing access to the vc_id_table for
141 * this nic
142 * @var vc_id_table base address of the nic's vc_id_table
143 * @var vc_id_table_capacity current capacity of the nic's vc_id_table
144 * @var vc_id_table_count current location of the next unoccupied entry in the
145 * vc_id_table - note there may be unused entries
146 * below this entry.
147 * @var vc_id_bitmap bitmap indicating which entries in the vc_id_table are
148 * currently in use (1 - used, 0 - unused)
149 * @var mem_per_mbox number of bytes consumed per GNI SMSG mailbox associated
150 * with this nic's vc_id_table
151 * @var mbox_hndl handle for the mailbox allocator bound to this nic
152 * @var s_rdma_buf_hndl handle for send side rdma buffer allocator bound to this nic
153 * @var r_rdma_buf_hndl handle for recv side rdma buffer allocator bound to this nic
154 * @var ref_cnt ref cnt for this nic
155 * @var smsg_callbacks pointer to table of GNI SMSG callback functions used
156 * by this nic for processing incoming GNI SMSG
157 * messages
158 * @var err_txds slist of error'd tx descriptors
159 * @var tx_cq_blk_post_cnt count of outstanding tx desc's posted using tx_cq_blk
160 * GNI CQ.
 * (NOTE(review): documented here but not a member of
 * the struct below)
161 * @var irq_mem_hndl gni_mem_handle_t for mmap region registered with
162 * gni hw cq handle used for GNI_PostCqWrite
163 * @var irq_mmap_addr base address of mmap associated with irq_mem_hndl
164 * @var irq_mmap_len length of the mmap in bytes
 * @var requires_lock condition passed to COND_ACQUIRE/COND_RELEASE together
 * with this nic's tx_desc_lock and vc_id_lock
 * (NOTE(review): presumably non-zero when locking is
 * required - confirm against the macro definitions)
165 * @var using_vmdh denotes whether nic is associated with a domain
166 * that is utilizing VMDH
167 * @var mdd_resources_set flag to indicate whether GNI_SetMDDResources has
168 * been called yet to reserve MDD resources
169 */
170 struct gnix_nic {
171 struct dlist_entry gnix_nic_list; /* global NIC list */
172 struct dlist_entry ptag_nic_list; /* global PTAG NIC list */
173 struct dlist_entry gnix_nic_prog_list; /* temporary list for nic progression */
174 fastlock_t lock;
175 uint32_t allocd_gni_res;
176 gni_cdm_handle_t gni_cdm_hndl;
177 uint32_t gni_cdm_modes;
178 gni_nic_handle_t gni_nic_hndl;
179 gni_cq_handle_t rx_cq;
180 gni_cq_handle_t rx_cq_blk;
181 gni_cq_handle_t tx_cq;
182 gni_cq_handle_t tx_cq_blk;
183 pthread_t progress_thread;
184 fastlock_t tx_desc_lock;
185 struct dlist_entry tx_desc_active_list;
186 struct dlist_entry tx_desc_free_list;
187 struct gnix_tx_descriptor *tx_desc_base;
188 fastlock_t prog_vcs_lock;
189 struct dlist_entry prog_vcs;
190 /* note this free list will be initialized for thread safe */
191 struct gnix_freelist vc_freelist;
192 uint8_t ptag;
193 uint32_t cookie;
194 uint32_t device_id;
195 uint32_t device_addr;
196 int max_tx_desc_id;
197 fastlock_t vc_id_lock;
198 void **vc_id_table;
199 int vc_id_table_capacity;
200 int vc_id_table_count;
201 gnix_bitmap_t vc_id_bitmap;
202 uint32_t mem_per_mbox;
203 struct gnix_mbox_alloc_handle *mbox_hndl;
204 /* TODO: gnix_buddy_alloc_handle_t *alloc_handle */
205 struct gnix_mbox_alloc_handle *s_rdma_buf_hndl;
206 struct gnix_mbox_alloc_handle *r_rdma_buf_hndl;
207 struct gnix_reference ref_cnt;
208 smsg_callback_fn_t const *smsg_callbacks;
209 struct slist err_txds;
210 gni_mem_handle_t irq_mem_hndl;
211 void *irq_mmap_addr;
212 size_t irq_mmap_len;
213 int requires_lock;
214 int mdd_resources_set;
215 int using_vmdh;
216 };
217
218
219 /**
220 * gnix_smsg_eager_hdr - first part of an eager send SMSG message
221 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member eager_hdr).
 *
222 * @var flags flag bits from send side that are needed at
223 * rcv side (e.g. FI_REMOTE_CQ_DATA)
224 * @var imm immediate data associated with this message
225 * @var msg_tag libfabric tag associated with this message
226 * @var len length in bytes of the incoming message
227 */
228 struct gnix_smsg_eager_hdr {
229 uint64_t flags;
230 uint64_t imm;
231 uint64_t msg_tag;
232 size_t len;
233 };
234
235 /**
236 * gnix_smsg_rndzv_start_hdr - first part of a rendezvous send start SMSG
237 * message
238 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member rndzv_start_hdr).
 *
239 * @var flags flag bits from send side that are needed at
240 * rcv side (e.g. FI_REMOTE_CQ_DATA)
241 * @var imm immediate data associated with this message
242 * @var msg_tag libfabric tag associated with this message
243 * @var mdh MDH for the rendezvous send buffer
244 * @var addr address of the rendezvous send buffer
245 * @var len length in bytes of the send buffer
246 * @var req_addr local request address
247 * @var head unaligned data at the head of a rendezvous send
248 * @var tail unaligned data at the tail of a rendezvous send
249 */
250 struct gnix_smsg_rndzv_start_hdr {
251 uint64_t flags;
252 uint64_t imm;
253 uint64_t msg_tag;
254 gni_mem_handle_t mdh;
255 uint64_t addr;
256 size_t len;
257 uint64_t req_addr;
258 uint32_t head;
259 uint32_t tail;
260 };
261
262 /**
263 * gnix_smsg_rndzv_iov_start_hdr - first part of an iovec rendezvous
 * send start SMSG message
264 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member rndzv_iov_start_hdr).
 *
265 * @var flags the sender's flags needed on the receive side.
266 * @var imm the immediate data associated with this message.
267 * @var msg_tag the tag associated with this message.
268 * @var mdh the memory handle associated with the iov buffer.
 * (NOTE(review): documented here but not a member of
 * the struct below)
270 * @var req_addr the sender's fabric request address.
269 * @var iov_cnt the length of the scatter/gather vector.
271 * @var send_len the cumulative size (in bytes) of the client's
272 * iov base buffers.
273 *
274 * @note the actual iov base addresses and lengths are placed in the
275 * data section of the start message.
276 */
277 struct gnix_smsg_rndzv_iov_start_hdr {
278 uint64_t flags;
279 uint64_t imm;
280 uint64_t msg_tag;
281 uint64_t req_addr;
282 size_t iov_cnt;
283 uint64_t send_len;
284 };
285
286 /**
287 * gnix_smsg_rndzv_fin_hdr - first part of a rendezvous send fin SMSG message
288 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member rndzv_fin_hdr).
 *
289 * @var req_addr returned local request address
 * @var status status value carried with the fin message
 * (NOTE(review): presumably the completion status of
 * the rendezvous transfer - confirm against the sender)
290 */
291 struct gnix_smsg_rndzv_fin_hdr {
292 uint64_t req_addr;
293 int status;
294 };
295
296 /**
297 * gnix_smsg_rma_data_hdr - RMA remote data message
298 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member rma_data_hdr).
 *
299 * @var flags control flags
300 * @var user_flags remote CQ user flags
301 * @var user_data remote CQ user immediate data
302 */
303 struct gnix_smsg_rma_data_hdr {
304 uint64_t flags;
305 uint64_t user_flags;
306 uint64_t user_data;
307 };
308
309 /**
310 * gnix_smsg_amo_cntr_hdr - AMO remote counter message
311 *
 * One of the header variants embedded in the union inside
 * struct gnix_tx_descriptor (member amo_cntr_hdr).
 *
312 * @var flags control flags
313 */
314 struct gnix_smsg_amo_cntr_hdr {
315 uint64_t flags;
316 };
317
318 /**
319 * gnix_tx_descriptor - full tx descriptor used to track GNI SMSG
320 * and Post operations
321 *
 * The GNI post descriptors and the SMSG headers live in one anonymous
 * union, so they share storage.
 *
322 * @var list list element (links the descriptor into a nic's
 * tx_desc_free_list or tx_desc_active_list)
323 * @var gni_desc embedded GNI post descriptor
324 * @var gni_ct_descs embedded GNI post descriptors for concatenated gets
325 * used for unaligned gets
326 * @var gni_more_ct_descs embedded GNI post descriptors for concatenated puts
327 * or gets for FI_MORE.
328 * @var eager_hdr embedded header for SMSG eager protocol
329 * @var rndzv_start_hdr embedded header for rendezvous protocol
330 * @var rndzv_iov_start_hdr embedded header for iovec rndzv protocol
331 * @var rndzv_fin_hdr embedded header for rendezvous protocol
332 * @var rma_data_hdr embedded header for remote notification for
333 * rma operations
334 * @var amo_cntr_hdr embedded header for AMO remote counter events.
335 * @var req pointer to fab request associated with this descriptor
336 * @var completer_fn call back to invoke when associated GNI CQE's are
337 * returned.
338 * @var id the id of this descriptor - the value returned
339 * from GNI_CQ_MSG_ID
340 * @var err_list Error TXD list entry
341 * @var tx_failures Number of times this transmission descriptor failed.
 * (NOTE(review): documented here but not a member of
 * the struct below)
342 */
343 struct gnix_tx_descriptor {
344 struct dlist_entry list;
345 union {
346 struct {
347 gni_post_descriptor_t gni_desc;
348 gni_ct_get_post_descriptor_t gni_ct_descs[2];
349 void *gni_more_ct_descs;
350 };
351 struct gnix_smsg_eager_hdr eager_hdr;
352 struct gnix_smsg_rndzv_start_hdr rndzv_start_hdr;
353 struct gnix_smsg_rndzv_iov_start_hdr rndzv_iov_start_hdr;
354 struct gnix_smsg_rndzv_fin_hdr rndzv_fin_hdr;
355 struct gnix_smsg_rma_data_hdr rma_data_hdr;
356 struct gnix_smsg_amo_cntr_hdr amo_cntr_hdr;
357 };
358 struct gnix_fab_req *req;
359 int (*completer_fn)(void *, gni_return_t);
360 int id;
361 struct slist_entry err_list;
362 };
363
364 /*
365 * prototypes
366 */
367
368 /**
369 * @brief allocate a tx descriptor to use for GNI Post, SMSG ops
370 *
371 * @param[in] nic pointer to previously allocated gnix_nic struct
372 * @param[in] tdesc pointer to address where allocated tx descriptor
373 * is to be stored
374 * @return FI_SUCCESS on success, -FI_ENOSPC no free tx descriptors
375 */
_gnix_nic_tx_alloc(struct gnix_nic * nic,struct gnix_tx_descriptor ** desc)376 static inline int _gnix_nic_tx_alloc(struct gnix_nic *nic,
377 struct gnix_tx_descriptor **desc)
378 {
379 struct dlist_entry *entry;
380
381 COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock);
382 if (dlist_empty(&nic->tx_desc_free_list)) {
383 COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
384 return -FI_ENOSPC;
385 }
386
387 entry = nic->tx_desc_free_list.next;
388 dlist_remove_init(entry);
389 dlist_insert_head(entry, &nic->tx_desc_active_list);
390 *desc = dlist_entry(entry, struct gnix_tx_descriptor, list);
391 COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
392
393 return FI_SUCCESS;
394 }
395
396
397 /**
398 * @brief frees a previously allocated tx descriptor
399 *
400 * @param[in] nic pointer to previously allocated gnix_nic struct used
401 * when allocating the tx descriptor to be freed
402 * @param[in] tdesc pointer to previously allocated tx descriptor
403 * @return FI_SUCCESS on success
404 */
_gnix_nic_tx_free(struct gnix_nic * nic,struct gnix_tx_descriptor * desc)405 static inline int _gnix_nic_tx_free(struct gnix_nic *nic,
406 struct gnix_tx_descriptor *desc)
407 {
408 COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock);
409 dlist_remove_init(&desc->list);
410 dlist_insert_head(&desc->list, &nic->tx_desc_free_list);
411 COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock);
412
413 return FI_SUCCESS;
414 }
415
416
417 /**
418 * @brief allocate a gnix_nic struct
419 *
420 * @param[in] domain pointer to previously allocated gnix_fid_domain struct
421 * @param[in] attrs optional pointer to an attributes argument. NULL
422 * can be supplied if no attributes are required
423 * @param[out] nic_ptr pointer to address where address of allocated nic is
424 * to be returned
425 * @return FI_SUCCESS on success, -FI_ENOMEM if insufficient memory
426 * to allocate nic struct, -FI_EINVAL if an invalid domain
427 * struct was supplied, -FI_EBUSY if insufficient resources
428 * are available to allocate the nic struct, -FI_EACCESS
429 * if a permissions access error occurred while allocating
430 * the nic struct, -FI_EAGAIN if an invalid state
431 * prevents creation or an interrupt was received while
432 * allocating kernel related resources for the nic.
433 */
434 int gnix_nic_alloc(struct gnix_fid_domain *domain,
435 struct gnix_nic_attr *attrs,
436 struct gnix_nic **nic_ptr);
437
438 /**
439 * @brief frees a previously allocated gnix_nic struct
440 *
441 * @param[in] nic pointer to previously allocated gnix_nic struct
442 * @return FI_SUCCESS on success, -FI_ENOSPC no free tx descriptors
 * NOTE(review): the -FI_ENOSPC text is identical to the
 * one on _gnix_nic_tx_alloc and may be a copy/paste
 * leftover - confirm against the implementation.
443 */
444 int _gnix_nic_free(struct gnix_nic *nic);
445
446 /**
447 * @brief progresses control/data operations associated with the nic
448 *
449 * @param[in] arg pointer to previously allocated gnix_nic struct
 * (passed as void *)
450 * @return FI_SUCCESS on success, -FI_EINVAL if an invalid
451 * nic struct was supplied. TODO: a lot more error
452 * values can be returned.
453 */
454 int _gnix_nic_progress(void *arg);
455
456 /**
457 * @brief allocate a remote id for an object, used for looking up an object
458 * in O(1) based on returned value of GNI_CQ_GET_INST_ID applied to a
459 * GNI CQE
460 *
461 * @param[in] nic pointer to previously allocated gnix_nic struct
462 * @param[out] remote_id address where allocated remote_id is returned
463 * @param[in] entry pointer to object to be associated with the
464 * remote id
 *
466 * @return FI_SUCCESS on success, -FI_ENOMEM if insufficient
467 * memory to allocate remote_id
468 */
469 int _gnix_nic_get_rem_id(struct gnix_nic *nic, int *remote_id, void *entry);
470
471 /**
472 * @brief release a previously allocated remote_id
473 *
474 * @param[in] nic pointer to previously allocated gnix_nic struct
475 * @param[in] remote_id previously allocated remote_id
 *
477 * @return FI_SUCCESS on success, -FI_EINVAL if an invalid
478 * argument was provided.
479 */
480 int _gnix_nic_free_rem_id(struct gnix_nic *nic, int remote_id);
481
482 /**
483 * @brief Look up an element by id
484 *
485 * @param[in] nic pointer to gni nic with which the vc is associated
486 * @param[in] rem_id rem_id of the object being looked up
487 * rem_id comes from GNI_CQ_GET_INST_ID on a GNI CQE
488 *
489 * @return pointer to vc with the given vc_id
490 *
491 * This function is only here because its used for criterion tests,
492 * otherwise it would be a static function within gnix_nic.c
493 */
__gnix_nic_elem_by_rem_id(struct gnix_nic * nic,int rem_id)494 static inline void *__gnix_nic_elem_by_rem_id(struct gnix_nic *nic, int rem_id)
495 {
496 void *elem;
497
498 assert(nic);
499
500 COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock);
501
502 assert(rem_id <= nic->vc_id_table_count);
503 elem = nic->vc_id_table[rem_id];
504
505 COND_RELEASE(nic->requires_lock, &nic->vc_id_lock);
506
507 return elem;
508 }
509
/**
 * @brief inject an error for a tx descriptor
 *
 * NOTE(review): presumably queues txd on the nic's err_txds list (through
 * txd->err_list) so that it is later handled as a failed transmit -
 * confirm against the implementation in gnix_nic.c.
 *
 * @param[in] nic pointer to previously allocated gnix_nic struct
 * @param[in] txd pointer to the tx descriptor the error applies to
 */
510 void _gnix_nic_txd_err_inject(struct gnix_nic *nic,
511 struct gnix_tx_descriptor *txd);
512
513 /**
514 * @brief Initialize global NIC data.
 *
 * Takes no arguments and returns nothing.
515 */
516 void _gnix_nic_init(void);
517
518 #endif /* _GNIX_NIC_H_ */
519