/*
 * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
 * Copyright (c) 2015-2017 Los Alamos National Security, LLC.
 * All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

35 #ifndef _GNIX_VC_H_
36 #define _GNIX_VC_H_
37
38 #if HAVE_CONFIG_H
39 #include <config.h>
40 #endif /* HAVE_CONFIG_H */
41
42 #include "gnix.h"
43 #include "gnix_bitmap.h"
44 #include "gnix_av.h"
45 #include "gnix_xpmem.h"
46 #include "gnix_cm_nic.h"
47
/*
 * mode bits
 *
 * Each macro is a distinct single-bit unsigned mask.
 * NOTE(review): presumably OR'ed into gnix_vc.modes -- confirm against users.
 */
#define GNIX_VC_MODE_IN_WQ		(1U)
#define GNIX_VC_MODE_IN_HT		(1U << 1)
#define GNIX_VC_MODE_DG_POSTED		(1U << 2)
#define GNIX_VC_MODE_PENDING_MSGS	(1U << 3)
#define GNIX_VC_MODE_PEER_CONNECTED	(1U << 4)
#define GNIX_VC_MODE_IN_TABLE		(1U << 5)
#define GNIX_VC_MODE_XPMEM		(1U << 6)

/*
 * VC flags
 *
 * NOTE(review): presumably bit positions within the gnix_vc.flags bitmap
 * rather than masks -- confirm against users. Value 3 is not assigned.
 */
#define GNIX_VC_FLAG_RX_SCHEDULED	0
#define GNIX_VC_FLAG_WORK_SCHEDULED	1
#define GNIX_VC_FLAG_TX_SCHEDULED	2
#define GNIX_VC_FLAG_SCHEDULED		4

/*
 * defines for connection state for gnix VC
 *
 * Enumerators are numbered consecutively starting at 1
 * (GNIX_VC_CONN_NONE == 1 ... GNIX_VC_CONN_ERROR == 6); 0 is not a
 * valid state value.
 */
enum gnix_vc_conn_state {
	GNIX_VC_CONN_NONE = 1,
	GNIX_VC_CONNECTING,
	GNIX_VC_CONNECTED,
	GNIX_VC_CONN_TERMINATING,
	GNIX_VC_CONN_TERMINATED,
	GNIX_VC_CONN_ERROR
};

/*
 * Connection message types: request (1) and response (2).
 */
enum gnix_vc_conn_req_type {
	GNIX_VC_CONN_REQ = 1,
	GNIX_VC_CONN_RESP
};

/*
 * Mailbox exchange status bits (unsigned long masks).
 * NOTE(review): presumably tracked in gnix_vc.post_state -- confirm.
 */
#define LOCAL_MBOX_SENT		(1UL)
#define REMOTE_MBOX_RCVD	(1UL << 1)

85 /**
86 * Virtual Connection (VC) struct
87 *
88 * @var prog_list NIC VC progress list
89 * @var work_queue Deferred work request queue
90 * @var tx_queue TX request queue
91 * @var list used for unmapped vc list
92 * @var fr_list used for vc free list
93 * @var entry used internally for managing linked lists
94 * of vc structs that require O(1) insertion/removal
95 * @var peer_fi_addr FI address of peer with which this VC is connected
96 * @var peer_addr address of peer with which this VC is connected
97 * @var peer_cm_nic_addr address of the cm_nic being used by peer, this
98 * is the address to which GNI datagrams must be
99 * posted
100 * @var ep libfabric endpoint with which this VC is
101 * associated
102 * @var smsg_mbox pointer to GNI SMSG mailbox used by this VC
103 * to exchange SMSG messages with its peer
104 * @var gnix_ep_name cache for storing remote endpoint name
105 * @var gni_ep GNI endpoint for this VC
106 * @var outstanding_fab_reqs Count of outstanding libfabric level requests
107 * associated with this endpoint.
108 * @var conn_state Connection state of this VC
109 * @var vc_id ID of this vc. Allows for rapid O(1) lookup
110 * of the VC when using GNI_CQ_GET_INST_ID to get
111 * the inst_id of a GNI CQE.
112 * @var peer_id vc_id of peer.
113 * @var modes Used internally to track current state of
114 * the VC not pertaining to the connection state.
115 * @var flags Bitmap used to hold vc schedule state
116 * @var peer_irq_mem_hndl peer GNI memhndl used for delivering
117 * GNI_PostCqWrite requests to remote peer
118 * @var peer_caps peer capability flags
119 */
120 struct gnix_vc {
121 struct dlist_entry prog_list; /* NIC VC progress list entry */
122 struct dlist_entry work_queue; /* Work reqs */
123 struct dlist_entry tx_queue; /* TX reqs */
124
125 struct dlist_entry list; /* General purpose list */
126 struct dlist_entry fr_list; /* fr list */
127 fi_addr_t peer_fi_addr;
128 struct gnix_address peer_addr;
129 struct gnix_address peer_cm_nic_addr;
130 struct gnix_fid_ep *ep;
131 void *smsg_mbox;
132 void *gnix_ep_name;
133 gni_ep_handle_t gni_ep;
134 ofi_atomic32_t outstanding_tx_reqs;
135 enum gnix_vc_conn_state conn_state;
136 uint32_t post_state;
137 int vc_id;
138 int peer_id;
139 int modes;
140 gnix_bitmap_t flags; /* We're missing regular bit ops */
141 gni_mem_handle_t peer_irq_mem_hndl;
142 xpmem_apid_t peer_apid;
143 uint64_t peer_caps;
144 uint32_t peer_key_offset;
145 };
146
/*
 * prototypes
 */

/**
 * @brief Allocates a virtual channel(vc) struct
 *
 * @param[in]  ep_priv pointer to previously allocated gnix_fid_ep object
 * @param[in]  entry   av entry for remote peer for this VC.  Can be NULL
 *                     for accepting VCs.
 * @param[out] vc      location in which the address of the allocated vc
 *                     struct is to be returned.
 * @return FI_SUCCESS on success, -FI_ENOMEM if allocation of vc struct fails.
 */
int _gnix_vc_alloc(struct gnix_fid_ep *ep_priv,
		   struct gnix_av_addr_entry *entry, struct gnix_vc **vc);

/**
 * @brief Initiates non-blocking connect of a vc with its peer
 *
 * @param[in] vc pointer to previously allocated vc struct
 *
 * @return FI_SUCCESS on success, -FI_EINVAL if an invalid field in the vc
 *         struct is encountered, -FI_ENOMEM if insufficient memory to
 *         initiate connection request.
 */
int _gnix_vc_connect(struct gnix_vc *vc);

/**
 * @brief Destroys a previously allocated vc and cleans up resources
 *        associated with the vc
 *
 * @param[in] vc pointer to previously allocated vc struct
 *
 * @return FI_SUCCESS on success, -FI_EINVAL if an invalid field in the vc
 *         struct is encountered.
 */
int _gnix_vc_destroy(struct gnix_vc *vc);

/**
 * @brief Add a vc to the work queue of its associated nic
 *
 * @param[in] vc pointer to previously allocated vc struct
 *
 * @return FI_SUCCESS on success, -FI_ENOMEM if there is insufficient
 *         memory to enqueue the work request
 */
int _gnix_vc_add_to_wq(struct gnix_vc *vc);

/**
 * @brief Progress a VC's SMSG mailbox.
 *
 * Messages are dequeued from the VC's SMSG mailbox until cleared or a failure
 * is encountered.
 *
 * @param[in] vc The GNIX VC to progress.
 */
int _gnix_vc_dequeue_smsg(struct gnix_vc *vc);

/**
 * @brief Schedule a VC for RX progress.
 *
 * The VC will have its SMSG mailbox progressed while the NIC is being
 * progressed in the near future.
 *
 * @param[in] vc The GNIX VC to schedule.
 */
int _gnix_vc_rx_schedule(struct gnix_vc *vc);

/**
 * @brief Queue a request with deferred work.
 *
 * @param[in] req The GNIX fabric request to queue.
 */
int _gnix_vc_queue_work_req(struct gnix_fab_req *req);

/**
 * @brief Requeue a request with deferred work.  Used only in TX completers
 * where the VC lock is not yet held.
 *
 * @param[in] req The GNIX fabric request to requeue.
 */
int _gnix_vc_requeue_work_req(struct gnix_fab_req *req);

/**
 * @brief Schedule a VC for TX progress.
 *
 * The VC will have its tx_queue progressed while the NIC is being progressed
 * in the near future.
 *
 * @param[in] vc The GNIX VC to schedule.
 */
int _gnix_vc_tx_schedule(struct gnix_vc *vc);

/**
 * @brief Queue a new TX request.
 *
 * @param[in] req The GNIX fabric request to queue.
 */
int _gnix_vc_queue_tx_req(struct gnix_fab_req *req);

/**
 * @brief Progress NIC VCs.
 *
 * There are three facets of VC progress: RX, deferred work and TX.  The NIC
 * maintains one queue of VCs for each type of progress.  When a VC requires
 * progress, the associated _gnix_vc_<prog_type>_schedule() function is used to
 * schedule processing within _gnix_vc_nic_progress().  The queues are
 * independent to prevent a stall in TX processing from delaying RX processing,
 * and so forth.
 *
 * RX progress involves dequeueing SMSG messages and progressing the state of
 * associated requests.  If receipt of a message during RX progress will
 * trigger a new network operation (or similarly heavy or lock dependent
 * operation), that work should be queued in the deferred work queue, which
 * will be progressed once VC RX work is complete.  Examples of this deferred
 * work include the start of rendezvous data transfer or freeing an automatic
 * memory registration after an RX completion.
 *
 * The deferred work queue is processed after RX progress, where most deferred
 * work will be originated, and before TX processing, giving network resource
 * priority (specifically TXDs) to TX requests which have already been
 * initiated.
 *
 * New TX requests belong in a VC's TX queue.  Ordering of the VC TX queue is
 * enforced.  A request using the FI_FENCE flag will cause a VC's TX queue to
 * be stalled until that request is completed.
 *
 * @param[in] nic The GNIX NIC to progress.
 */
int _gnix_vc_nic_progress(struct gnix_nic *nic);

279 /**
280 * @brief return vc associated with a given ep/dest address, or the ep in the
281 * case of FI_EP_MSG endpoint type. For FI_EP_RDM type, a vc may be
282 * allocated and a connection initiated if no vc is associated with
283 * ep/dest_addr.
284 *
285 * @param[in] ep pointer to a previously allocated endpoint
286 * @param[in] dest_addr for FI_EP_RDM endpoints, used to look up vc associated
287 * with this target address
288 * @param[out] vc_ptr address in which to store pointer to returned vc
289 * @return FI_SUCCESS on success, -FI_ENOMEM insufficient
290 * memory to allocate vc, -FI_EINVAL if an invalid
291 * argument was supplied
292 */
293 int _gnix_vc_ep_get_vc(struct gnix_fid_ep *ep, fi_addr_t dest_addr,
294 struct gnix_vc **vc_ptr);
295
296 /**
297 * @brief Return the FI address of a VC.
298 *
299 * @param vc The VC for to use for lookup.
300 * @return The FI address of the input VC. FI_ADDR_NOTAVAIL on error or
301 * if the VC is of incompatible type.
302 */
303 fi_addr_t _gnix_vc_peer_fi_addr(struct gnix_vc *vc);
304
305 int _gnix_vc_cm_init(struct gnix_cm_nic *cm_nic);
306 int _gnix_vc_schedule(struct gnix_vc *vc);
307 int _gnix_vc_smsg_init(struct gnix_vc *vc,
308 int peer_id,
309 gni_smsg_attr_t *peer_smsg_attr,
310 gni_mem_handle_t *peer_irq_mem_hndl);
311
312 /*
313 * inline functions
314 */
315
316 /**
317 * @brief Return connection state of a vc
318 *
319 * @param[in] vc pointer to previously allocated vc struct
320 * @return connection state of vc
321 */
_gnix_vc_state(struct gnix_vc * vc)322 static inline enum gnix_vc_conn_state _gnix_vc_state(struct gnix_vc *vc)
323 {
324 assert(vc);
325 return vc->conn_state;
326 }
327
328 #endif /* _GNIX_VC_H_ */
329