/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2017      Intel, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file This file contains wrappers for uGNI functionality. These wrappers are thread-safe
 * and are intended to provide a way to compare different strategies for handling mutual
 * exclusion around the uGNI library (which is not thread safe). These functions are all
 * defined to be inline to limit the cost to non-threaded users.
 */

#if !defined(BTL_UGNI_DEVICE_H)
#define BTL_UGNI_DEVICE_H

#include "btl_ugni_endpoint.h"
#include "btl_ugni_frag.h"

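/* Illustrative sketch (not part of the original interface): each wrapper below packs
 * its arguments into a stack-allocated struct and funnels the actual uGNI call through
 * mca_btl_ugni_device_serialize(), which provides mutual exclusion around the device.
 * Assuming a hypothetical device function my_fn_device and argument struct my_fn_arg_t,
 * the pattern looks like:
 *
 *     my_fn_arg_t args = {.ep_handle = handle};
 *     rc = (int) mca_btl_ugni_device_serialize (device,
 *              (mca_btl_ugni_device_serialize_fn_t) my_fn_device, &args);
 */
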
/* helper functions */
/**
 * @brief Output an error message on a CQ or completion error.
 *
 * @param[in] grc        GNI error from GNI_CqGetEvent or GNI_GetCompleted
 * @param[in] event_data event data from GNI_CqGetEvent
 *
 * This is a small function that prints an error message when an error
 * is detected on a CQ event.
 */
int mca_btl_ugni_event_fatal_error (gni_return_t grc, gni_cq_entry_t event_data);

/**
 * @brief Attempt to re-post an rdma descriptor
 *
 * @param[in] rdma_desc  RDMA descriptor that failed
 * @param[in] event_data CQ event data
 *
 * @returns OPAL_SUCCESS if the descriptor was re-posted
 * @returns OPAL_ERROR otherwise
 *
 * This function checks if the error is recoverable and re-posts the
 * descriptor if possible. The device lock MUST be held when this
 * function is called.
 */
int mca_btl_ugni_device_handle_event_error (struct mca_btl_ugni_rdma_desc_t *rdma_desc, gni_cq_entry_t event_data);

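/**
 * @brief Argument struct and device function for GNI_SmsgSendWTag
 *
 * The device function increments the active operation count on the
 * device's local SMSG CQ when the send is accepted by uGNI.
 */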
typedef struct mca_btl_ugni_smsg_send_wtag_arg_t {
    gni_ep_handle_t ep_handle;
    void *hdr;
    size_t hdr_len;
    void *payload;
    size_t payload_len;
    uint32_t msg_id;
    int tag;
} mca_btl_ugni_smsg_send_wtag_arg_t;

static inline int mca_btl_ugni_smsg_send_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_smsg_send_wtag_arg_t *args = (mca_btl_ugni_smsg_send_wtag_arg_t *) arg;
    gni_return_t grc;

    grc = GNI_SmsgSendWTag (args->ep_handle, args->hdr, args->hdr_len, args->payload,
                            args->payload_len, args->msg_id, args->tag);
    device->dev_smsg_local_cq.active_operations += (GNI_RC_SUCCESS == grc);
    return grc;
}

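/**
 * @brief Retrieve the next SMSG message and its tag from an endpoint
 *        (device function)
 */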
typedef struct mca_btl_ugni_smsg_get_next_wtag_arg_t {
    gni_ep_handle_t ep_handle;
    uintptr_t *data_ptr;
    uint8_t *tag;
} mca_btl_ugni_smsg_get_next_wtag_arg_t;

static inline intptr_t mca_btl_ugni_smsg_get_next_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_smsg_get_next_wtag_arg_t *args = (mca_btl_ugni_smsg_get_next_wtag_arg_t *) arg;
    return GNI_SmsgGetNextWTag(args->ep_handle, (void **) args->data_ptr, args->tag);
}

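/**
 * @brief Release the current SMSG message on an endpoint (device function)
 */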
static inline intptr_t mca_btl_ugni_smsg_release_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_endpoint_handle_t *ep_handle = (mca_btl_ugni_endpoint_handle_t *) arg;

    return GNI_SmsgRelease (ep_handle->gni_handle);
}

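/**
 * @brief Get the next event on a BTL-managed CQ (device function)
 *
 * Decrements the CQ's active operation count whenever anything other
 * than GNI_RC_NOT_DONE is returned.
 */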
typedef struct mca_btl_ugni_cq_get_event_args_t {
    mca_btl_ugni_cq_t *cq;
    gni_cq_entry_t *event_data;
} mca_btl_ugni_cq_get_event_args_t;

static inline intptr_t mca_btl_ugni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_cq_get_event_args_t *args = (mca_btl_ugni_cq_get_event_args_t *) arg;
    gni_return_t rc;

    rc = GNI_CqGetEvent (args->cq->gni_handle, args->event_data);
    args->cq->active_operations -= (GNI_RC_NOT_DONE != rc);
    return rc;
}

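/**
 * @brief Drain all pending events from a raw GNI CQ (device function)
 */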
static inline intptr_t mca_btl_ugni_cq_clear_device (mca_btl_ugni_device_t *device, void *arg)
{
    gni_cq_handle_t cq = (gni_cq_handle_t) (intptr_t) arg;
    gni_cq_entry_t event_data;
    int rc;

    do {
        rc = GNI_CqGetEvent (cq, &event_data);
    } while (GNI_RC_NOT_DONE != rc);

    return OPAL_SUCCESS;
}

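/**
 * @brief Get the next event on a raw GNI CQ (device function)
 */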
typedef struct mca_btl_ugni_gni_cq_get_event_args_t {
    gni_cq_handle_t cq;
    gni_cq_entry_t *event_data;
} mca_btl_ugni_gni_cq_get_event_args_t;

static inline intptr_t mca_btl_ugni_gni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_gni_cq_get_event_args_t *args = (mca_btl_ugni_gni_cq_get_event_args_t *) arg;

    return GNI_CqGetEvent (args->cq, args->event_data);
}

typedef struct mca_btl_ugni_cq_get_completed_desc_arg_t {
    mca_btl_ugni_cq_t *cq;
    mca_btl_ugni_post_descriptor_t *post_desc;
    int count;
} mca_btl_ugni_cq_get_completed_desc_arg_t;

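/**
 * @brief Re-post a failed RDMA descriptor using the post type (BTE or FMA)
 *        it was originally posted with
 */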
__opal_attribute_always_inline__
static inline int _mca_btl_ugni_repost_rdma_desc_device (mca_btl_ugni_device_t *device, mca_btl_ugni_rdma_desc_t *rdma_desc)
{
    mca_btl_ugni_post_descriptor_t *post_desc = &rdma_desc->btl_ugni_desc;
    int rc;

    if (post_desc->use_bte) {
        rc = GNI_PostRdma (rdma_desc->gni_handle, &post_desc->gni_desc);
    } else {
        rc = GNI_PostFma (rdma_desc->gni_handle, &post_desc->gni_desc);
    }

    return mca_btl_rc_ugni_to_opal (rc);
}

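/**
 * @brief Reap up to @c count completed RDMA descriptors from a device CQ
 *
 * If @c block is true this function spins until at least one event is
 * available. Descriptors that fail with a recoverable error are re-posted
 * rather than returned. The device lock must be held when this function
 * is called.
 */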
static inline intptr_t _mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
                                                                   mca_btl_ugni_post_descriptor_t *post_desc,
                                                                   const int count, bool block)
{
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    gni_post_descriptor_t *desc;
    gni_cq_entry_t event_data;
    int rc, desc_index = 0;

    for (desc_index = 0 ; desc_index < count && cq->active_operations ; ) {
        int desc_rc = OPAL_SUCCESS;

        rc = GNI_CqGetEvent (cq->gni_handle, &event_data);
        if (GNI_RC_NOT_DONE == rc) {
            if (block) {
                /* try again */
                continue;
            }
            break;
        }

        block = false;

        rc = GNI_GetCompleted (cq->gni_handle, event_data, &desc);
        if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
            return mca_btl_ugni_event_fatal_error (rc, event_data);
        }

        rdma_desc = MCA_BTL_UGNI_GNI_DESC_TO_RDMA_DESC(desc);

        if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data))) {
            desc_rc = mca_btl_ugni_device_handle_event_error (rdma_desc, event_data);
            if (OPAL_LIKELY(OPAL_SUCCESS == desc_rc)) {
                /* descriptor was re-posted */
                continue;
            }
        }

        /* copy back the descriptor only if additional processing is needed. in this case more processing
         * is needed if a user callback is specified or the bte was in use. */
        if (rdma_desc->btl_ugni_desc.cbfunc || rdma_desc->btl_ugni_desc.use_bte || OPAL_SUCCESS != desc_rc) {
            post_desc[desc_index] = rdma_desc->btl_ugni_desc;
            post_desc[desc_index++].rc = desc_rc;
        }

        /* return the descriptor while we have the lock. this is done so we can avoid using the
         * free list atomics (as both push and pop are done with the lock) */
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        --cq->active_operations;
    }

    return desc_index;
}

static inline intptr_t mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, void *arg0)
{
    mca_btl_ugni_cq_get_completed_desc_arg_t *args = (mca_btl_ugni_cq_get_completed_desc_arg_t *) arg0;

    return _mca_btl_ugni_cq_get_completed_desc_device (device, args->cq, args->post_desc, args->count, false);
}

/* NTH: When posting FMA or RDMA descriptors it makes sense to try and clear out a completion
 * event after posting the descriptor. This probably gives us a couple of things:
 *   1) Good locality on the associated data structures (especially with FMA which may
 *      complete fairly quickly).
 *   2) Since we are already holding the lock it could mean fewer attempts to
 *      lock the device over the course of the program.
 *
 * As far as I can tell there is no reason to try and clear out more than a couple of
 * completion events. The code has been written to allow us to easily modify the
 * number reaped if we determine that there is a benefit to clearing a different
 * number of events. */

/**
 * @brief Number of events to clear after posting a descriptor
 */
#define MCA_BTL_UGNI_DEVICE_REAP_COUNT 4

struct mca_btl_ugni_post_device_args_t {
    mca_btl_ugni_post_descriptor_t *desc;
    mca_btl_ugni_device_t *device;
    int count;
    mca_btl_ugni_post_descriptor_t completed[MCA_BTL_UGNI_DEVICE_REAP_COUNT];
};

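/**
 * @brief Allocate an RDMA descriptor, reaping completions (blocking) once
 *        to free up descriptors if the initial allocation fails
 */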
static inline mca_btl_ugni_rdma_desc_t *
mca_btl_ugni_get_rdma_desc_device (mca_btl_ugni_device_t *device, struct mca_btl_ugni_post_device_args_t *args, bool use_bte)
{
    mca_btl_ugni_post_descriptor_t *desc = args->desc;
    mca_btl_ugni_rdma_desc_t *rdma_desc;

    args->device = device;
    args->count = 0;

    do {
        rdma_desc = mca_btl_ugni_alloc_rdma_desc (device, desc, use_bte);
        if (OPAL_LIKELY(NULL != rdma_desc)) {
            return rdma_desc;
        }

        if (OPAL_LIKELY(NULL == rdma_desc && !args->count)) {
            args->count = _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                      args->completed, MCA_BTL_UGNI_DEVICE_REAP_COUNT,
                                                                      true);
            continue;
        }

        return NULL;
    } while (1);
}


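/**
 * @brief Post an FMA transaction and opportunistically reap completions
 *        (device function)
 */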
static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *device, void *arg)
{
    struct mca_btl_ugni_post_device_args_t *args = (struct mca_btl_ugni_post_device_args_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    rdma_desc = mca_btl_ugni_get_rdma_desc_device (device, args, false);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, remote_addr 0x%lx, "
                 "length %lu", (void*)rdma_desc, rdma_desc->btl_ugni_desc.gni_desc.type, rdma_desc->btl_ugni_desc.gni_desc.amo_cmd,
                 rdma_desc->btl_ugni_desc.gni_desc.remote_addr, rdma_desc->btl_ugni_desc.gni_desc.length));

    rc = GNI_PostFma (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    ++device->dev_rdma_local_cq.active_operations;

    /* to improve bandwidth and latency it is ideal for all posting threads to also reap completions from
     * the rdma completion queue. there are two optimizations here. 1) for bandwidth we only want to
     * reap what is available now so more messages can be posted quickly, and 2) for latency (single
     * put/get before flushing) we want to ensure the operation is complete. To some degree this is
     * gaming the benchmark but it may benefit some application communication patterns without really
     * hurting others (in theory). */
    if (opal_using_threads ()) {
        int count = args->count;
        args->count += _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                   args->completed + count,
                                                                   MCA_BTL_UGNI_DEVICE_REAP_COUNT - count,
                                                                   device->flushed);
        device->flushed = false;
    }

    return OPAL_SUCCESS;
}

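/**
 * @brief Post an RDMA (BTE) transaction and opportunistically reap
 *        completions (device function)
 */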
static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *device, void *arg)
{
    struct mca_btl_ugni_post_device_args_t *args = (struct mca_btl_ugni_post_device_args_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    rdma_desc = mca_btl_ugni_get_rdma_desc_device (device, args, true);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    /* pick the appropriate CQ */
    rdma_desc->btl_ugni_desc.cq = mca_btl_ugni_component.progress_thread_enabled ? &device->dev_rdma_local_irq_cq :
        &device->dev_rdma_local_cq;

    BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, amo %d, remote_addr 0x%lx, "
                 "length %lu", (void*)rdma_desc, rdma_desc->btl_ugni_desc.gni_desc.type, rdma_desc->btl_ugni_desc.gni_desc.amo_cmd,
                 rdma_desc->btl_ugni_desc.gni_desc.remote_addr, rdma_desc->btl_ugni_desc.gni_desc.length));

    rc = GNI_PostRdma (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    ++rdma_desc->btl_ugni_desc.cq->active_operations;

    /* to improve bandwidth and latency it is ideal for all posting threads to also reap completions from
     * the rdma completion queue. there are two optimizations here. 1) for bandwidth we only want to
     * reap what is available now so more messages can be posted quickly, and 2) for latency (single
     * put/get before flushing) we want to ensure the operation is complete. To some degree this is
     * gaming the benchmark but it may benefit some application communication patterns without really
     * hurting others (in theory). */
    if (opal_using_threads ()) {
        int count = args->count;
        args->count += _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                   args->completed + count,
                                                                   MCA_BTL_UGNI_DEVICE_REAP_COUNT - count,
                                                                   device->flushed);
        device->flushed = false;
    }

    return OPAL_SUCCESS;
}

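/**
 * @brief Post a CQ write using the device's local RDMA CQ as the source
 *        (device function)
 */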
static inline intptr_t mca_btl_ugni_post_cqwrite_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    desc->gni_desc.src_cq_hndl = device->dev_rdma_local_cq.gni_handle;

    rdma_desc = mca_btl_ugni_alloc_rdma_desc (device, desc, false);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    rc = GNI_PostCqWrite (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
    }

    return mca_btl_rc_ugni_to_opal (rc);
}

typedef struct mca_btl_ugni_get_datagram_args_t {
    mca_btl_ugni_module_t *ugni_module;
    gni_ep_handle_t *handle;
    mca_btl_base_endpoint_t **ep;
} mca_btl_ugni_get_datagram_args_t;

static inline intptr_t mca_btl_ugni_get_datagram_device (mca_btl_ugni_device_t *device, void *arg0)
{
    mca_btl_ugni_get_datagram_args_t *args = (mca_btl_ugni_get_datagram_args_t *) arg0;
    uint32_t remote_addr, remote_id;
    uint64_t datagram_id;
    gni_post_state_t post_state;
    gni_return_t grc;
    uint64_t data;

    grc = GNI_PostDataProbeById (device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("rc: %d, datagram_id: %" PRIx64 ", mask: %" PRIx64, grc, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        *(args->ep) = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&args->ugni_module->endpoints, data);
        *(args->handle) = (*args->ep)->smsg_ep_handle.gni_handle;
    } else {
        *(args->handle) = args->ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it has not yet) */
    grc = GNI_EpPostDataWaitById (*args->handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return mca_btl_rc_ugni_to_opal (grc);
    }

    BTL_VERBOSE(("handled datagram completion. post_state: %d, remote_addr: %u, remote_id: %u, directed?: %d",
                 post_state, remote_addr, remote_id, (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID));

    return 1;
}

typedef struct mca_btl_ugni_reg_mem_args_t {
    mca_btl_ugni_module_t *ugni_module;
    void *base;
    size_t size;
    mca_btl_ugni_reg_t *ugni_reg;
    gni_cq_handle_t cq;
    int flags;
} mca_btl_ugni_reg_mem_args_t;

static intptr_t mca_btl_ugni_reg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_reg_mem_args_t *args = (mca_btl_ugni_reg_mem_args_t *) arg;
    gni_return_t rc;

    rc = GNI_MemRegister (device->dev_handle, (uint64_t) args->base, args->size, args->cq,
                          args->flags, -1, &args->ugni_reg->handle.gni_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    return OPAL_SUCCESS;
}

typedef struct mca_btl_ugni_dereg_mem_arg_t {
    mca_btl_ugni_module_t *ugni_module;
    mca_btl_ugni_reg_t *ugni_reg;
} mca_btl_ugni_dereg_mem_arg_t;

static intptr_t mca_btl_ugni_dereg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_dereg_mem_arg_t *args = (mca_btl_ugni_dereg_mem_arg_t *) arg;
    gni_return_t rc;

    rc = GNI_MemDeregister (device->dev_handle, &args->ugni_reg->handle.gni_handle);
    return mca_btl_rc_ugni_to_opal (rc);
}

/* multi-thread safe interface to uGNI */

static inline int mca_btl_ugni_endpoint_smsg_send_wtag (mca_btl_base_endpoint_t *endpoint, void *hdr, size_t hdr_len,
                                                        void *payload, size_t payload_len, uint32_t msg_id, int tag)
{
    mca_btl_ugni_smsg_send_wtag_arg_t args = {.ep_handle = endpoint->smsg_ep_handle.gni_handle,
                                              .hdr = hdr, .hdr_len = hdr_len, .payload = payload,
                                              .payload_len = payload_len, .msg_id = msg_id,
                                              .tag = tag};
    mca_btl_ugni_device_t *device = endpoint->smsg_ep_handle.device;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_send_wtag_device, &args);
}

static inline int mca_btl_ugni_smsg_get_next_wtag (mca_btl_ugni_endpoint_handle_t *ep_handle, uintptr_t *data_ptr, uint8_t *tag)
{
    mca_btl_ugni_device_t *device = ep_handle->device;
    mca_btl_ugni_smsg_get_next_wtag_arg_t args = {.ep_handle = ep_handle->gni_handle, .data_ptr = data_ptr, .tag = tag};

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_get_next_wtag_device, &args);
}

static inline int mca_btl_ugni_smsg_release (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    mca_btl_ugni_device_t *device = ep_handle->device;

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_release_device, ep_handle);
}

static inline void mca_btl_ugni_cq_clear (mca_btl_ugni_device_t *device, gni_cq_handle_t cq)
{
    (void) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_clear_device, (void *) (intptr_t) cq);
}

static inline int mca_btl_ugni_cq_get_event (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
    /* NTH: normally there would be a check for any outstanding CQ operations but there seems
     * to be a reason to check the local SMSG completion queue anyway. since this function
     * only handles the SMSG local completion queue, not checking here should be fine and
     * should not impact performance. */
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_event_device, &args);
}

static inline int mca_btl_ugni_gni_cq_get_event (mca_btl_ugni_device_t *device, gni_cq_handle_t cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_gni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_gni_cq_get_event_device, &args);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc,
                                              mca_btl_ugni_device_serialize_fn_t post_fn)
{
    struct mca_btl_ugni_post_device_args_t args = {.desc = desc};
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    int rc;

    /* use serialize_any as it is responsible for binding devices to threads (if enabled). this generally
     * gives better performance as it reduces contention on any individual device. */
    rc = mca_btl_ugni_device_serialize_any (ugni_module, post_fn, &args);
    if (args.count) {
        mca_btl_ugni_handle_rdma_completions (ugni_module, args.device, args.completed, args.count);
    }

    return rc;
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post_fma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    return mca_btl_ugni_endpoint_post (endpoint, desc, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_fma_device);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post_rdma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    return mca_btl_ugni_endpoint_post (endpoint, desc, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_rdma_device);
}

static inline int mca_btl_ugni_endpoint_post_cqwrite (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_cqwrite_device, desc);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_cq_get_completed_desc (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
                                                      mca_btl_ugni_post_descriptor_t *post_desc,
                                                      int count)
{
    mca_btl_ugni_cq_get_completed_desc_arg_t args = {.cq = cq, .post_desc = post_desc, .count = count};
    if (0 == cq->active_operations) {
        return 0;
    }

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_completed_desc_device, &args);
}

static inline int mca_btl_ugni_get_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device, gni_ep_handle_t *gni_handle,
                                             mca_btl_base_endpoint_t **ep)
{
    mca_btl_ugni_get_datagram_args_t args = {.ugni_module = ugni_module, .ep = ep, .handle = gni_handle};
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_get_datagram_device, &args);
}

static inline int mca_btl_ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base, size_t size, mca_btl_ugni_reg_t *ugni_reg,
                                        gni_cq_handle_t cq, int flags)
{
    mca_btl_ugni_reg_mem_args_t args = {.ugni_module = ugni_module, .base = base, .size = size,
                                        .ugni_reg = ugni_reg, .cq = cq, .flags = flags};
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_reg_mem_device, &args);
}

static inline int mca_btl_ugni_dereg_mem (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_reg_t *ugni_reg)
{
    mca_btl_ugni_dereg_mem_arg_t args = {.ugni_module = ugni_module, .ugni_reg = ugni_reg};
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_dereg_mem_device, &args);
}

#endif /* BTL_UGNI_DEVICE_H */