1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
4 * reserved.
5 * $COPYRIGHT$
6 *
7 * Additional copyrights may follow
8 *
9 * $HEADER$
10 */
11
12 #include "btl_uct_device_context.h"
13
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
/* Translation table from BTL atomic op codes to UCT atomic op codes, indexed by
 * mca_btl_base_atomic_op_t.
 *
 * Every entry holds the UCT opcode PLUS ONE. Slots that are not listed below are
 * zero-initialized, so a stored value of 0 means "not supported by this BTL". The
 * offset is required because UCT_ATOMIC_OP_ADD == 0 is itself a valid opcode;
 * without it the table would have to be filled in completely. Readers must
 * subtract one before handing the value to UCT.
 *
 * The table is only ever read, so it is declared const. */
static const int mca_btl_uct_btl_to_uct_atomic[MCA_BTL_ATOMIC_LAST] = {
    [MCA_BTL_ATOMIC_ADD] = UCT_ATOMIC_OP_ADD + 1,
    [MCA_BTL_ATOMIC_AND] = UCT_ATOMIC_OP_AND + 1,
    [MCA_BTL_ATOMIC_OR] = UCT_ATOMIC_OP_OR + 1,
    [MCA_BTL_ATOMIC_XOR] = UCT_ATOMIC_OP_XOR + 1,
    [MCA_BTL_ATOMIC_SWAP] = UCT_ATOMIC_OP_SWAP + 1,
};
#endif
25
/**
 * Start a fetching atomic operation on remote memory using the UCT endpoint
 * associated with the given BTL endpoint.
 *
 * @param btl            BTL module; used as a mca_btl_uct_module_t
 * @param endpoint       peer endpoint the operation targets
 * @param local_address  where the fetched value is written (32 or 64 bits wide,
 *                       depending on flags)
 * @param remote_address address of the remote operand
 * @param local_handle   local registration handle; only forwarded to the
 *                       completion allocator
 * @param remote_handle  remote registration handle used to obtain the rkey
 * @param op             BTL atomic operation to perform
 * @param operand        operand of the atomic operation
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant,
 *                       otherwise the 64-bit variant is used
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; may be NULL, in which case no
 *                       completion object is allocated
 * @param cbcontext      forwarded to the completion allocator
 * @param cbdata         forwarded to the completion allocator
 *
 * @return OPAL_SUCCESS when UCT reports the operation as in progress,
 *         1 when UCT reports UCS_OK (the completion object is released here and
 *         this function does not invoke cbfunc),
 *         OPAL_ERR_BAD_PARAM when op is out of range or not supported,
 *         OPAL_ERR_OUT_OF_RESOURCE when no completion object could be allocated
 *         or UCT returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                      void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                      uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
                      void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    uct_completion_t *uct_comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    int uct_op;

    /* never index the translation table with an op outside of its bounds */
    if (OPAL_UNLIKELY((unsigned int) op >= (unsigned int) MCA_BTL_ATOMIC_LAST)) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* table entries are stored as (uct opcode + 1); 0 marks an unsupported op */
    uct_op = mca_btl_uct_btl_to_uct_atomic[op] - 1;
    if (OPAL_UNLIKELY(uct_op < 0)) {
        return OPAL_ERR_BAD_PARAM;
    }
#else
    /* the legacy UCT interface only provides fetch-and-add and swap */
    if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op && MCA_BTL_ATOMIC_SWAP != op)) {
        return OPAL_ERR_BAD_PARAM;
    }
#endif

    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        /* only form the member address when there really is a completion object.
         * without a callback UCT is handed a NULL completion instead of an
         * address computed from a NULL pointer. */
        uct_comp = &comp->uct_comp;
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* NOTE(review): comp may be NULL here (no callback); this relies on the
         * release function accepting NULL, as the code always has -- confirm */
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic32_fetch (ep_handle, uct_op, (uint32_t) operand, (uint32_t *) local_address,
                                            remote_address, rkey.rkey, uct_comp);
    } else {
        ucs_status = uct_ep_atomic64_fetch (ep_handle, uct_op, operand, (uint64_t *) local_address,
                                            remote_address, rkey.rkey, uct_comp);
    }
#else
    if (MCA_BTL_ATOMIC_ADD == op) {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_fadd32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address, uct_comp);
        } else {
            ucs_status = uct_ep_atomic_fadd64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address, uct_comp);
        }
    } else {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_swap32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address, uct_comp);
        } else {
            ucs_status = uct_ep_atomic_swap64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address, uct_comp);
        }
    }
#endif

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    /* called only after the context lock has been dropped */
    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object stays alive until UCT signals completion */
        rc = OPAL_SUCCESS;
    } else {
        /* finished immediately (UCS_OK -> 1) or failed: in both cases the
         * completion object is no longer needed */
        rc = (UCS_OK == ucs_status) ? 1 : OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    mca_btl_uct_rkey_release (uct_btl, &rkey);

    return rc;
}
117
/**
 * Non-fetching atomic operation on remote memory.
 *
 * Implemented as a thin wrapper around mca_btl_uct_afop(): the fetched value is
 * directed into a scratch word and ignored. All other arguments, and the return
 * value, are passed through unchanged.
 */
int mca_btl_uct_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                     uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                     mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    /* landing zone for the fetched value. it has static storage so the address
     * remains valid after this function has returned; its contents are never read.
     * NOTE(review): this single word is shared by every caller of this function. */
    static uint64_t discarded_value;
    void *scratch = &discarded_value;

    /* no local registration handle is supplied for the scratch word */
    mca_btl_base_registration_handle_t *no_local_handle = NULL;

    /* the fetching variant is reused for now: per the original author a dedicated
     * non-fetching path is probably faster on some platforms, but this was quicker
     * to implement and it guarantees remote completion. */
    return mca_btl_uct_afop (btl, endpoint, scratch, remote_address, no_local_handle,
                             remote_handle, op, operand, flags, order, cbfunc, cbcontext,
                             cbdata);
}
132
/**
 * Start an atomic compare-and-swap on remote memory using the UCT endpoint
 * associated with the given BTL endpoint.
 *
 * @param btl            BTL module; used as a mca_btl_uct_module_t
 * @param endpoint       peer endpoint the operation targets
 * @param local_address  where the fetched value is written (32 or 64 bits wide,
 *                       depending on flags)
 * @param remote_address address of the remote operand
 * @param local_handle   local registration handle; only forwarded to the
 *                       completion allocator
 * @param remote_handle  remote registration handle used to obtain the rkey
 * @param compare        value the remote operand is compared against
 * @param value          value to swap in
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant
 *                       (compare and value are truncated to 32 bits), otherwise
 *                       the 64-bit variant is used
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; may be NULL, in which case no
 *                       completion object is allocated
 * @param cbcontext      forwarded to the completion allocator
 * @param cbdata         forwarded to the completion allocator
 *
 * @return OPAL_SUCCESS when UCT reports the operation as in progress,
 *         1 when UCT reports UCS_OK (the completion object is released here and
 *         this function does not invoke cbfunc),
 *         OPAL_ERR_OUT_OF_RESOURCE when no completion object could be allocated
 *         or UCT returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                        void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                        mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
                        int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    uct_completion_t *uct_comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        /* only form the member address when there really is a completion object.
         * without a callback UCT is handed a NULL completion instead of an
         * address computed from a NULL pointer. */
        uct_comp = &comp->uct_comp;
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* NOTE(review): comp may be NULL here (no callback); this relies on the
         * release function accepting NULL, as the code always has -- confirm */
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic_cswap32 (ep_handle, (uint32_t) compare, (uint32_t) value, remote_address,
                                            rkey.rkey, (uint32_t *) local_address, uct_comp);
    } else {
        ucs_status = uct_ep_atomic_cswap64 (ep_handle, compare, value, remote_address, rkey.rkey,
                                            (uint64_t *) local_address, uct_comp);
    }

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    /* called only after the context lock has been dropped */
    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object stays alive until UCT signals completion */
        rc = OPAL_SUCCESS;
    } else {
        /* finished immediately (UCS_OK -> 1) or failed: in both cases the
         * completion object is no longer needed */
        rc = (UCS_OK == ucs_status) ? 1 : OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    mca_btl_uct_rkey_release (uct_btl, &rkey);

    return rc;
}
191