1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
4  *                         reserved.
5  * $COPYRIGHT$
6  *
7  * Additional copyrights may follow
8  *
9  * $HEADER$
10  */
11 
12 #include "btl_uct_device_context.h"
13 
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
/* Translation table from BTL atomic operations to UCT atomic operations.
 *
 * Each entry stores the UCT operation value plus 1. Because UCT_ATOMIC_OP_ADD == 0,
 * storing the raw value would make a supported ADD indistinguishable from a slot that
 * was never filled in (and is therefore zero-initialized). With the +1 bias a value of
 * 0 always means "unsupported" and the table does not have to be filled in completely.
 * Readers must subtract 1 before handing the value to UCT.
 *
 * The table is read-only, so it is declared const. */
static const int mca_btl_uct_btl_to_uct_atomic[MCA_BTL_ATOMIC_LAST] = {
    [MCA_BTL_ATOMIC_ADD]  = UCT_ATOMIC_OP_ADD + 1,
    [MCA_BTL_ATOMIC_AND]  = UCT_ATOMIC_OP_AND + 1,
    [MCA_BTL_ATOMIC_OR]   = UCT_ATOMIC_OP_OR + 1,
    [MCA_BTL_ATOMIC_XOR]  = UCT_ATOMIC_OP_XOR + 1,
    [MCA_BTL_ATOMIC_SWAP] = UCT_ATOMIC_OP_SWAP + 1,
};
#endif
25 
/**
 * Post a fetching atomic operation against remote memory.
 *
 * @param btl            BTL module; used as a mca_btl_uct_module_t
 * @param endpoint       endpoint used to look up the remote key and the UCT endpoint
 * @param local_address  location handed to UCT to receive the fetched value; accessed as
 *                       uint32_t when MCA_BTL_ATOMIC_FLAG_32BIT is set, uint64_t otherwise
 * @param remote_address address of the remote operand
 * @param local_handle   registration handle stored in the completion object (if any)
 * @param remote_handle  registration handle used to obtain the remote key
 * @param op             BTL atomic operation to perform
 * @param operand        operand of the atomic operation
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; when NULL no completion object is allocated
 * @param cbcontext      passed through to the completion object
 * @param cbdata         passed through to the completion object
 *
 * @returns OPAL_SUCCESS when UCT reports the operation as in progress
 * @returns 1 when UCT reports UCS_OK (the completion object is released here)
 * @returns OPAL_ERR_BAD_PARAM when the operation is not supported
 * @returns OPAL_ERR_OUT_OF_RESOURCE when no completion object could be allocated or
 *          UCT returned any other status
 * @returns the error from mca_btl_uct_get_rkey() when the remote key lookup fails
 */
int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                      void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                      uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
                      void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    int uct_op;

    /* never index the translation table with a value outside of its bounds */
    if (OPAL_UNLIKELY((unsigned int) op >= (unsigned int) MCA_BTL_ATOMIC_LAST)) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* table entries are biased by +1 so that 0 marks an unsupported operation */
    uct_op = mca_btl_uct_btl_to_uct_atomic[op];
    if (OPAL_UNLIKELY(0 == uct_op)) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* remove the bias to get the real UCT operation */
    --uct_op;
#else
    /* the older UCT interface used below only provides fetch-and-add and swap */
    if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op && MCA_BTL_ATOMIC_SWAP != op)) {
        return OPAL_ERR_BAD_PARAM;
    }
#endif

    /* a completion object is only needed when the caller asked for a callback */
    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

    /* NTH: comp is NULL when no callback was requested. in that case hand UCT an explicit
     * NULL instead of the address of a member of a NULL structure. */
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic32_fetch (ep_handle, uct_op, operand, (uint32_t *) local_address, remote_address,
                                            rkey.rkey, (NULL != comp) ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic64_fetch (ep_handle, uct_op, operand, (uint64_t *) local_address, remote_address,
                                            rkey.rkey, (NULL != comp) ? &comp->uct_comp : NULL);
    }
#else
    if (MCA_BTL_ATOMIC_ADD == op) {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_fadd32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address,
                                               (NULL != comp) ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_fadd64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address,
                                               (NULL != comp) ? &comp->uct_comp : NULL);
        }
    } else {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_swap32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address,
                                               (NULL != comp) ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_swap64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address,
                                               (NULL != comp) ? &comp->uct_comp : NULL);
        }
    }
#endif

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object stays allocated while the operation is outstanding */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* UCT is already done with the operation. report that with a return code of 1 */
        rc = 1;
        mca_btl_uct_uct_completion_release (comp);
    } else {
        /* every other UCT status is reported as a resource error */
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    mca_btl_uct_rkey_release (uct_btl, &rkey);

    return rc;
}
117 
/**
 * Post a (non-fetching) atomic operation against remote memory.
 *
 * This is implemented on top of mca_btl_uct_afop(): the fetched value is written to a
 * scratch location and ignored. No local registration handle is passed along. All other
 * arguments are forwarded unchanged and the return code is that of mca_btl_uct_afop().
 */
int mca_btl_uct_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                     uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                     mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    /* scratch target for the fetched value. it has static storage so that it is still
     * valid if the operation finishes after this function has returned. the value that
     * ends up in here is never read. */
    static uint64_t discarded_result;

    /* the fetching operations are used for now. a non-fetching operation may perform
     * better on some platforms but this was quicker to implement and it guarantees
     * remote completion. */
    return mca_btl_uct_afop (btl, endpoint, &discarded_result, remote_address,
                             NULL /* no local handle */, remote_handle, op, operand,
                             flags, order, cbfunc, cbcontext, cbdata);
}
132 
/**
 * Post an atomic compare-and-swap against remote memory.
 *
 * @param btl            BTL module; used as a mca_btl_uct_module_t
 * @param endpoint       endpoint used to look up the remote key and the UCT endpoint
 * @param local_address  location handed to UCT to receive the result; accessed as
 *                       uint32_t when MCA_BTL_ATOMIC_FLAG_32BIT is set, uint64_t otherwise
 * @param remote_address address of the remote operand
 * @param local_handle   registration handle stored in the completion object (if any)
 * @param remote_handle  registration handle used to obtain the remote key
 * @param compare        value the remote operand is compared against
 * @param value          value to swap in
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant (compare and
 *                       value are truncated to 32 bits)
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; when NULL no completion object is allocated
 * @param cbcontext      passed through to the completion object
 * @param cbdata         passed through to the completion object
 *
 * @returns OPAL_SUCCESS when UCT reports the operation as in progress
 * @returns 1 when UCT reports UCS_OK (the completion object is released here)
 * @returns OPAL_ERR_OUT_OF_RESOURCE when no completion object could be allocated or
 *          UCT returned any other status
 * @returns the error from mca_btl_uct_get_rkey() when the remote key lookup fails
 */
int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                        void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                        mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
                        int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

    /* a completion object is only needed when the caller asked for a callback */
    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

    /* comp is NULL when no callback was requested. in that case hand UCT an explicit
     * NULL instead of the address of a member of a NULL structure. */
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic_cswap32 (ep_handle, (uint32_t) compare, (uint32_t) value, remote_address,
                                            rkey.rkey, (uint32_t *) local_address,
                                            (NULL != comp) ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic_cswap64 (ep_handle, compare, value, remote_address, rkey.rkey,
                                            (uint64_t *) local_address,
                                            (NULL != comp) ? &comp->uct_comp : NULL);
    }

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object stays allocated while the operation is outstanding */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* UCT is already done with the operation. report that with a return code of 1 */
        rc = 1;
        mca_btl_uct_uct_completion_release (comp);
    } else {
        /* every other UCT status is reported as a resource error */
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    mca_btl_uct_rkey_release (uct_btl, &rkey);

    return rc;
}
191