1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2012      Sandia National Laboratories.  All rights reserved.
4  * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
5  *                         reserved.
6  * Copyright (c) 2014-2017 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * $COPYRIGHT$
10  *
11  * Additional copyrights may follow
12  *
13  * $HEADER$
14  */
15 
16 #include "ompi_config.h"
17 
18 #include "opal/sys/atomic.h"
19 #include "ompi/mca/osc/osc.h"
20 #include "ompi/mca/osc/base/base.h"
21 #include "ompi/mca/osc/base/osc_base_obj_convert.h"
22 
23 #include "osc_sm.h"
24 
/**
 * compare_ranks:
 *
 * @param[in] ptra    Pointer to integer item
 * @param[in] ptrb    Pointer to integer item
 *
 * @returns 0 if *ptra == *ptrb
 * @returns -1 if *ptra < *ptrb
 * @returns 1 otherwise
 *
 * qsort()-style comparator used to sort the rank list in ascending
 * order. It can be removed if groups are always in order.
 */
static int compare_ranks (const void *ptra, const void *ptrb)
{
    /* read through const-qualified pointers: a comparator must never
     * modify the elements, so do not cast the qualifier away */
    const int a = *((const int *) ptra);
    const int b = *((const int *) ptrb);

    /* explicit comparisons (rather than a - b) cannot overflow */
    if (a < b) {
        return -1;
    }

    if (a > b) {
        return 1;
    }

    return 0;
}
51 
52 /**
53  * ompi_osc_pt2pt_get_comm_ranks:
54  *
55  * @param[in] module    - OSC PT2PT module
56  * @param[in] sub_group - Group with ranks to translate
57  *
58  * @returns an array of translated ranks on success or NULL on failure
59  *
60  * Translate the ranks given in {sub_group} into ranks in the
61  * communicator used to create {module}.
62  */
ompi_osc_sm_group_ranks(ompi_group_t * group,ompi_group_t * sub_group)63 static int *ompi_osc_sm_group_ranks (ompi_group_t *group, ompi_group_t *sub_group)
64 {
65     int size = ompi_group_size(sub_group);
66     int *ranks1, *ranks2;
67     int ret;
68 
69     ranks1 = calloc (size, sizeof(int));
70     ranks2 = calloc (size, sizeof(int));
71     if (NULL == ranks1 || NULL == ranks2) {
72         free (ranks1);
73         free (ranks2);
74         return NULL;
75     }
76 
77     for (int i = 0 ; i < size ; ++i) {
78         ranks1[i] = i;
79     }
80 
81     ret = ompi_group_translate_ranks (sub_group, size, ranks1, group, ranks2);
82     free (ranks1);
83     if (OMPI_SUCCESS != ret) {
84         free (ranks2);
85         return NULL;
86     }
87 
88     qsort (ranks2, size, sizeof (int), compare_ranks);
89 
90     return ranks2;
91 }
92 
93 
94 int
ompi_osc_sm_fence(int assert,struct ompi_win_t * win)95 ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
96 {
97     ompi_osc_sm_module_t *module =
98         (ompi_osc_sm_module_t*) win->w_osc_module;
99 
100     /* ensure all memory operations have completed */
101     opal_atomic_mb();
102 
103     if (module->global_state->use_barrier_for_fence) {
104         return module->comm->c_coll->coll_barrier(module->comm,
105                                                  module->comm->c_coll->coll_barrier_module);
106     } else {
107         module->my_sense = !module->my_sense;
108         pthread_mutex_lock(&module->global_state->mtx);
109         module->global_state->count--;
110         if (module->global_state->count == 0) {
111             module->global_state->count = ompi_comm_size(module->comm);
112             module->global_state->sense = module->my_sense;
113             pthread_cond_broadcast(&module->global_state->cond);
114         } else {
115             while (module->global_state->sense != module->my_sense) {
116                 pthread_cond_wait(&module->global_state->cond, &module->global_state->mtx);
117             }
118         }
119         pthread_mutex_unlock(&module->global_state->mtx);
120 
121         return OMPI_SUCCESS;
122     }
123 }
124 
125 int
ompi_osc_sm_start(struct ompi_group_t * group,int assert,struct ompi_win_t * win)126 ompi_osc_sm_start(struct ompi_group_t *group,
127                   int assert,
128                   struct ompi_win_t *win)
129 {
130     ompi_osc_sm_module_t *module =
131         (ompi_osc_sm_module_t*) win->w_osc_module;
132     int my_rank = ompi_comm_rank (module->comm);
133     void *_tmp_ptr = NULL;
134 
135     OBJ_RETAIN(group);
136 
137     if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, (void *) &_tmp_ptr, group)) {
138         OBJ_RELEASE(group);
139         return OMPI_ERR_RMA_SYNC;
140     }
141 
142     if (0 == (assert & MPI_MODE_NOCHECK)) {
143         int size;
144 
145         int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
146         if (NULL == ranks) {
147             return OMPI_ERR_OUT_OF_RESOURCE;
148         }
149 
150         size = ompi_group_size(module->start_group);
151 
152         for (int i = 0 ; i < size ; ++i) {
153             int rank_byte = ranks[i] >> OSC_SM_POST_BITS;
154             osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & OSC_SM_POST_MASK);
155 
156             /* wait for rank to post */
157             while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
158                 opal_progress();
159                 opal_atomic_mb();
160             }
161 
162             opal_atomic_rmb ();
163 
164 #if OPAL_HAVE_ATOMIC_MATH_64
165             (void) opal_atomic_fetch_xor_64 ((volatile int64_t *) module->posts[my_rank] + rank_byte, rank_bit);
166 #else
167             (void) opal_atomic_fetch_xor_32 ((volatile int32_t *) module->posts[my_rank] + rank_byte, rank_bit);
168 #endif
169        }
170 
171         free (ranks);
172     }
173 
174     opal_atomic_mb();
175     return OMPI_SUCCESS;
176 }
177 
178 
179 int
ompi_osc_sm_complete(struct ompi_win_t * win)180 ompi_osc_sm_complete(struct ompi_win_t *win)
181 {
182     ompi_osc_sm_module_t *module =
183         (ompi_osc_sm_module_t*) win->w_osc_module;
184     ompi_group_t *group;
185     int gsize;
186 
187     /* ensure all memory operations have completed */
188     opal_atomic_mb();
189 
190     group = module->start_group;
191     if (NULL == group || !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&module->start_group, &group, NULL)) {
192         return OMPI_ERR_RMA_SYNC;
193     }
194 
195     opal_atomic_mb();
196 
197     int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
198     if (NULL == ranks) {
199         return OMPI_ERR_OUT_OF_RESOURCE;
200     }
201 
202     gsize = ompi_group_size(group);
203     for (int i = 0 ; i < gsize ; ++i) {
204         (void) opal_atomic_add_fetch_32(&module->node_states[ranks[i]].complete_count, 1);
205     }
206 
207     free (ranks);
208 
209     OBJ_RELEASE(group);
210 
211     opal_atomic_mb();
212     return OMPI_SUCCESS;
213 }
214 
215 
216 int
ompi_osc_sm_post(struct ompi_group_t * group,int assert,struct ompi_win_t * win)217 ompi_osc_sm_post(struct ompi_group_t *group,
218                        int assert,
219                        struct ompi_win_t *win)
220 {
221     ompi_osc_sm_module_t *module =
222         (ompi_osc_sm_module_t*) win->w_osc_module;
223     int my_rank = ompi_comm_rank (module->comm);
224     int my_byte = my_rank >> OSC_SM_POST_BITS;
225     osc_sm_post_type_t my_bit = ((osc_sm_post_type_t) 1) << (my_rank & OSC_SM_POST_MASK);
226     int gsize;
227 
228     OPAL_THREAD_LOCK(&module->lock);
229 
230     if (NULL != module->post_group) {
231         OPAL_THREAD_UNLOCK(&module->lock);
232         return OMPI_ERR_RMA_SYNC;
233     }
234 
235     module->post_group = group;
236 
237     OBJ_RETAIN(group);
238 
239     if (0 == (assert & MPI_MODE_NOCHECK)) {
240         int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
241         if (NULL == ranks) {
242             return OMPI_ERR_OUT_OF_RESOURCE;
243         }
244 
245         module->my_node_state->complete_count = 0;
246         opal_atomic_mb();
247 
248         gsize = ompi_group_size(module->post_group);
249         for (int i = 0 ; i < gsize ; ++i) {
250             opal_atomic_add ((volatile osc_sm_post_type_t *) module->posts[ranks[i]] + my_byte, my_bit);
251         }
252 
253         opal_atomic_wmb ();
254 
255         free (ranks);
256 
257         opal_progress ();
258     }
259 
260     OPAL_THREAD_UNLOCK(&module->lock);
261 
262     return OMPI_SUCCESS;
263 }
264 
265 
266 int
ompi_osc_sm_wait(struct ompi_win_t * win)267 ompi_osc_sm_wait(struct ompi_win_t *win)
268 {
269     ompi_osc_sm_module_t *module =
270         (ompi_osc_sm_module_t*) win->w_osc_module;
271     ompi_group_t *group;
272 
273     OPAL_THREAD_LOCK(&module->lock);
274 
275     if (NULL == module->post_group) {
276         OPAL_THREAD_UNLOCK(&module->lock);
277         return OMPI_ERR_RMA_SYNC;
278     }
279 
280     group = module->post_group;
281 
282     int size = ompi_group_size (group);
283 
284     while (module->my_node_state->complete_count != size) {
285         opal_progress();
286         opal_atomic_mb();
287     }
288 
289     OBJ_RELEASE(group);
290     module->post_group = NULL;
291 
292     OPAL_THREAD_UNLOCK(&module->lock);
293 
294     /* ensure all memory operations have completed */
295     opal_atomic_mb();
296 
297     return OMPI_SUCCESS;
298 }
299 
300 
301 int
ompi_osc_sm_test(struct ompi_win_t * win,int * flag)302 ompi_osc_sm_test(struct ompi_win_t *win,
303                        int *flag)
304 {
305     ompi_osc_sm_module_t *module =
306         (ompi_osc_sm_module_t*) win->w_osc_module;
307 
308     OPAL_THREAD_LOCK(&module->lock);
309 
310     if (NULL == module->post_group) {
311         OPAL_THREAD_UNLOCK(&module->lock);
312         return OMPI_ERR_RMA_SYNC;
313     }
314 
315     int size = ompi_group_size(module->post_group);
316 
317     if (module->my_node_state->complete_count == size) {
318         OBJ_RELEASE(module->post_group);
319         module->post_group = NULL;
320         *flag = 1;
321     } else {
322         *flag = 0;
323     }
324 
325     OPAL_THREAD_UNLOCK(&module->lock);
326 
327     /* ensure all memory operations have completed */
328     opal_atomic_mb();
329 
330     return OMPI_SUCCESS;
331 }
332