/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *  (C) 2006 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

/* TODO figure out how to rewrite some/all of this queue code to use
 * explicit OPA_load_ptr/OPA_store_ptr operations */

#ifndef MPID_NEM_QUEUE_H
#define MPID_NEM_QUEUE_H
#include "mpid_nem_datatypes.h"
#include "mpid_nem_defs.h"
#include "mpid_nem_atomics.h"

int MPID_nem_network_poll(int in_blocking_progress);

#define MPID_nem_dump_cell_mpich2(cell, index)  MPID_nem_dump_cell_mpich2__((cell),(index),__FILE__,__LINE__)

void MPID_nem_dump_cell_mpich2__( MPID_nem_cell_ptr_t cell, int, char* ,int);
void MPID_nem_dump_cell_mpich( MPID_nem_cell_ptr_t cell, int);

/* Assertion macros for nemesis queues.  We don't use the normal
 * assertion macros because we don't usually want to assert several
 * times per queue operation.  These assertions serve more as structured
 * comments that can easily be transformed into real assertions */
#if 0
#define MPID_nem_q_assert(a_) \
    do {                                                             \
        if (unlikely(!(a_))) {                                       \
            MPID_nem_q_assert_fail(#a_, __FILE__, __LINE__);         \
        }                                                            \
    } while (0)
#define MPID_nem_q_assert_fail(a_str_, file_, line_) \
    do {/*nothing*/} while(0)
#else
#define MPID_nem_q_assert(a_) \
    do {/*nothing*/} while (0)
#endif
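
/* Should these structured comments ever need to become real assertions, one
 * possible failure hook is sketched below.  This is an illustrative sketch
 * only, not part of this header; it uses plain fprintf/abort and would
 * require <stdio.h> and <stdlib.h>:
 *
 *     #define MPID_nem_q_assert_fail(a_str_, file_, line_)                   \
 *         do {                                                               \
 *             fprintf(stderr, "nemesis queue assertion '%s' failed at %s:%d\n", \
 *                     (a_str_), (file_), (line_));                           \
 *             abort();                                                       \
 *         } while (0)
 */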

#undef FUNCNAME
#define FUNCNAME MPID_nem_cell_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPID_nem_cell_init(MPID_nem_cell_ptr_t cell)
{
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_CELL_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_CELL_INIT);

    MPID_NEM_SET_REL_NULL(cell->next);
    memset((void *)&cell->pkt, 0, sizeof(MPID_nem_pkt_header_t));

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_CELL_INIT);
}

#if defined(MPID_NEM_USE_LOCK_FREE_QUEUES)

#undef FUNCNAME
#define FUNCNAME MPID_nem_queue_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPID_nem_queue_init(MPID_nem_queue_ptr_t qhead)
{
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);

    MPID_NEM_SET_REL_NULL(qhead->head);
    MPID_NEM_SET_REL_NULL(qhead->my_head);
    MPID_NEM_SET_REL_NULL(qhead->tail);

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
}

#define MPID_NEM_USE_SHADOW_HEAD
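
/* The "shadow head" scheme used below: the dequeuer keeps a consumer-private
 * copy of the queue head in qhead->my_head.  MPID_nem_queue_empty refills
 * my_head from the shared head (and nulls the shared head) only when the
 * private copy has run out, so ordinary dequeues work on my_head and the
 * cells' next pointers rather than on the shared head field. */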

static inline MPID_nem_cell_rel_ptr_t MPID_NEM_SWAP_REL (MPID_nem_cell_rel_ptr_t *ptr, MPID_nem_cell_rel_ptr_t val)
{
    MPID_nem_cell_rel_ptr_t ret;
    OPA_store_ptr(&ret.p, OPA_swap_ptr(&(ptr->p), OPA_load_ptr(&val.p)));
    return ret;
}

/* do a compare-and-swap with MPID_NEM_REL_NULL */
static inline MPID_nem_cell_rel_ptr_t MPID_NEM_CAS_REL_NULL (MPID_nem_cell_rel_ptr_t *ptr, MPID_nem_cell_rel_ptr_t oldv)
{
    MPID_nem_cell_rel_ptr_t ret;
    OPA_store_ptr(&ret.p, OPA_cas_ptr(&(ptr->p), OPA_load_ptr(&oldv.p), MPID_NEM_REL_NULL));
    return ret;
}

static inline void
MPID_nem_queue_enqueue (MPID_nem_queue_ptr_t qhead, MPID_nem_cell_ptr_t element)
{
    MPID_nem_cell_rel_ptr_t r_prev;
    MPID_nem_cell_rel_ptr_t r_element = MPID_NEM_ABS_TO_REL (element);

    /* the _dequeue can break if this does not hold */
    MPID_nem_q_assert(MPID_NEM_IS_REL_NULL(element->next));

    /* Orders payload and e->next=NULL w.r.t. the SWAP, updating head, and
     * updating prev->next.  We assert e->next==NULL above, but it may have been
     * done by us in the preceding _dequeue operation.
     *
     * The SWAP itself does not need to be ordered w.r.t. the payload because
     * the consumer does not directly inspect the tail.  But the subsequent
     * update to the head or e->next field does need to be ordered w.r.t. the
     * payload or the consumer may read incorrect data. */
    OPA_write_barrier();

    /* enqueue at tail */
    r_prev = MPID_NEM_SWAP_REL (&(qhead->tail), r_element);
    if (MPID_NEM_IS_REL_NULL (r_prev))
    {
        /* queue was empty, element is the new head too */

        /* no write barrier needed, we believe atomic SWAP with a control
         * dependence (if) will enforce ordering between the SWAP and the head
         * assignment */
        qhead->head = r_element;
    }
    else
    {
        /* queue was not empty, swing old tail's next field to point to
         * our element */
        MPID_nem_q_assert(MPID_NEM_IS_REL_NULL(MPID_NEM_REL_TO_ABS(r_prev)->next));

        /* no write barrier needed, we believe atomic SWAP with a control
         * dependence (if/else) will enforce ordering between the SWAP and the
         * prev->next assignment */
        MPID_NEM_REL_TO_ABS (r_prev)->next = r_element;
    }
}

/* This operation is only safe because this is a single-dequeuer queue impl.
   Assumes that MPID_nem_queue_empty was called immediately prior to fix up any
   shadow head issues. */
static inline MPID_nem_cell_ptr_t
MPID_nem_queue_head (MPID_nem_queue_ptr_t qhead)
{
    MPID_nem_q_assert(MPID_NEM_IS_REL_NULL(qhead->head));
    return MPID_NEM_REL_TO_ABS(qhead->my_head);
}

static inline int
MPID_nem_queue_empty (MPID_nem_queue_ptr_t qhead)
{
    /* outside of this routine my_head and head should never both
     * contain a non-null value */
    MPID_nem_q_assert(MPID_NEM_IS_REL_NULL(qhead->my_head) ||
                      MPID_NEM_IS_REL_NULL(qhead->head));

    if (MPID_NEM_IS_REL_NULL (qhead->my_head))
    {
        /* the order of comparison between my_head and head does not
         * matter, no read barrier needed here */
        if (MPID_NEM_IS_REL_NULL (qhead->head))
        {
            /* both null, nothing in queue */
            return 1;
        }
        else
        {
            /* shadow head null and head has value, move the value to
             * our private shadow head and zero the real head */
            qhead->my_head = qhead->head;
            /* no barrier needed, my_head is entirely private to consumer */
            MPID_NEM_SET_REL_NULL (qhead->head);
        }
    }

    /* the following assertions are present at the beginning of _dequeue:
    MPID_nem_q_assert(!MPID_NEM_IS_REL_NULL(qhead->my_head));
    MPID_nem_q_assert( MPID_NEM_IS_REL_NULL(qhead->head));
    */
    return 0;
}


/* Gets the head */
static inline void
MPID_nem_queue_dequeue (MPID_nem_queue_ptr_t qhead, MPID_nem_cell_ptr_t *e)
{
    MPID_nem_cell_ptr_t _e;
    MPID_nem_cell_rel_ptr_t _r_e;

    /* _empty always called first, moving head-->my_head */
    MPID_nem_q_assert(!MPID_NEM_IS_REL_NULL(qhead->my_head));
    MPID_nem_q_assert( MPID_NEM_IS_REL_NULL(qhead->head));

    _r_e = qhead->my_head;
    _e = MPID_NEM_REL_TO_ABS (_r_e);

    /* no barrier needed, my_head is private to consumer, plus
     * head/my_head and _e->next are ordered by a data dependency */
    if (!MPID_NEM_IS_REL_NULL(_e->next))
    {
        qhead->my_head = _e->next;
    }
    else
    {
        /* we've reached the end (tail) of the queue */
        MPID_nem_cell_rel_ptr_t old_tail;

        MPID_NEM_SET_REL_NULL (qhead->my_head);
        /* no barrier needed, the caller doesn't need any ordering w.r.t.
         * my_head or the tail */
        old_tail = MPID_NEM_CAS_REL_NULL (&(qhead->tail), _r_e);

        if (!MPID_NEM_REL_ARE_EQUAL (old_tail, _r_e))
        {
            /* FIXME is a barrier needed here because of the control-only dependency? */
            while (MPID_NEM_IS_REL_NULL (_e->next))
            {
                SKIP;
            }
            /* no read barrier needed between loads from the same location */
            qhead->my_head = _e->next;
        }
    }
    MPID_NEM_SET_REL_NULL (_e->next);

    /* Conservative read barrier here to ensure loads from head are ordered
     * w.r.t. payload reads by the caller.  The McKenney "whymb" document's
     * Figure 11 indicates that we don't need a barrier, but we are currently
     * unconvinced of this.  Further work, ideally using more formal methods,
     * should justify removing this.  (note that this barrier won't cost us
     * anything on many platforms, esp. x86) */
    OPA_read_barrier();

    *e = _e;
}
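
/* For reference, the dequeue side above is intended to be driven in the order
 * _empty -> _head/_dequeue, since _empty is what refills the shadow head.  A
 * minimal single-dequeuer consumer loop might look like the following sketch
 * (the recv_queue and cell variable names are illustrative only):
 *
 *     MPID_nem_cell_ptr_t cell;
 *     while (!MPID_nem_queue_empty(recv_queue)) {
 *         cell = MPID_nem_queue_head(recv_queue);    // optional peek
 *         MPID_nem_queue_dequeue(recv_queue, &cell);
 *         // ... process the cell's payload (cell->pkt), then recycle the cell ...
 *     }
 */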

#else /* !defined(MPID_NEM_USE_LOCK_FREE_QUEUES) */

/* FIXME We shouldn't really be using the MPID_Thread_mutex_* code but the
 * MPIDU_Process_locks code is a total mess right now.  In the long term we
 * need to resolve this, but in the short run it should be safe on most (all?)
 * platforms to use these instead.  Usually they will both boil down to a
 * pthread_mutex_t and associated functions. */
#define MPID_nem_queue_mutex_create MPID_Thread_mutex_create
#define MPID_nem_queue_mutex_lock   MPID_Thread_mutex_lock
#define MPID_nem_queue_mutex_unlock MPID_Thread_mutex_unlock

/* must be called by exactly one process per queue */
#undef FUNCNAME
#define FUNCNAME MPID_nem_queue_init
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPID_nem_queue_init(MPID_nem_queue_ptr_t qhead)
{
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);

    MPID_NEM_SET_REL_NULL(qhead->head);
    MPID_NEM_SET_REL_NULL(qhead->my_head);
    MPID_NEM_SET_REL_NULL(qhead->tail);
    MPID_nem_queue_mutex_create(&qhead->lock, NULL);

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
}

static inline void
MPID_nem_queue_enqueue (MPID_nem_queue_ptr_t qhead, MPID_nem_cell_ptr_t element)
{
    MPID_nem_cell_rel_ptr_t r_prev;
    MPID_nem_cell_rel_ptr_t r_element = MPID_NEM_ABS_TO_REL (element);

    MPID_nem_queue_mutex_lock(&qhead->lock);

    r_prev = qhead->tail;
    qhead->tail = r_element;
    if (MPID_NEM_IS_REL_NULL(r_prev)) {
        qhead->head = r_element;
    }
    else {
        MPID_NEM_REL_TO_ABS(r_prev)->next = r_element;
    }

    MPID_nem_queue_mutex_unlock(&qhead->lock);
}

/* This operation is only safe because this is a single-dequeuer queue impl. */
static inline MPID_nem_cell_ptr_t
MPID_nem_queue_head (MPID_nem_queue_ptr_t qhead)
{
    return MPID_NEM_REL_TO_ABS(qhead->my_head);
}

/* Assumption: regular loads & stores are atomic.  This may not be universally
 * true, but it's not uncommon.  We often need to use these "lock-ful" queues on
 * platforms where atomics are not yet implemented, so we can't rely on the
 * atomics to provide atomic load/store operations for us. */
static inline int
MPID_nem_queue_empty (MPID_nem_queue_ptr_t qhead)
{
    if (MPID_NEM_IS_REL_NULL (qhead->my_head))
    {
        if (MPID_NEM_IS_REL_NULL (qhead->head))
        {
            return 1;
        }
        else
        {
            qhead->my_head = qhead->head;
            MPID_NEM_SET_REL_NULL (qhead->head); /* reset it for next time */
        }
    }

    return 0;
}

static inline void
MPID_nem_queue_dequeue (MPID_nem_queue_ptr_t qhead, MPID_nem_cell_ptr_t *e)
{
    MPID_nem_cell_ptr_t _e;
    MPID_nem_cell_rel_ptr_t _r_e;

    _r_e = qhead->my_head;
    _e = MPID_NEM_REL_TO_ABS (_r_e);


    if (MPID_NEM_IS_REL_NULL(_e->next)) {
        /* a REL_NULL _e->next or writing qhead->tail both require locking */
        MPID_nem_queue_mutex_lock(&qhead->lock);
        qhead->my_head = _e->next;
        /* We have to check _e->next again because it may have changed between
           the time we checked it without the lock and the time that we acquired
           the lock. */
        if (MPID_NEM_IS_REL_NULL(_e->next)) {
            MPID_NEM_SET_REL_NULL(qhead->tail);
        }
        MPID_nem_queue_mutex_unlock(&qhead->lock);
    }
    else { /* !MPID_NEM_IS_REL_NULL(_e->next) */
        /* We don't need to lock because a non-null _e->next can't be changed by
           anyone but us (the dequeuer) and we don't need to modify qhead->tail
           because we aren't removing the last element from the queue. */
        qhead->my_head = _e->next;
    }

    MPID_NEM_SET_REL_NULL (_e->next);
    *e = _e;
}

#endif /* !defined(MPID_NEM_USE_LOCK_FREE_QUEUES) */

#endif /* MPID_NEM_QUEUE_H */