1*404b540aSrobert /* Copyright (C) 2005 Free Software Foundation, Inc.
2*404b540aSrobert Contributed by Richard Henderson <rth@redhat.com>.
3*404b540aSrobert
4*404b540aSrobert This file is part of the GNU OpenMP Library (libgomp).
5*404b540aSrobert
6*404b540aSrobert Libgomp is free software; you can redistribute it and/or modify it
7*404b540aSrobert under the terms of the GNU Lesser General Public License as published by
8*404b540aSrobert the Free Software Foundation; either version 2.1 of the License, or
9*404b540aSrobert (at your option) any later version.
10*404b540aSrobert
11*404b540aSrobert Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12*404b540aSrobert WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13*404b540aSrobert FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
14*404b540aSrobert more details.
15*404b540aSrobert
16*404b540aSrobert You should have received a copy of the GNU Lesser General Public License
17*404b540aSrobert along with libgomp; see the file COPYING.LIB. If not, write to the
18*404b540aSrobert Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19*404b540aSrobert MA 02110-1301, USA. */
20*404b540aSrobert
21*404b540aSrobert /* As a special exception, if you link this library with other files, some
22*404b540aSrobert of which are compiled with GCC, to produce an executable, this library
23*404b540aSrobert does not by itself cause the resulting executable to be covered by the
24*404b540aSrobert GNU General Public License. This exception does not however invalidate
25*404b540aSrobert any other reasons why the executable file might be covered by the GNU
26*404b540aSrobert General Public License. */
27*404b540aSrobert
28*404b540aSrobert /* This file handles the ORDERED construct. */
29*404b540aSrobert
30*404b540aSrobert #include "libgomp.h"
31*404b540aSrobert
32*404b540aSrobert
33*404b540aSrobert /* This function is called when first allocating an iteration block. That
34*404b540aSrobert is, the thread is not currently on the queue. The work-share lock must
35*404b540aSrobert be held on entry. */
36*404b540aSrobert
37*404b540aSrobert void
gomp_ordered_first(void)38*404b540aSrobert gomp_ordered_first (void)
39*404b540aSrobert {
40*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
41*404b540aSrobert struct gomp_team *team = thr->ts.team;
42*404b540aSrobert struct gomp_work_share *ws = thr->ts.work_share;
43*404b540aSrobert unsigned index;
44*404b540aSrobert
45*404b540aSrobert /* Work share constructs can be orphaned. */
46*404b540aSrobert if (team == NULL || team->nthreads == 1)
47*404b540aSrobert return;
48*404b540aSrobert
49*404b540aSrobert index = ws->ordered_cur + ws->ordered_num_used;
50*404b540aSrobert if (index >= team->nthreads)
51*404b540aSrobert index -= team->nthreads;
52*404b540aSrobert ws->ordered_team_ids[index] = thr->ts.team_id;
53*404b540aSrobert
54*404b540aSrobert /* If this is the first and only thread in the queue, then there is
55*404b540aSrobert no one to release us when we get to our ordered section. Post to
56*404b540aSrobert our own release queue now so that we won't block later. */
57*404b540aSrobert if (ws->ordered_num_used++ == 0)
58*404b540aSrobert gomp_sem_post (team->ordered_release[thr->ts.team_id]);
59*404b540aSrobert }
60*404b540aSrobert
61*404b540aSrobert /* This function is called when completing the last iteration block. That
62*404b540aSrobert is, there are no more iterations to perform and so the thread should be
63*404b540aSrobert removed from the queue entirely. Because of the way ORDERED blocks are
64*404b540aSrobert managed, it follows that we currently own access to the ORDERED block,
65*404b540aSrobert and should now pass it on to the next thread. The work-share lock must
66*404b540aSrobert be held on entry. */
67*404b540aSrobert
68*404b540aSrobert void
gomp_ordered_last(void)69*404b540aSrobert gomp_ordered_last (void)
70*404b540aSrobert {
71*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
72*404b540aSrobert struct gomp_team *team = thr->ts.team;
73*404b540aSrobert struct gomp_work_share *ws = thr->ts.work_share;
74*404b540aSrobert unsigned next_id;
75*404b540aSrobert
76*404b540aSrobert /* Work share constructs can be orphaned. */
77*404b540aSrobert if (team == NULL || team->nthreads == 1)
78*404b540aSrobert return;
79*404b540aSrobert
80*404b540aSrobert /* We're no longer the owner. */
81*404b540aSrobert ws->ordered_owner = -1;
82*404b540aSrobert
83*404b540aSrobert /* If we're not the last thread in the queue, then wake the next. */
84*404b540aSrobert if (--ws->ordered_num_used > 0)
85*404b540aSrobert {
86*404b540aSrobert unsigned next = ws->ordered_cur + 1;
87*404b540aSrobert if (next == team->nthreads)
88*404b540aSrobert next = 0;
89*404b540aSrobert ws->ordered_cur = next;
90*404b540aSrobert
91*404b540aSrobert next_id = ws->ordered_team_ids[next];
92*404b540aSrobert gomp_sem_post (team->ordered_release[next_id]);
93*404b540aSrobert }
94*404b540aSrobert }
95*404b540aSrobert
96*404b540aSrobert
97*404b540aSrobert /* This function is called when allocating a subsequent allocation block.
98*404b540aSrobert That is, we're done with the current iteration block and we're allocating
99*404b540aSrobert another. This is the logical combination of a call to gomp_ordered_last
100*404b540aSrobert followed by a call to gomp_ordered_first. The work-share lock must be
101*404b540aSrobert held on entry. */
102*404b540aSrobert
103*404b540aSrobert void
gomp_ordered_next(void)104*404b540aSrobert gomp_ordered_next (void)
105*404b540aSrobert {
106*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
107*404b540aSrobert struct gomp_team *team = thr->ts.team;
108*404b540aSrobert struct gomp_work_share *ws = thr->ts.work_share;
109*404b540aSrobert unsigned index, next_id;
110*404b540aSrobert
111*404b540aSrobert /* Work share constructs can be orphaned. */
112*404b540aSrobert if (team == NULL || team->nthreads == 1)
113*404b540aSrobert return;
114*404b540aSrobert
115*404b540aSrobert /* We're no longer the owner. */
116*404b540aSrobert ws->ordered_owner = -1;
117*404b540aSrobert
118*404b540aSrobert /* If there's only one thread in the queue, that must be us. */
119*404b540aSrobert if (ws->ordered_num_used == 1)
120*404b540aSrobert {
121*404b540aSrobert /* We have a similar situation as in gomp_ordered_first
122*404b540aSrobert where we need to post to our own release semaphore. */
123*404b540aSrobert gomp_sem_post (team->ordered_release[thr->ts.team_id]);
124*404b540aSrobert return;
125*404b540aSrobert }
126*404b540aSrobert
127*404b540aSrobert /* If the queue is entirely full, then we move ourself to the end of
128*404b540aSrobert the queue merely by incrementing ordered_cur. Only if it's not
129*404b540aSrobert full do we have to write our id. */
130*404b540aSrobert if (ws->ordered_num_used < team->nthreads)
131*404b540aSrobert {
132*404b540aSrobert index = ws->ordered_cur + ws->ordered_num_used;
133*404b540aSrobert if (index >= team->nthreads)
134*404b540aSrobert index -= team->nthreads;
135*404b540aSrobert ws->ordered_team_ids[index] = thr->ts.team_id;
136*404b540aSrobert }
137*404b540aSrobert
138*404b540aSrobert index = ws->ordered_cur + 1;
139*404b540aSrobert if (index == team->nthreads)
140*404b540aSrobert index = 0;
141*404b540aSrobert ws->ordered_cur = index;
142*404b540aSrobert
143*404b540aSrobert next_id = ws->ordered_team_ids[index];
144*404b540aSrobert gomp_sem_post (team->ordered_release[next_id]);
145*404b540aSrobert }
146*404b540aSrobert
147*404b540aSrobert
148*404b540aSrobert /* This function is called when a statically scheduled loop is first
149*404b540aSrobert being created. */
150*404b540aSrobert
151*404b540aSrobert void
gomp_ordered_static_init(void)152*404b540aSrobert gomp_ordered_static_init (void)
153*404b540aSrobert {
154*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
155*404b540aSrobert struct gomp_team *team = thr->ts.team;
156*404b540aSrobert
157*404b540aSrobert if (team == NULL || team->nthreads == 1)
158*404b540aSrobert return;
159*404b540aSrobert
160*404b540aSrobert gomp_sem_post (team->ordered_release[0]);
161*404b540aSrobert }
162*404b540aSrobert
163*404b540aSrobert /* This function is called when a statically scheduled loop is moving to
164*404b540aSrobert the next allocation block. Static schedules are not first come first
165*404b540aSrobert served like the others, so we're to move to the numerically next thread,
166*404b540aSrobert not the next thread on a list. The work-share lock should *not* be held
167*404b540aSrobert on entry. */
168*404b540aSrobert
169*404b540aSrobert void
gomp_ordered_static_next(void)170*404b540aSrobert gomp_ordered_static_next (void)
171*404b540aSrobert {
172*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
173*404b540aSrobert struct gomp_team *team = thr->ts.team;
174*404b540aSrobert struct gomp_work_share *ws = thr->ts.work_share;
175*404b540aSrobert unsigned id = thr->ts.team_id;
176*404b540aSrobert
177*404b540aSrobert if (team == NULL || team->nthreads == 1)
178*404b540aSrobert return;
179*404b540aSrobert
180*404b540aSrobert ws->ordered_owner = -1;
181*404b540aSrobert
182*404b540aSrobert /* This thread currently owns the lock. Increment the owner. */
183*404b540aSrobert if (++id == team->nthreads)
184*404b540aSrobert id = 0;
185*404b540aSrobert ws->ordered_team_ids[0] = id;
186*404b540aSrobert gomp_sem_post (team->ordered_release[id]);
187*404b540aSrobert }
188*404b540aSrobert
189*404b540aSrobert /* This function is called when we need to assert that the thread owns the
190*404b540aSrobert ordered section. Due to the problem of posted-but-not-waited semaphores,
191*404b540aSrobert this needs to happen before completing a loop iteration. */
192*404b540aSrobert
193*404b540aSrobert void
gomp_ordered_sync(void)194*404b540aSrobert gomp_ordered_sync (void)
195*404b540aSrobert {
196*404b540aSrobert struct gomp_thread *thr = gomp_thread ();
197*404b540aSrobert struct gomp_team *team = thr->ts.team;
198*404b540aSrobert struct gomp_work_share *ws = thr->ts.work_share;
199*404b540aSrobert
200*404b540aSrobert /* Work share constructs can be orphaned. But this clearly means that
201*404b540aSrobert we are the only thread, and so we automatically own the section. */
202*404b540aSrobert if (team == NULL || team->nthreads == 1)
203*404b540aSrobert return;
204*404b540aSrobert
205*404b540aSrobert /* ??? I believe it to be safe to access this data without taking the
206*404b540aSrobert ws->lock. The only presumed race condition is with the previous
207*404b540aSrobert thread on the queue incrementing ordered_cur such that it points
208*404b540aSrobert to us, concurrently with our check below. But our team_id is
209*404b540aSrobert already present in the queue, and the other thread will always
210*404b540aSrobert post to our release semaphore. So the two cases are that we will
211*404b540aSrobert either win the race an momentarily block on the semaphore, or lose
212*404b540aSrobert the race and find the semaphore already unlocked and so not block.
213*404b540aSrobert Either way we get correct results. */
214*404b540aSrobert
215*404b540aSrobert if (ws->ordered_owner != thr->ts.team_id)
216*404b540aSrobert {
217*404b540aSrobert gomp_sem_wait (team->ordered_release[thr->ts.team_id]);
218*404b540aSrobert ws->ordered_owner = thr->ts.team_id;
219*404b540aSrobert }
220*404b540aSrobert }
221*404b540aSrobert
/* Entry point invoked by user code at the start of an ORDERED block.  The
   calling thread must be at the head of the queue to proceed and blocks
   otherwise; all of that is done by gomp_ordered_sync.  When the compiler
   supports the alias attribute this symbol is simply an alias for
   gomp_ordered_sync, otherwise it is a thin forwarding function.  */

#ifndef HAVE_ATTRIBUTE_ALIAS
void
GOMP_ordered_start (void)
{
  gomp_ordered_sync ();
}
#else
extern void GOMP_ordered_start (void)
	__attribute__((alias ("gomp_ordered_sync")));
#endif
236*404b540aSrobert
/* Entry point invoked by user code at the end of an ORDERED block.  The
   current ORDERED implementation has no work to do here.

   That implementation does have a flaw: after a thread leaves the ORDERED
   section in its last iteration, the next thread is not admitted to the
   section immediately.  Keeping this function in the interface allows the
   implementation to change later.  */

void
GOMP_ordered_end (void)
{
  /* Intentionally empty.  */
}
250