1*38fd1498Szrj /* Copyright (C) 2005-2018 Free Software Foundation, Inc.
2*38fd1498Szrj Contributed by Richard Henderson <rth@redhat.com>.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of the GNU Offloading and Multi Processing Library
5*38fd1498Szrj (libgomp).
6*38fd1498Szrj
7*38fd1498Szrj Libgomp is free software; you can redistribute it and/or modify it
8*38fd1498Szrj under the terms of the GNU General Public License as published by
9*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
10*38fd1498Szrj any later version.
11*38fd1498Szrj
12*38fd1498Szrj Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14*38fd1498Szrj FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15*38fd1498Szrj more details.
16*38fd1498Szrj
17*38fd1498Szrj Under Section 7 of GPL version 3, you are granted additional
18*38fd1498Szrj permissions described in the GCC Runtime Library Exception, version
19*38fd1498Szrj 3.1, as published by the Free Software Foundation.
20*38fd1498Szrj
21*38fd1498Szrj You should have received a copy of the GNU General Public License and
22*38fd1498Szrj a copy of the GCC Runtime Library Exception along with this program;
23*38fd1498Szrj see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24*38fd1498Szrj <http://www.gnu.org/licenses/>. */
25*38fd1498Szrj
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
28*38fd1498Szrj
29*38fd1498Szrj #include "libgomp.h"
30*38fd1498Szrj #include "pool.h"
31*38fd1498Szrj #include <stdlib.h>
32*38fd1498Szrj #include <string.h>
33*38fd1498Szrj
34*38fd1498Szrj #ifdef LIBGOMP_USE_PTHREADS
/* Thread attribute object; it contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* Key used for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* The libgomp per-thread data structure: a thread-local object when
   HAVE_TLS or USE_EMUTLS is defined, otherwise reached through a
   pthread key.  */
#if defined (HAVE_TLS) || defined (USE_EMUTLS)
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
48*38fd1498Szrj
49*38fd1498Szrj
50*38fd1498Szrj /* This structure is used to communicate across pthread_create. */
51*38fd1498Szrj
52*38fd1498Szrj struct gomp_thread_start_data
53*38fd1498Szrj {
54*38fd1498Szrj void (*fn) (void *);
55*38fd1498Szrj void *fn_data;
56*38fd1498Szrj struct gomp_team_state ts;
57*38fd1498Szrj struct gomp_task *task;
58*38fd1498Szrj struct gomp_thread_pool *thread_pool;
59*38fd1498Szrj unsigned int place;
60*38fd1498Szrj bool nested;
61*38fd1498Szrj };
62*38fd1498Szrj
63*38fd1498Szrj
64*38fd1498Szrj /* This function is a pthread_create entry point. This contains the idle
65*38fd1498Szrj loop in which a thread waits to be called up to become part of a team. */
66*38fd1498Szrj
67*38fd1498Szrj static void *
gomp_thread_start(void * xdata)68*38fd1498Szrj gomp_thread_start (void *xdata)
69*38fd1498Szrj {
70*38fd1498Szrj struct gomp_thread_start_data *data = xdata;
71*38fd1498Szrj struct gomp_thread *thr;
72*38fd1498Szrj struct gomp_thread_pool *pool;
73*38fd1498Szrj void (*local_fn) (void *);
74*38fd1498Szrj void *local_data;
75*38fd1498Szrj
76*38fd1498Szrj #if defined HAVE_TLS || defined USE_EMUTLS
77*38fd1498Szrj thr = &gomp_tls_data;
78*38fd1498Szrj #else
79*38fd1498Szrj struct gomp_thread local_thr;
80*38fd1498Szrj thr = &local_thr;
81*38fd1498Szrj pthread_setspecific (gomp_tls_key, thr);
82*38fd1498Szrj #endif
83*38fd1498Szrj gomp_sem_init (&thr->release, 0);
84*38fd1498Szrj
85*38fd1498Szrj /* Extract what we need from data. */
86*38fd1498Szrj local_fn = data->fn;
87*38fd1498Szrj local_data = data->fn_data;
88*38fd1498Szrj thr->thread_pool = data->thread_pool;
89*38fd1498Szrj thr->ts = data->ts;
90*38fd1498Szrj thr->task = data->task;
91*38fd1498Szrj thr->place = data->place;
92*38fd1498Szrj
93*38fd1498Szrj thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
94*38fd1498Szrj
95*38fd1498Szrj /* Make thread pool local. */
96*38fd1498Szrj pool = thr->thread_pool;
97*38fd1498Szrj
98*38fd1498Szrj if (data->nested)
99*38fd1498Szrj {
100*38fd1498Szrj struct gomp_team *team = thr->ts.team;
101*38fd1498Szrj struct gomp_task *task = thr->task;
102*38fd1498Szrj
103*38fd1498Szrj gomp_barrier_wait (&team->barrier);
104*38fd1498Szrj
105*38fd1498Szrj local_fn (local_data);
106*38fd1498Szrj gomp_team_barrier_wait_final (&team->barrier);
107*38fd1498Szrj gomp_finish_task (task);
108*38fd1498Szrj gomp_barrier_wait_last (&team->barrier);
109*38fd1498Szrj }
110*38fd1498Szrj else
111*38fd1498Szrj {
112*38fd1498Szrj pool->threads[thr->ts.team_id] = thr;
113*38fd1498Szrj
114*38fd1498Szrj gomp_simple_barrier_wait (&pool->threads_dock);
115*38fd1498Szrj do
116*38fd1498Szrj {
117*38fd1498Szrj struct gomp_team *team = thr->ts.team;
118*38fd1498Szrj struct gomp_task *task = thr->task;
119*38fd1498Szrj
120*38fd1498Szrj local_fn (local_data);
121*38fd1498Szrj gomp_team_barrier_wait_final (&team->barrier);
122*38fd1498Szrj gomp_finish_task (task);
123*38fd1498Szrj
124*38fd1498Szrj gomp_simple_barrier_wait (&pool->threads_dock);
125*38fd1498Szrj
126*38fd1498Szrj local_fn = thr->fn;
127*38fd1498Szrj local_data = thr->data;
128*38fd1498Szrj thr->fn = NULL;
129*38fd1498Szrj }
130*38fd1498Szrj while (local_fn);
131*38fd1498Szrj }
132*38fd1498Szrj
133*38fd1498Szrj gomp_sem_destroy (&thr->release);
134*38fd1498Szrj thr->thread_pool = NULL;
135*38fd1498Szrj thr->task = NULL;
136*38fd1498Szrj return NULL;
137*38fd1498Szrj }
138*38fd1498Szrj #endif
139*38fd1498Szrj
140*38fd1498Szrj static inline struct gomp_team *
get_last_team(unsigned nthreads)141*38fd1498Szrj get_last_team (unsigned nthreads)
142*38fd1498Szrj {
143*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
144*38fd1498Szrj if (thr->ts.team == NULL)
145*38fd1498Szrj {
146*38fd1498Szrj struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
147*38fd1498Szrj struct gomp_team *last_team = pool->last_team;
148*38fd1498Szrj if (last_team != NULL && last_team->nthreads == nthreads)
149*38fd1498Szrj {
150*38fd1498Szrj pool->last_team = NULL;
151*38fd1498Szrj return last_team;
152*38fd1498Szrj }
153*38fd1498Szrj }
154*38fd1498Szrj return NULL;
155*38fd1498Szrj }
156*38fd1498Szrj
157*38fd1498Szrj /* Create a new team data structure. */
158*38fd1498Szrj
159*38fd1498Szrj struct gomp_team *
gomp_new_team(unsigned nthreads)160*38fd1498Szrj gomp_new_team (unsigned nthreads)
161*38fd1498Szrj {
162*38fd1498Szrj struct gomp_team *team;
163*38fd1498Szrj int i;
164*38fd1498Szrj
165*38fd1498Szrj team = get_last_team (nthreads);
166*38fd1498Szrj if (team == NULL)
167*38fd1498Szrj {
168*38fd1498Szrj size_t extra = sizeof (team->ordered_release[0])
169*38fd1498Szrj + sizeof (team->implicit_task[0]);
170*38fd1498Szrj team = gomp_malloc (sizeof (*team) + nthreads * extra);
171*38fd1498Szrj
172*38fd1498Szrj #ifndef HAVE_SYNC_BUILTINS
173*38fd1498Szrj gomp_mutex_init (&team->work_share_list_free_lock);
174*38fd1498Szrj #endif
175*38fd1498Szrj gomp_barrier_init (&team->barrier, nthreads);
176*38fd1498Szrj gomp_mutex_init (&team->task_lock);
177*38fd1498Szrj
178*38fd1498Szrj team->nthreads = nthreads;
179*38fd1498Szrj }
180*38fd1498Szrj
181*38fd1498Szrj team->work_share_chunk = 8;
182*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
183*38fd1498Szrj team->single_count = 0;
184*38fd1498Szrj #endif
185*38fd1498Szrj team->work_shares_to_free = &team->work_shares[0];
186*38fd1498Szrj gomp_init_work_share (&team->work_shares[0], false, nthreads);
187*38fd1498Szrj team->work_shares[0].next_alloc = NULL;
188*38fd1498Szrj team->work_share_list_free = NULL;
189*38fd1498Szrj team->work_share_list_alloc = &team->work_shares[1];
190*38fd1498Szrj for (i = 1; i < 7; i++)
191*38fd1498Szrj team->work_shares[i].next_free = &team->work_shares[i + 1];
192*38fd1498Szrj team->work_shares[i].next_free = NULL;
193*38fd1498Szrj
194*38fd1498Szrj gomp_sem_init (&team->master_release, 0);
195*38fd1498Szrj team->ordered_release = (void *) &team->implicit_task[nthreads];
196*38fd1498Szrj team->ordered_release[0] = &team->master_release;
197*38fd1498Szrj
198*38fd1498Szrj priority_queue_init (&team->task_queue);
199*38fd1498Szrj team->task_count = 0;
200*38fd1498Szrj team->task_queued_count = 0;
201*38fd1498Szrj team->task_running_count = 0;
202*38fd1498Szrj team->work_share_cancelled = 0;
203*38fd1498Szrj team->team_cancelled = 0;
204*38fd1498Szrj
205*38fd1498Szrj return team;
206*38fd1498Szrj }
207*38fd1498Szrj
208*38fd1498Szrj
209*38fd1498Szrj /* Free a team data structure. */
210*38fd1498Szrj
211*38fd1498Szrj static void
free_team(struct gomp_team * team)212*38fd1498Szrj free_team (struct gomp_team *team)
213*38fd1498Szrj {
214*38fd1498Szrj #ifndef HAVE_SYNC_BUILTINS
215*38fd1498Szrj gomp_mutex_destroy (&team->work_share_list_free_lock);
216*38fd1498Szrj #endif
217*38fd1498Szrj gomp_barrier_destroy (&team->barrier);
218*38fd1498Szrj gomp_mutex_destroy (&team->task_lock);
219*38fd1498Szrj priority_queue_free (&team->task_queue);
220*38fd1498Szrj free (team);
221*38fd1498Szrj }
222*38fd1498Szrj
223*38fd1498Szrj static void
gomp_free_pool_helper(void * thread_pool)224*38fd1498Szrj gomp_free_pool_helper (void *thread_pool)
225*38fd1498Szrj {
226*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
227*38fd1498Szrj struct gomp_thread_pool *pool
228*38fd1498Szrj = (struct gomp_thread_pool *) thread_pool;
229*38fd1498Szrj gomp_simple_barrier_wait_last (&pool->threads_dock);
230*38fd1498Szrj gomp_sem_destroy (&thr->release);
231*38fd1498Szrj thr->thread_pool = NULL;
232*38fd1498Szrj thr->task = NULL;
233*38fd1498Szrj #ifdef LIBGOMP_USE_PTHREADS
234*38fd1498Szrj pthread_exit (NULL);
235*38fd1498Szrj #elif defined(__nvptx__)
236*38fd1498Szrj asm ("exit;");
237*38fd1498Szrj #else
238*38fd1498Szrj #error gomp_free_pool_helper must terminate the thread
239*38fd1498Szrj #endif
240*38fd1498Szrj }
241*38fd1498Szrj
242*38fd1498Szrj /* Free a thread pool and release its threads. */
243*38fd1498Szrj
244*38fd1498Szrj void
gomp_free_thread(void * arg)245*38fd1498Szrj gomp_free_thread (void *arg __attribute__((unused)))
246*38fd1498Szrj {
247*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
248*38fd1498Szrj struct gomp_thread_pool *pool = thr->thread_pool;
249*38fd1498Szrj if (pool)
250*38fd1498Szrj {
251*38fd1498Szrj if (pool->threads_used > 0)
252*38fd1498Szrj {
253*38fd1498Szrj int i;
254*38fd1498Szrj for (i = 1; i < pool->threads_used; i++)
255*38fd1498Szrj {
256*38fd1498Szrj struct gomp_thread *nthr = pool->threads[i];
257*38fd1498Szrj nthr->fn = gomp_free_pool_helper;
258*38fd1498Szrj nthr->data = pool;
259*38fd1498Szrj }
260*38fd1498Szrj /* This barrier undocks threads docked on pool->threads_dock. */
261*38fd1498Szrj gomp_simple_barrier_wait (&pool->threads_dock);
262*38fd1498Szrj /* And this waits till all threads have called gomp_barrier_wait_last
263*38fd1498Szrj in gomp_free_pool_helper. */
264*38fd1498Szrj gomp_simple_barrier_wait (&pool->threads_dock);
265*38fd1498Szrj /* Now it is safe to destroy the barrier and free the pool. */
266*38fd1498Szrj gomp_simple_barrier_destroy (&pool->threads_dock);
267*38fd1498Szrj
268*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
269*38fd1498Szrj __sync_fetch_and_add (&gomp_managed_threads,
270*38fd1498Szrj 1L - pool->threads_used);
271*38fd1498Szrj #else
272*38fd1498Szrj gomp_mutex_lock (&gomp_managed_threads_lock);
273*38fd1498Szrj gomp_managed_threads -= pool->threads_used - 1L;
274*38fd1498Szrj gomp_mutex_unlock (&gomp_managed_threads_lock);
275*38fd1498Szrj #endif
276*38fd1498Szrj }
277*38fd1498Szrj if (pool->last_team)
278*38fd1498Szrj free_team (pool->last_team);
279*38fd1498Szrj #ifndef __nvptx__
280*38fd1498Szrj free (pool->threads);
281*38fd1498Szrj free (pool);
282*38fd1498Szrj #endif
283*38fd1498Szrj thr->thread_pool = NULL;
284*38fd1498Szrj }
285*38fd1498Szrj if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
286*38fd1498Szrj gomp_team_end ();
287*38fd1498Szrj if (thr->task != NULL)
288*38fd1498Szrj {
289*38fd1498Szrj struct gomp_task *task = thr->task;
290*38fd1498Szrj gomp_end_task ();
291*38fd1498Szrj free (task);
292*38fd1498Szrj }
293*38fd1498Szrj }
294*38fd1498Szrj
295*38fd1498Szrj /* Launch a team. */
296*38fd1498Szrj
297*38fd1498Szrj #ifdef LIBGOMP_USE_PTHREADS
298*38fd1498Szrj void
gomp_team_start(void (* fn)(void *),void * data,unsigned nthreads,unsigned flags,struct gomp_team * team)299*38fd1498Szrj gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
300*38fd1498Szrj unsigned flags, struct gomp_team *team)
301*38fd1498Szrj {
302*38fd1498Szrj struct gomp_thread_start_data *start_data;
303*38fd1498Szrj struct gomp_thread *thr, *nthr;
304*38fd1498Szrj struct gomp_task *task;
305*38fd1498Szrj struct gomp_task_icv *icv;
306*38fd1498Szrj bool nested;
307*38fd1498Szrj struct gomp_thread_pool *pool;
308*38fd1498Szrj unsigned i, n, old_threads_used = 0;
309*38fd1498Szrj pthread_attr_t thread_attr, *attr;
310*38fd1498Szrj unsigned long nthreads_var;
311*38fd1498Szrj char bind, bind_var;
312*38fd1498Szrj unsigned int s = 0, rest = 0, p = 0, k = 0;
313*38fd1498Szrj unsigned int affinity_count = 0;
314*38fd1498Szrj struct gomp_thread **affinity_thr = NULL;
315*38fd1498Szrj
316*38fd1498Szrj thr = gomp_thread ();
317*38fd1498Szrj nested = thr->ts.level;
318*38fd1498Szrj pool = thr->thread_pool;
319*38fd1498Szrj task = thr->task;
320*38fd1498Szrj icv = task ? &task->icv : &gomp_global_icv;
321*38fd1498Szrj if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
322*38fd1498Szrj gomp_init_affinity ();
323*38fd1498Szrj
324*38fd1498Szrj /* Always save the previous state, even if this isn't a nested team.
325*38fd1498Szrj In particular, we should save any work share state from an outer
326*38fd1498Szrj orphaned work share construct. */
327*38fd1498Szrj team->prev_ts = thr->ts;
328*38fd1498Szrj
329*38fd1498Szrj thr->ts.team = team;
330*38fd1498Szrj thr->ts.team_id = 0;
331*38fd1498Szrj ++thr->ts.level;
332*38fd1498Szrj if (nthreads > 1)
333*38fd1498Szrj ++thr->ts.active_level;
334*38fd1498Szrj thr->ts.work_share = &team->work_shares[0];
335*38fd1498Szrj thr->ts.last_work_share = NULL;
336*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
337*38fd1498Szrj thr->ts.single_count = 0;
338*38fd1498Szrj #endif
339*38fd1498Szrj thr->ts.static_trip = 0;
340*38fd1498Szrj thr->task = &team->implicit_task[0];
341*38fd1498Szrj nthreads_var = icv->nthreads_var;
342*38fd1498Szrj if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
343*38fd1498Szrj && thr->ts.level < gomp_nthreads_var_list_len)
344*38fd1498Szrj nthreads_var = gomp_nthreads_var_list[thr->ts.level];
345*38fd1498Szrj bind_var = icv->bind_var;
346*38fd1498Szrj if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
347*38fd1498Szrj bind_var = flags & 7;
348*38fd1498Szrj bind = bind_var;
349*38fd1498Szrj if (__builtin_expect (gomp_bind_var_list != NULL, 0)
350*38fd1498Szrj && thr->ts.level < gomp_bind_var_list_len)
351*38fd1498Szrj bind_var = gomp_bind_var_list[thr->ts.level];
352*38fd1498Szrj gomp_init_task (thr->task, task, icv);
353*38fd1498Szrj team->implicit_task[0].icv.nthreads_var = nthreads_var;
354*38fd1498Szrj team->implicit_task[0].icv.bind_var = bind_var;
355*38fd1498Szrj
356*38fd1498Szrj if (nthreads == 1)
357*38fd1498Szrj return;
358*38fd1498Szrj
359*38fd1498Szrj i = 1;
360*38fd1498Szrj
361*38fd1498Szrj if (__builtin_expect (gomp_places_list != NULL, 0))
362*38fd1498Szrj {
363*38fd1498Szrj /* Depending on chosen proc_bind model, set subpartition
364*38fd1498Szrj for the master thread and initialize helper variables
365*38fd1498Szrj P and optionally S, K and/or REST used by later place
366*38fd1498Szrj computation for each additional thread. */
367*38fd1498Szrj p = thr->place - 1;
368*38fd1498Szrj switch (bind)
369*38fd1498Szrj {
370*38fd1498Szrj case omp_proc_bind_true:
371*38fd1498Szrj case omp_proc_bind_close:
372*38fd1498Szrj if (nthreads > thr->ts.place_partition_len)
373*38fd1498Szrj {
374*38fd1498Szrj /* T > P. S threads will be placed in each place,
375*38fd1498Szrj and the final REM threads placed one by one
376*38fd1498Szrj into the already occupied places. */
377*38fd1498Szrj s = nthreads / thr->ts.place_partition_len;
378*38fd1498Szrj rest = nthreads % thr->ts.place_partition_len;
379*38fd1498Szrj }
380*38fd1498Szrj else
381*38fd1498Szrj s = 1;
382*38fd1498Szrj k = 1;
383*38fd1498Szrj break;
384*38fd1498Szrj case omp_proc_bind_master:
385*38fd1498Szrj /* Each thread will be bound to master's place. */
386*38fd1498Szrj break;
387*38fd1498Szrj case omp_proc_bind_spread:
388*38fd1498Szrj if (nthreads <= thr->ts.place_partition_len)
389*38fd1498Szrj {
390*38fd1498Szrj /* T <= P. Each subpartition will have in between s
391*38fd1498Szrj and s+1 places (subpartitions starting at or
392*38fd1498Szrj after rest will have s places, earlier s+1 places),
393*38fd1498Szrj each thread will be bound to the first place in
394*38fd1498Szrj its subpartition (except for the master thread
395*38fd1498Szrj that can be bound to another place in its
396*38fd1498Szrj subpartition). */
397*38fd1498Szrj s = thr->ts.place_partition_len / nthreads;
398*38fd1498Szrj rest = thr->ts.place_partition_len % nthreads;
399*38fd1498Szrj rest = (s + 1) * rest + thr->ts.place_partition_off;
400*38fd1498Szrj if (p < rest)
401*38fd1498Szrj {
402*38fd1498Szrj p -= (p - thr->ts.place_partition_off) % (s + 1);
403*38fd1498Szrj thr->ts.place_partition_len = s + 1;
404*38fd1498Szrj }
405*38fd1498Szrj else
406*38fd1498Szrj {
407*38fd1498Szrj p -= (p - rest) % s;
408*38fd1498Szrj thr->ts.place_partition_len = s;
409*38fd1498Szrj }
410*38fd1498Szrj thr->ts.place_partition_off = p;
411*38fd1498Szrj }
412*38fd1498Szrj else
413*38fd1498Szrj {
414*38fd1498Szrj /* T > P. Each subpartition will have just a single
415*38fd1498Szrj place and we'll place between s and s+1
416*38fd1498Szrj threads into each subpartition. */
417*38fd1498Szrj s = nthreads / thr->ts.place_partition_len;
418*38fd1498Szrj rest = nthreads % thr->ts.place_partition_len;
419*38fd1498Szrj thr->ts.place_partition_off = p;
420*38fd1498Szrj thr->ts.place_partition_len = 1;
421*38fd1498Szrj k = 1;
422*38fd1498Szrj }
423*38fd1498Szrj break;
424*38fd1498Szrj }
425*38fd1498Szrj }
426*38fd1498Szrj else
427*38fd1498Szrj bind = omp_proc_bind_false;
428*38fd1498Szrj
429*38fd1498Szrj /* We only allow the reuse of idle threads for non-nested PARALLEL
430*38fd1498Szrj regions. This appears to be implied by the semantics of
431*38fd1498Szrj threadprivate variables, but perhaps that's reading too much into
432*38fd1498Szrj things. Certainly it does prevent any locking problems, since
433*38fd1498Szrj only the initial program thread will modify gomp_threads. */
434*38fd1498Szrj if (!nested)
435*38fd1498Szrj {
436*38fd1498Szrj old_threads_used = pool->threads_used;
437*38fd1498Szrj
438*38fd1498Szrj if (nthreads <= old_threads_used)
439*38fd1498Szrj n = nthreads;
440*38fd1498Szrj else if (old_threads_used == 0)
441*38fd1498Szrj {
442*38fd1498Szrj n = 0;
443*38fd1498Szrj gomp_simple_barrier_init (&pool->threads_dock, nthreads);
444*38fd1498Szrj }
445*38fd1498Szrj else
446*38fd1498Szrj {
447*38fd1498Szrj n = old_threads_used;
448*38fd1498Szrj
449*38fd1498Szrj /* Increase the barrier threshold to make sure all new
450*38fd1498Szrj threads arrive before the team is released. */
451*38fd1498Szrj gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
452*38fd1498Szrj }
453*38fd1498Szrj
454*38fd1498Szrj /* Not true yet, but soon will be. We're going to release all
455*38fd1498Szrj threads from the dock, and those that aren't part of the
456*38fd1498Szrj team will exit. */
457*38fd1498Szrj pool->threads_used = nthreads;
458*38fd1498Szrj
459*38fd1498Szrj /* If necessary, expand the size of the gomp_threads array. It is
460*38fd1498Szrj expected that changes in the number of threads are rare, thus we
461*38fd1498Szrj make no effort to expand gomp_threads_size geometrically. */
462*38fd1498Szrj if (nthreads >= pool->threads_size)
463*38fd1498Szrj {
464*38fd1498Szrj pool->threads_size = nthreads + 1;
465*38fd1498Szrj pool->threads
466*38fd1498Szrj = gomp_realloc (pool->threads,
467*38fd1498Szrj pool->threads_size
468*38fd1498Szrj * sizeof (struct gomp_thread_data *));
469*38fd1498Szrj }
470*38fd1498Szrj
471*38fd1498Szrj /* Release existing idle threads. */
472*38fd1498Szrj for (; i < n; ++i)
473*38fd1498Szrj {
474*38fd1498Szrj unsigned int place_partition_off = thr->ts.place_partition_off;
475*38fd1498Szrj unsigned int place_partition_len = thr->ts.place_partition_len;
476*38fd1498Szrj unsigned int place = 0;
477*38fd1498Szrj if (__builtin_expect (gomp_places_list != NULL, 0))
478*38fd1498Szrj {
479*38fd1498Szrj switch (bind)
480*38fd1498Szrj {
481*38fd1498Szrj case omp_proc_bind_true:
482*38fd1498Szrj case omp_proc_bind_close:
483*38fd1498Szrj if (k == s)
484*38fd1498Szrj {
485*38fd1498Szrj ++p;
486*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
487*38fd1498Szrj + team->prev_ts.place_partition_len))
488*38fd1498Szrj p = team->prev_ts.place_partition_off;
489*38fd1498Szrj k = 1;
490*38fd1498Szrj if (i == nthreads - rest)
491*38fd1498Szrj s = 1;
492*38fd1498Szrj }
493*38fd1498Szrj else
494*38fd1498Szrj ++k;
495*38fd1498Szrj break;
496*38fd1498Szrj case omp_proc_bind_master:
497*38fd1498Szrj break;
498*38fd1498Szrj case omp_proc_bind_spread:
499*38fd1498Szrj if (k == 0)
500*38fd1498Szrj {
501*38fd1498Szrj /* T <= P. */
502*38fd1498Szrj if (p < rest)
503*38fd1498Szrj p += s + 1;
504*38fd1498Szrj else
505*38fd1498Szrj p += s;
506*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
507*38fd1498Szrj + team->prev_ts.place_partition_len))
508*38fd1498Szrj p = team->prev_ts.place_partition_off;
509*38fd1498Szrj place_partition_off = p;
510*38fd1498Szrj if (p < rest)
511*38fd1498Szrj place_partition_len = s + 1;
512*38fd1498Szrj else
513*38fd1498Szrj place_partition_len = s;
514*38fd1498Szrj }
515*38fd1498Szrj else
516*38fd1498Szrj {
517*38fd1498Szrj /* T > P. */
518*38fd1498Szrj if (k == s)
519*38fd1498Szrj {
520*38fd1498Szrj ++p;
521*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
522*38fd1498Szrj + team->prev_ts.place_partition_len))
523*38fd1498Szrj p = team->prev_ts.place_partition_off;
524*38fd1498Szrj k = 1;
525*38fd1498Szrj if (i == nthreads - rest)
526*38fd1498Szrj s = 1;
527*38fd1498Szrj }
528*38fd1498Szrj else
529*38fd1498Szrj ++k;
530*38fd1498Szrj place_partition_off = p;
531*38fd1498Szrj place_partition_len = 1;
532*38fd1498Szrj }
533*38fd1498Szrj break;
534*38fd1498Szrj }
535*38fd1498Szrj if (affinity_thr != NULL
536*38fd1498Szrj || (bind != omp_proc_bind_true
537*38fd1498Szrj && pool->threads[i]->place != p + 1)
538*38fd1498Szrj || pool->threads[i]->place <= place_partition_off
539*38fd1498Szrj || pool->threads[i]->place > (place_partition_off
540*38fd1498Szrj + place_partition_len))
541*38fd1498Szrj {
542*38fd1498Szrj unsigned int l;
543*38fd1498Szrj if (affinity_thr == NULL)
544*38fd1498Szrj {
545*38fd1498Szrj unsigned int j;
546*38fd1498Szrj
547*38fd1498Szrj if (team->prev_ts.place_partition_len > 64)
548*38fd1498Szrj affinity_thr
549*38fd1498Szrj = gomp_malloc (team->prev_ts.place_partition_len
550*38fd1498Szrj * sizeof (struct gomp_thread *));
551*38fd1498Szrj else
552*38fd1498Szrj affinity_thr
553*38fd1498Szrj = gomp_alloca (team->prev_ts.place_partition_len
554*38fd1498Szrj * sizeof (struct gomp_thread *));
555*38fd1498Szrj memset (affinity_thr, '\0',
556*38fd1498Szrj team->prev_ts.place_partition_len
557*38fd1498Szrj * sizeof (struct gomp_thread *));
558*38fd1498Szrj for (j = i; j < old_threads_used; j++)
559*38fd1498Szrj {
560*38fd1498Szrj if (pool->threads[j]->place
561*38fd1498Szrj > team->prev_ts.place_partition_off
562*38fd1498Szrj && (pool->threads[j]->place
563*38fd1498Szrj <= (team->prev_ts.place_partition_off
564*38fd1498Szrj + team->prev_ts.place_partition_len)))
565*38fd1498Szrj {
566*38fd1498Szrj l = pool->threads[j]->place - 1
567*38fd1498Szrj - team->prev_ts.place_partition_off;
568*38fd1498Szrj pool->threads[j]->data = affinity_thr[l];
569*38fd1498Szrj affinity_thr[l] = pool->threads[j];
570*38fd1498Szrj }
571*38fd1498Szrj pool->threads[j] = NULL;
572*38fd1498Szrj }
573*38fd1498Szrj if (nthreads > old_threads_used)
574*38fd1498Szrj memset (&pool->threads[old_threads_used],
575*38fd1498Szrj '\0', ((nthreads - old_threads_used)
576*38fd1498Szrj * sizeof (struct gomp_thread *)));
577*38fd1498Szrj n = nthreads;
578*38fd1498Szrj affinity_count = old_threads_used - i;
579*38fd1498Szrj }
580*38fd1498Szrj if (affinity_count == 0)
581*38fd1498Szrj break;
582*38fd1498Szrj l = p;
583*38fd1498Szrj if (affinity_thr[l - team->prev_ts.place_partition_off]
584*38fd1498Szrj == NULL)
585*38fd1498Szrj {
586*38fd1498Szrj if (bind != omp_proc_bind_true)
587*38fd1498Szrj continue;
588*38fd1498Szrj for (l = place_partition_off;
589*38fd1498Szrj l < place_partition_off + place_partition_len;
590*38fd1498Szrj l++)
591*38fd1498Szrj if (affinity_thr[l - team->prev_ts.place_partition_off]
592*38fd1498Szrj != NULL)
593*38fd1498Szrj break;
594*38fd1498Szrj if (l == place_partition_off + place_partition_len)
595*38fd1498Szrj continue;
596*38fd1498Szrj }
597*38fd1498Szrj nthr = affinity_thr[l - team->prev_ts.place_partition_off];
598*38fd1498Szrj affinity_thr[l - team->prev_ts.place_partition_off]
599*38fd1498Szrj = (struct gomp_thread *) nthr->data;
600*38fd1498Szrj affinity_count--;
601*38fd1498Szrj pool->threads[i] = nthr;
602*38fd1498Szrj }
603*38fd1498Szrj else
604*38fd1498Szrj nthr = pool->threads[i];
605*38fd1498Szrj place = p + 1;
606*38fd1498Szrj }
607*38fd1498Szrj else
608*38fd1498Szrj nthr = pool->threads[i];
609*38fd1498Szrj nthr->ts.team = team;
610*38fd1498Szrj nthr->ts.work_share = &team->work_shares[0];
611*38fd1498Szrj nthr->ts.last_work_share = NULL;
612*38fd1498Szrj nthr->ts.team_id = i;
613*38fd1498Szrj nthr->ts.level = team->prev_ts.level + 1;
614*38fd1498Szrj nthr->ts.active_level = thr->ts.active_level;
615*38fd1498Szrj nthr->ts.place_partition_off = place_partition_off;
616*38fd1498Szrj nthr->ts.place_partition_len = place_partition_len;
617*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
618*38fd1498Szrj nthr->ts.single_count = 0;
619*38fd1498Szrj #endif
620*38fd1498Szrj nthr->ts.static_trip = 0;
621*38fd1498Szrj nthr->task = &team->implicit_task[i];
622*38fd1498Szrj nthr->place = place;
623*38fd1498Szrj gomp_init_task (nthr->task, task, icv);
624*38fd1498Szrj team->implicit_task[i].icv.nthreads_var = nthreads_var;
625*38fd1498Szrj team->implicit_task[i].icv.bind_var = bind_var;
626*38fd1498Szrj nthr->fn = fn;
627*38fd1498Szrj nthr->data = data;
628*38fd1498Szrj team->ordered_release[i] = &nthr->release;
629*38fd1498Szrj }
630*38fd1498Szrj
631*38fd1498Szrj if (__builtin_expect (affinity_thr != NULL, 0))
632*38fd1498Szrj {
633*38fd1498Szrj /* If AFFINITY_THR is non-NULL just because we had to
634*38fd1498Szrj permute some threads in the pool, but we've managed
635*38fd1498Szrj to find exactly as many old threads as we'd find
636*38fd1498Szrj without affinity, we don't need to handle this
637*38fd1498Szrj specially anymore. */
638*38fd1498Szrj if (nthreads <= old_threads_used
639*38fd1498Szrj ? (affinity_count == old_threads_used - nthreads)
640*38fd1498Szrj : (i == old_threads_used))
641*38fd1498Szrj {
642*38fd1498Szrj if (team->prev_ts.place_partition_len > 64)
643*38fd1498Szrj free (affinity_thr);
644*38fd1498Szrj affinity_thr = NULL;
645*38fd1498Szrj affinity_count = 0;
646*38fd1498Szrj }
647*38fd1498Szrj else
648*38fd1498Szrj {
649*38fd1498Szrj i = 1;
650*38fd1498Szrj /* We are going to compute the places/subpartitions
651*38fd1498Szrj again from the beginning. So, we need to reinitialize
652*38fd1498Szrj vars modified by the switch (bind) above inside
653*38fd1498Szrj of the loop, to the state they had after the initial
654*38fd1498Szrj switch (bind). */
655*38fd1498Szrj switch (bind)
656*38fd1498Szrj {
657*38fd1498Szrj case omp_proc_bind_true:
658*38fd1498Szrj case omp_proc_bind_close:
659*38fd1498Szrj if (nthreads > thr->ts.place_partition_len)
660*38fd1498Szrj /* T > P. S has been changed, so needs
661*38fd1498Szrj to be recomputed. */
662*38fd1498Szrj s = nthreads / thr->ts.place_partition_len;
663*38fd1498Szrj k = 1;
664*38fd1498Szrj p = thr->place - 1;
665*38fd1498Szrj break;
666*38fd1498Szrj case omp_proc_bind_master:
667*38fd1498Szrj /* No vars have been changed. */
668*38fd1498Szrj break;
669*38fd1498Szrj case omp_proc_bind_spread:
670*38fd1498Szrj p = thr->ts.place_partition_off;
671*38fd1498Szrj if (k != 0)
672*38fd1498Szrj {
673*38fd1498Szrj /* T > P. */
674*38fd1498Szrj s = nthreads / team->prev_ts.place_partition_len;
675*38fd1498Szrj k = 1;
676*38fd1498Szrj }
677*38fd1498Szrj break;
678*38fd1498Szrj }
679*38fd1498Szrj
680*38fd1498Szrj /* Increase the barrier threshold to make sure all new
681*38fd1498Szrj threads and all the threads we're going to let die
682*38fd1498Szrj arrive before the team is released. */
683*38fd1498Szrj if (affinity_count)
684*38fd1498Szrj gomp_simple_barrier_reinit (&pool->threads_dock,
685*38fd1498Szrj nthreads + affinity_count);
686*38fd1498Szrj }
687*38fd1498Szrj }
688*38fd1498Szrj
689*38fd1498Szrj if (i == nthreads)
690*38fd1498Szrj goto do_release;
691*38fd1498Szrj
692*38fd1498Szrj }
693*38fd1498Szrj
694*38fd1498Szrj if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
695*38fd1498Szrj {
696*38fd1498Szrj long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
697*38fd1498Szrj
698*38fd1498Szrj if (old_threads_used == 0)
699*38fd1498Szrj --diff;
700*38fd1498Szrj
701*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
702*38fd1498Szrj __sync_fetch_and_add (&gomp_managed_threads, diff);
703*38fd1498Szrj #else
704*38fd1498Szrj gomp_mutex_lock (&gomp_managed_threads_lock);
705*38fd1498Szrj gomp_managed_threads += diff;
706*38fd1498Szrj gomp_mutex_unlock (&gomp_managed_threads_lock);
707*38fd1498Szrj #endif
708*38fd1498Szrj }
709*38fd1498Szrj
710*38fd1498Szrj attr = &gomp_thread_attr;
711*38fd1498Szrj if (__builtin_expect (gomp_places_list != NULL, 0))
712*38fd1498Szrj {
713*38fd1498Szrj size_t stacksize;
714*38fd1498Szrj pthread_attr_init (&thread_attr);
715*38fd1498Szrj pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
716*38fd1498Szrj if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
717*38fd1498Szrj pthread_attr_setstacksize (&thread_attr, stacksize);
718*38fd1498Szrj attr = &thread_attr;
719*38fd1498Szrj }
720*38fd1498Szrj
721*38fd1498Szrj start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
722*38fd1498Szrj * (nthreads-i));
723*38fd1498Szrj
724*38fd1498Szrj /* Launch new threads. */
725*38fd1498Szrj for (; i < nthreads; ++i)
726*38fd1498Szrj {
727*38fd1498Szrj pthread_t pt;
728*38fd1498Szrj int err;
729*38fd1498Szrj
730*38fd1498Szrj start_data->ts.place_partition_off = thr->ts.place_partition_off;
731*38fd1498Szrj start_data->ts.place_partition_len = thr->ts.place_partition_len;
732*38fd1498Szrj start_data->place = 0;
733*38fd1498Szrj if (__builtin_expect (gomp_places_list != NULL, 0))
734*38fd1498Szrj {
735*38fd1498Szrj switch (bind)
736*38fd1498Szrj {
737*38fd1498Szrj case omp_proc_bind_true:
738*38fd1498Szrj case omp_proc_bind_close:
739*38fd1498Szrj if (k == s)
740*38fd1498Szrj {
741*38fd1498Szrj ++p;
742*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
743*38fd1498Szrj + team->prev_ts.place_partition_len))
744*38fd1498Szrj p = team->prev_ts.place_partition_off;
745*38fd1498Szrj k = 1;
746*38fd1498Szrj if (i == nthreads - rest)
747*38fd1498Szrj s = 1;
748*38fd1498Szrj }
749*38fd1498Szrj else
750*38fd1498Szrj ++k;
751*38fd1498Szrj break;
752*38fd1498Szrj case omp_proc_bind_master:
753*38fd1498Szrj break;
754*38fd1498Szrj case omp_proc_bind_spread:
755*38fd1498Szrj if (k == 0)
756*38fd1498Szrj {
757*38fd1498Szrj /* T <= P. */
758*38fd1498Szrj if (p < rest)
759*38fd1498Szrj p += s + 1;
760*38fd1498Szrj else
761*38fd1498Szrj p += s;
762*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
763*38fd1498Szrj + team->prev_ts.place_partition_len))
764*38fd1498Szrj p = team->prev_ts.place_partition_off;
765*38fd1498Szrj start_data->ts.place_partition_off = p;
766*38fd1498Szrj if (p < rest)
767*38fd1498Szrj start_data->ts.place_partition_len = s + 1;
768*38fd1498Szrj else
769*38fd1498Szrj start_data->ts.place_partition_len = s;
770*38fd1498Szrj }
771*38fd1498Szrj else
772*38fd1498Szrj {
773*38fd1498Szrj /* T > P. */
774*38fd1498Szrj if (k == s)
775*38fd1498Szrj {
776*38fd1498Szrj ++p;
777*38fd1498Szrj if (p == (team->prev_ts.place_partition_off
778*38fd1498Szrj + team->prev_ts.place_partition_len))
779*38fd1498Szrj p = team->prev_ts.place_partition_off;
780*38fd1498Szrj k = 1;
781*38fd1498Szrj if (i == nthreads - rest)
782*38fd1498Szrj s = 1;
783*38fd1498Szrj }
784*38fd1498Szrj else
785*38fd1498Szrj ++k;
786*38fd1498Szrj start_data->ts.place_partition_off = p;
787*38fd1498Szrj start_data->ts.place_partition_len = 1;
788*38fd1498Szrj }
789*38fd1498Szrj break;
790*38fd1498Szrj }
791*38fd1498Szrj start_data->place = p + 1;
792*38fd1498Szrj if (affinity_thr != NULL && pool->threads[i] != NULL)
793*38fd1498Szrj continue;
794*38fd1498Szrj gomp_init_thread_affinity (attr, p);
795*38fd1498Szrj }
796*38fd1498Szrj
797*38fd1498Szrj start_data->fn = fn;
798*38fd1498Szrj start_data->fn_data = data;
799*38fd1498Szrj start_data->ts.team = team;
800*38fd1498Szrj start_data->ts.work_share = &team->work_shares[0];
801*38fd1498Szrj start_data->ts.last_work_share = NULL;
802*38fd1498Szrj start_data->ts.team_id = i;
803*38fd1498Szrj start_data->ts.level = team->prev_ts.level + 1;
804*38fd1498Szrj start_data->ts.active_level = thr->ts.active_level;
805*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
806*38fd1498Szrj start_data->ts.single_count = 0;
807*38fd1498Szrj #endif
808*38fd1498Szrj start_data->ts.static_trip = 0;
809*38fd1498Szrj start_data->task = &team->implicit_task[i];
810*38fd1498Szrj gomp_init_task (start_data->task, task, icv);
811*38fd1498Szrj team->implicit_task[i].icv.nthreads_var = nthreads_var;
812*38fd1498Szrj team->implicit_task[i].icv.bind_var = bind_var;
813*38fd1498Szrj start_data->thread_pool = pool;
814*38fd1498Szrj start_data->nested = nested;
815*38fd1498Szrj
816*38fd1498Szrj attr = gomp_adjust_thread_attr (attr, &thread_attr);
817*38fd1498Szrj err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
818*38fd1498Szrj if (err != 0)
819*38fd1498Szrj gomp_fatal ("Thread creation failed: %s", strerror (err));
820*38fd1498Szrj }
821*38fd1498Szrj
822*38fd1498Szrj if (__builtin_expect (attr == &thread_attr, 0))
823*38fd1498Szrj pthread_attr_destroy (&thread_attr);
824*38fd1498Szrj
825*38fd1498Szrj do_release:
826*38fd1498Szrj if (nested)
827*38fd1498Szrj gomp_barrier_wait (&team->barrier);
828*38fd1498Szrj else
829*38fd1498Szrj gomp_simple_barrier_wait (&pool->threads_dock);
830*38fd1498Szrj
831*38fd1498Szrj /* Decrease the barrier threshold to match the number of threads
832*38fd1498Szrj that should arrive back at the end of this team. The extra
833*38fd1498Szrj threads should be exiting. Note that we arrange for this test
834*38fd1498Szrj to never be true for nested teams. If AFFINITY_COUNT is non-zero,
835*38fd1498Szrj the barrier as well as gomp_managed_threads was temporarily
836*38fd1498Szrj set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_COUNT,
837*38fd1498Szrj AFFINITY_COUNT if non-zero will be always at least
838*38fd1498Szrj OLD_THREADS_COUNT - NTHREADS. */
839*38fd1498Szrj if (__builtin_expect (nthreads < old_threads_used, 0)
840*38fd1498Szrj || __builtin_expect (affinity_count, 0))
841*38fd1498Szrj {
842*38fd1498Szrj long diff = (long) nthreads - (long) old_threads_used;
843*38fd1498Szrj
844*38fd1498Szrj if (affinity_count)
845*38fd1498Szrj diff = -affinity_count;
846*38fd1498Szrj
847*38fd1498Szrj gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
848*38fd1498Szrj
849*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
850*38fd1498Szrj __sync_fetch_and_add (&gomp_managed_threads, diff);
851*38fd1498Szrj #else
852*38fd1498Szrj gomp_mutex_lock (&gomp_managed_threads_lock);
853*38fd1498Szrj gomp_managed_threads += diff;
854*38fd1498Szrj gomp_mutex_unlock (&gomp_managed_threads_lock);
855*38fd1498Szrj #endif
856*38fd1498Szrj }
857*38fd1498Szrj if (__builtin_expect (affinity_thr != NULL, 0)
858*38fd1498Szrj && team->prev_ts.place_partition_len > 64)
859*38fd1498Szrj free (affinity_thr);
860*38fd1498Szrj }
861*38fd1498Szrj #endif
862*38fd1498Szrj
863*38fd1498Szrj
864*38fd1498Szrj /* Terminate the current team. This is only to be called by the master
865*38fd1498Szrj thread. We assume that we must wait for the other threads. */
866*38fd1498Szrj
867*38fd1498Szrj void
gomp_team_end(void)868*38fd1498Szrj gomp_team_end (void)
869*38fd1498Szrj {
870*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
871*38fd1498Szrj struct gomp_team *team = thr->ts.team;
872*38fd1498Szrj
873*38fd1498Szrj /* This barrier handles all pending explicit threads.
874*38fd1498Szrj As #pragma omp cancel parallel might get awaited count in
875*38fd1498Szrj team->barrier in a inconsistent state, we need to use a different
876*38fd1498Szrj counter here. */
877*38fd1498Szrj gomp_team_barrier_wait_final (&team->barrier);
878*38fd1498Szrj if (__builtin_expect (team->team_cancelled, 0))
879*38fd1498Szrj {
880*38fd1498Szrj struct gomp_work_share *ws = team->work_shares_to_free;
881*38fd1498Szrj do
882*38fd1498Szrj {
883*38fd1498Szrj struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
884*38fd1498Szrj if (next_ws == NULL)
885*38fd1498Szrj gomp_ptrlock_set (&ws->next_ws, ws);
886*38fd1498Szrj gomp_fini_work_share (ws);
887*38fd1498Szrj ws = next_ws;
888*38fd1498Szrj }
889*38fd1498Szrj while (ws != NULL);
890*38fd1498Szrj }
891*38fd1498Szrj else
892*38fd1498Szrj gomp_fini_work_share (thr->ts.work_share);
893*38fd1498Szrj
894*38fd1498Szrj gomp_end_task ();
895*38fd1498Szrj thr->ts = team->prev_ts;
896*38fd1498Szrj
897*38fd1498Szrj if (__builtin_expect (thr->ts.team != NULL, 0))
898*38fd1498Szrj {
899*38fd1498Szrj #ifdef HAVE_SYNC_BUILTINS
900*38fd1498Szrj __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
901*38fd1498Szrj #else
902*38fd1498Szrj gomp_mutex_lock (&gomp_managed_threads_lock);
903*38fd1498Szrj gomp_managed_threads -= team->nthreads - 1L;
904*38fd1498Szrj gomp_mutex_unlock (&gomp_managed_threads_lock);
905*38fd1498Szrj #endif
906*38fd1498Szrj /* This barrier has gomp_barrier_wait_last counterparts
907*38fd1498Szrj and ensures the team can be safely destroyed. */
908*38fd1498Szrj gomp_barrier_wait (&team->barrier);
909*38fd1498Szrj }
910*38fd1498Szrj
911*38fd1498Szrj if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
912*38fd1498Szrj {
913*38fd1498Szrj struct gomp_work_share *ws = team->work_shares[0].next_alloc;
914*38fd1498Szrj do
915*38fd1498Szrj {
916*38fd1498Szrj struct gomp_work_share *next_ws = ws->next_alloc;
917*38fd1498Szrj free (ws);
918*38fd1498Szrj ws = next_ws;
919*38fd1498Szrj }
920*38fd1498Szrj while (ws != NULL);
921*38fd1498Szrj }
922*38fd1498Szrj gomp_sem_destroy (&team->master_release);
923*38fd1498Szrj
924*38fd1498Szrj if (__builtin_expect (thr->ts.team != NULL, 0)
925*38fd1498Szrj || __builtin_expect (team->nthreads == 1, 0))
926*38fd1498Szrj free_team (team);
927*38fd1498Szrj else
928*38fd1498Szrj {
929*38fd1498Szrj struct gomp_thread_pool *pool = thr->thread_pool;
930*38fd1498Szrj if (pool->last_team)
931*38fd1498Szrj free_team (pool->last_team);
932*38fd1498Szrj pool->last_team = team;
933*38fd1498Szrj gomp_release_thread_pool (pool);
934*38fd1498Szrj }
935*38fd1498Szrj }
936*38fd1498Szrj
937*38fd1498Szrj #ifdef LIBGOMP_USE_PTHREADS
938*38fd1498Szrj
939*38fd1498Szrj /* Constructors for this file. */
940*38fd1498Szrj
941*38fd1498Szrj static void __attribute__((constructor))
initialize_team(void)942*38fd1498Szrj initialize_team (void)
943*38fd1498Szrj {
944*38fd1498Szrj #if !defined HAVE_TLS && !defined USE_EMUTLS
945*38fd1498Szrj static struct gomp_thread initial_thread_tls_data;
946*38fd1498Szrj
947*38fd1498Szrj pthread_key_create (&gomp_tls_key, NULL);
948*38fd1498Szrj pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
949*38fd1498Szrj #endif
950*38fd1498Szrj
951*38fd1498Szrj if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
952*38fd1498Szrj gomp_fatal ("could not create thread pool destructor.");
953*38fd1498Szrj }
954*38fd1498Szrj
955*38fd1498Szrj static void __attribute__((destructor))
team_destructor(void)956*38fd1498Szrj team_destructor (void)
957*38fd1498Szrj {
958*38fd1498Szrj /* Without this dlclose on libgomp could lead to subsequent
959*38fd1498Szrj crashes. */
960*38fd1498Szrj pthread_key_delete (gomp_thread_destructor);
961*38fd1498Szrj }
962*38fd1498Szrj #endif
963*38fd1498Szrj
964*38fd1498Szrj struct gomp_task_icv *
gomp_new_icv(void)965*38fd1498Szrj gomp_new_icv (void)
966*38fd1498Szrj {
967*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
968*38fd1498Szrj struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
969*38fd1498Szrj gomp_init_task (task, NULL, &gomp_global_icv);
970*38fd1498Szrj thr->task = task;
971*38fd1498Szrj #ifdef LIBGOMP_USE_PTHREADS
972*38fd1498Szrj pthread_setspecific (gomp_thread_destructor, thr);
973*38fd1498Szrj #endif
974*38fd1498Szrj return &task->icv;
975*38fd1498Szrj }
976