/* xref: /dragonfly/contrib/gcc-8.0/libgomp/team.c (revision 38fd1498) */
/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};

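/* As used by gomp_team_start below, these records are carved out of a
   gomp_alloca block on the launching thread's stack; gomp_thread_start
   copies every field it needs before reaching its first barrier, and
   gomp_team_start does not return (releasing that stack block) until
   the same barrier has been passed, so no heap allocation is needed.  */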

/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

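/* Non-nested pool threads started above park on pool->threads_dock
   between parallel regions.  gomp_team_start wakes them by storing a
   new thr->fn / thr->data pair and releasing the dock barrier; after
   the region each thread clears thr->fn again, so a thread whose fn is
   still NULL at the next release simply falls out of the do/while loop
   and exits.  gomp_free_thread below uses the same mechanism to shut
   the pool down, by handing every docked thread gomp_free_pool_helper.  */
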
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

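/* gomp_team_end stores the team it has just finished with in
   pool->last_team rather than freeing it, so a non-nested parallel
   region that requests the same number of threads can reuse the
   allocation here instead of calling gomp_malloc again.  */
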
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
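
/* The single gomp_malloc above is sized to hold three things laid out
   back to back: the gomp_team itself, nthreads implicit_task entries
   (implicit_task is expected to be the trailing array member of
   struct gomp_team in libgomp.h), and nthreads ordered_release
   pointers.  That layout is why ordered_release can simply be pointed
   at &team->implicit_task[nthreads]:

     [ struct gomp_team | implicit_task[0..n-1] | ordered_release[0..n-1] ]  */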


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

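/* gomp_free_thread is also registered as the destructor of the
   gomp_thread_destructor pthread key in initialize_team below, so it
   runs automatically when a thread that called gomp_new_icv (or
   otherwise set that key) terminates, tearing down its thread pool
   and implicit task.  */
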
/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

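  /* Worked example for the omp_proc_bind_spread, T <= P case above:
     with nthreads = 4 and a place partition of 10 places starting at
     offset 0, s = 10 / 4 = 2 and rest = 10 % 4 = 2, so REST becomes
     (2 + 1) * 2 + 0 = 6; the four subpartitions are then
     [0..2] [3..5] [6..7] [8..9], i.e. the first two get s + 1 = 3
     places and the remaining two get s = 2, and each additional
     thread is bound to the first place of its subpartition.  */
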
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread_data *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

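      /* When the loop above cannot keep reusing pool threads in their
	 current slots (because places no longer line up), affinity_thr
	 becomes a per-place free list: affinity_thr[l] heads a list of
	 old threads whose place was l + 1 relative to the previous
	 partition, linked through their data fields, and threads are
	 popped off that list for whichever place the new team wants
	 next.  affinity_count tracks how many such old threads are
	 still waiting to be reused or retired.  */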
      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

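  /* The --diff above accounts for the master thread: when the pool was
     empty (old_threads_used == 0) the master is included in nthreads
     but is not a newly created thread, so gomp_managed_threads should
     only grow by nthreads - 1 plus any extra affinity threads.  This
     mirrors the "1L - ..." adjustments in gomp_free_thread and
     gomp_team_end.  */
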
  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif

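/* For orientation, the team entry points above are normally driven
   from the parallel-region entry code elsewhere in libgomp (e.g.
   GOMP_parallel in parallel.c); roughly, and glossing over
   thread-count resolution and cancellation handling, the expected
   sequence looks like:

     void
     run_parallel_region (void (*fn) (void *), void *data,
			  unsigned nthreads, unsigned flags)
     {
       struct gomp_team *team = gomp_new_team (nthreads);
       gomp_team_start (fn, data, nthreads, flags, team);
       fn (data);		// the master runs the region body itself
       gomp_team_end ();	// implicit barrier, then recycle the team
     }

   run_parallel_region is only an illustrative name; the real callers
   and their exact argument handling live outside this file.  */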

/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

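/* The final else branch above is the producer side of the caching that
   get_last_team consumes: a finished non-nested, multi-thread team is
   parked in pool->last_team (evicting and freeing any previously
   cached team) so that the next parallel region of the same size can
   reuse the allocation.  Nested and single-thread teams are freed
   right away instead of being cached.  */
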
#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}