/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};
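/* The start data is allocated with gomp_alloca on the launching thread's
   stack (see gomp_team_start below); each new thread copies out what it
   needs in gomp_thread_start before synchronizing with the team.  */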


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

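      /* Dock in the thread pool and wait to be handed work.  Each release
	 of the dock runs one parallel region; if no new FN has been set
	 by the time the thread docks again, it leaves the loop and
	 exits.  */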
      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

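/* If the current thread is not part of any team, try to recycle the team
   structure most recently cached in its thread pool, provided that team
   was sized for exactly NTHREADS threads.  Return NULL otherwise.  */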
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
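  /* Set up the cache of inline work shares: work_shares[0] is initialized
     for immediate use by the first work-sharing construct, while
     work_shares[1] through work_shares[7] are chained onto the allocation
     free list.  */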
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
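
/* A team created here is typically consumed by the parallel construct
   (see parallel.c): GOMP_parallel hands it to gomp_team_start, runs the
   region body on the master thread, and GOMP_parallel_end tears it down
   again via gomp_team_end.  */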


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

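/* Helper run by each docked pool thread when gomp_free_thread tears the
   pool down: synchronize on the dock barrier so the caller knows all
   threads have arrived, release per-thread resources, and terminate.  */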
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits until all threads have called
	     gomp_simple_barrier_wait_last in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  FN and DATA are the function and argument each team
   member will run, NTHREADS is the requested team size, the low bits of
   FLAGS encode the proc_bind policy for the region, and TEAM is the
   structure obtained from gomp_new_team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
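  /* A nonzero nesting level means this parallel region is being started
     from inside another team, so the pool's docked threads cannot be
     reused below.  */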
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on the chosen proc_bind model, set the subpartition
	 for the master thread and initialize the helper variables
	 P and optionally S, K and/or REST used by the later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REST threads placed one by one
		 into already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to the master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier ones s+1
		 places); each thread will be bound to the first
		 place in its subpartition (except for the master
		 thread, which can be bound to another place in
		 its subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread_data *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
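	      /* If the thread docked in slot I is not on an acceptable
		 place for team member I, fall back to matching threads
		 by place: the first time through, the remaining old
		 threads are bucketed by place into AFFINITY_THR (chained
		 through their data fields), and a thread from the desired
		 place is then picked for the slot below.  */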
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 the vars modified by the switch (bind) above inside
		 the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so it needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit tasks.
     Because #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
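  /* If the team was cancelled, the threads may not have freed their work
     shares, so walk the chain starting at work_shares_to_free and finish
     each one here; otherwise only the current work share needs to be
     finished.  */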
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
#endif

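/* Create a fresh implicit task initialized from the global ICVs for the
   current thread, register the thread with gomp_thread_destructor so its
   data is released at thread exit, and return the new task's ICV block.  */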
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}