/* Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
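      /* Idle loop: each pass runs one parallel region, then re-docks on
	 POOL->THREADS_DOCK.  Before releasing the dock again, the master
	 thread stores the next function to run in THR->FN and its
	 argument in THR->DATA; a NULL FN terminates the loop and this
	 thread exits.  */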
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
				      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
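  /* TEAM->WORK_SHARES is an inline array of 8 entries (matching
     WORK_SHARE_CHUNK above): slot 0 is initialized for immediate use by
     the team, and slots 1..7 are chained onto the allocation free list
     below.  */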
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool. */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

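/* Helper run by each pool thread while the pool is being torn down:
   it is installed as THR->FN by gomp_free_thread below, waits on the
   dock barrier so the destroyer knows all threads have arrived, and
   then exits the thread.  */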
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
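  /* The low three bits of FLAGS carry the proc_bind clause of the
     parallel directive; if the clause is present and the bind-var ICV
     does not disable binding, it overrides bind-var for this team.  */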
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REST threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
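	      /* If we are already permuting threads by place, or the
		 thread currently docked in slot I does not sit on a
		 place acceptable for this slot's binding, look up a
		 suitable old thread in the AFFINITY_THR chains (built
		 below on first use, indexed by place) instead.  */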
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_barrier_reinit (&pool->threads_dock,
				     nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

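  /* Account in gomp_managed_threads for the threads about to be
     created (and, when affinity forced a permutation, for the extra
     old threads kept alive until the next dock).  The master itself
     is already counted, hence the adjustment for a fresh pool.  */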
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

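  /* When a places list is in use, build a private attribute object so
     gomp_init_thread_affinity can give each new thread its own
     affinity below, while keeping the stack size configured in
     gomp_thread_attr.  */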
  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

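  /* Start the team: nested teams are released via the team barrier,
     non-nested teams by undocking every thread parked on the pool's
     dock barrier.  */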
 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     Because #pragma omp cancel parallel might leave the awaited count
     in team->barrier in an inconsistent state, we need to use a
     different counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

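  /* Work shares beyond the inline array in the team structure are
     allocated separately and chained through next_alloc; free that
     chain now.  */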
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

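/* Allocate and install a task for the current thread, giving it a
   private copy of the global ICVs, and register the thread destructor.
   Returns a pointer to the new task's ICVs.  */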
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}