/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

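/* Usage sketch (illustrative only, not part of this file): the host entry
   point for "#pragma omp parallel" in parallel.c drives the routines
   defined here roughly as

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);
     GOMP_parallel_end ();

   where fn (data) is the master thread running its own share of the
   region and GOMP_parallel_end ends up in gomp_team_end below.  The exact
   num_threads resolution and flag handling live in parallel.c.  */
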
#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};
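
/* Note: the start data lives on the master's stack (see the gomp_alloca
   call in gomp_team_start); gomp_thread_start copies the fields it needs
   before the team is released, and the master itself only walks the array
   again for affinity display.  */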


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
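      /* Each pass through this loop runs one parallel region on behalf of
	 a team and then re-docks on the pool barrier.  Before the dock is
	 released again, gomp_team_start fills in thr->fn and thr->data for
	 the pool threads it reuses; a thread whose fn is left NULL is not
	 part of the new team and falls out of the loop.  */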
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}
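
/* gomp_new_team below consults get_last_team first so that a team with the
   same number of threads can reuse the cached allocation (together with its
   already initialized barrier and locks) instead of allocating afresh.  */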

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);
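
      /* A sketch of the single allocation made above (IMPLICIT_TASK is the
	 trailing array member of struct gomp_team):

	   struct gomp_team | implicit_task[nthreads] | ordered_release[nthreads]

	 ORDERED_RELEASE is pointed just past the implicit tasks further
	 below.  */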

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

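  /* work_shares[0] backs the team's first work-sharing construct; the
     remaining inline work shares are chained onto the allocation free list
     so that early work-sharing constructs can be set up without calling
     gomp_malloc.  */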
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

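/* Runs on each docked pool thread (it is installed as thr->fn by
   gomp_free_thread below): finish the last round on the dock barrier so
   the master can proceed, tear down the thread's per-thread state and
   terminate the thread.  */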
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
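  /* A note on the arguments: FN/DATA is the outlined body of the parallel
     region, NTHREADS includes the master thread (which becomes team_id 0
     below and is never launched), TEAM normally comes from gomp_new_team,
     and TASKGROUP, when non-NULL, is inherited by the implicit tasks
     created here.  */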
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
				      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
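  /* The low three bits of FLAGS carry the proc_bind clause value from the
     parallel directive (an omp_proc_bind_* constant); it overrides the
     bind-var ICV unless that ICV is omp_proc_bind_false.  */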
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REST threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	  /* Add current (master) thread to threads[].  */
	  pool->threads[0] = thr;
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  force_display = true;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->task->taskgroup = taskgroup;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));
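
  /* START_DATA is a stack-allocated array with one slot per thread still to
     be created; each slot is handed to gomp_thread_start through
     pthread_create below and is only walked again afterwards for affinity
     display.  */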

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
			    start_data);
      start_data++;
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     never to be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier threshold as well as gomp_managed_threads were temporarily
     raised to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     a non-zero AFFINITY_COUNT will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
	  || nthreads != old_threads_used
	  || force_display)
	{
	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
					thr->place);
	  if (nested)
	    {
	      start_data -= nthreads - 1;
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
						start_data->handle,
#else
						gomp_thread_self (),
#endif
						&start_data->ts,
						start_data->place);
		  start_data++;
		}
	    }
	  else
	    {
	      for (i = 1; i < nthreads; ++i)
		{
		  gomp_thread_handle handle
		    = gomp_thread_to_pthread_t (pool->threads[i]);
		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
						pool->threads[i]->place);
		}
	    }
	}
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach the thread;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  pthread_t *thrs
	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_pause_pool_helper;
	      nthr->data = pool;
	      thrs[i] = gomp_thread_to_pthread_t (nthr);
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_pause_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	  for (i = 1; i < pool->threads_used; i++)
	    pthread_join (thrs[i], NULL);
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}