/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */
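
/* Overview: gomp_new_team allocates the team descriptor, gomp_team_start
   launches or reuses threads to populate it, and gomp_team_end tears it
   down again.  Non-nested teams draw their workers from a per-master
   thread pool; idle pool threads sleep on the pool's threads_dock
   barrier between parallel regions.  */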

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  bool nested;
};
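
/* Instances of this structure live on the stack of the thread calling
   gomp_team_start (via gomp_alloca), so a newly created thread must copy
   everything it needs out of it before rendezvousing at the startup
   barrier; see gomp_thread_start below.  */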


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

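  /* Register this thread's release semaphore with the team so that
     ordered sections and the master can signal it by team_id.  */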
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Cache the thread pool pointer locally.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
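
      /* Idle loop: each iteration runs the work for one team, re-docks on
	 the pool barrier, and then picks up the next fn/data pair that
	 gomp_team_start stored in this thread before releasing the dock.
	 A NULL fn tells the thread to exit.  */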
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

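  /* The implicit_task[] array at the tail of the structure and the
     ordered_release[] pointer array are carved out of a single
     allocation together with the team structure itself; ordered_release
     is pointed just past implicit_task[nthreads] below.  */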
  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
				      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
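  /* Initialize the first work share for immediate use and chain the
     remaining seven inline work shares (work_share_chunk is 8) into the
     team's allocation free list.  */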
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

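/* Run on each docked worker when its pool is being destroyed: arrive at
   the dock barrier one last time (gomp_free_thread waits for this), then
   exit the thread.  */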
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_remaining_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */
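
/* The typical non-nested caller is GOMP_parallel_start in parallel.c,
   which resolves the requested thread count and then does, roughly:

     gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads));

   gomp_team_end below is its counterpart, invoked from GOMP_parallel_end.  */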

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify pool->threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (i == nthreads)
	goto do_release;

      /* If necessary, expand the size of the pool->threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand the array geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	}
    }

  if (__builtin_expect (nthreads > old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
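  /* When CPU affinity is in use, build a private copy of the thread
     attributes; gomp_init_thread_affinity may adjust them for each new
     thread below, and gomp_thread_attr must stay untouched.  */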
  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
	gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (__builtin_expect (nthreads < old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit tasks.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }
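
  /* Free any work share blocks that were allocated beyond the eight
     inline work_shares embedded in the team structure; they are kept
     on the next_alloc chain rooted at work_shares[0].  */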
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
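      /* Defer freeing: workers may still be finishing their implicit
	 tasks and draining the team barrier on their way back to the
	 dock, so the team memory must stay valid.  It is released when
	 the next team ends or the pool itself is freed.  */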
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, a dlclose of libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

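/* Build a default ICV block for a thread that libgomp has not seen
   before (e.g. a foreign pthread calling into OpenMP): allocate an
   implicit task initialized from the global ICVs and register the
   pool destructor for the thread.  */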
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}