/* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011
   Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
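
/* Accesses to this per-thread state go through gomp_thread (), declared
   in libgomp.h.  Illustrative sketch (not verbatim from that header):
   on HAVE_TLS targets it is roughly

       static inline struct gomp_thread *
       gomp_thread (void)
       {
         return &gomp_tls_data;
       }

   while the fallback reads pthread_getspecific (gomp_tls_key).  */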


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  bool nested;
};
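
/* Illustrative note (restating gomp_team_start below, not a separate
   API): the master fills one of these records per created thread,

       start_data->fn = fn;
       start_data->fn_data = data;
       start_data->nested = nested;
       err = pthread_create (&pt, attr, gomp_thread_start, start_data);

   and gomp_thread_start copies everything it needs out of the record
   before any barrier releases the master, so the records can live in
   the master's alloca'd stack memory.  */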


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}
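
/* Illustrative sketch of the dock protocol above (not additional API).
   A pooled worker sleeps in gomp_barrier_wait (&pool->threads_dock);
   the master hands it new work before releasing the barrier:

       nthr->fn = fn;       (written while the worker is still docked)
       nthr->data = data;
       gomp_barrier_wait (&pool->threads_dock);

   The barrier is what orders the worker's reads of thr->fn and thr->data
   after the master's writes.  A worker undocked without having been
   handed a new fn (because the next team is smaller) reads the NULL
   stored by its own reset above and leaves the loop.  */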


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}
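
/* Illustrative layout of the single allocation made above (derived from
   the code, sizes not to scale):

       +------------------+------------------------+-------------------------+
       | struct gomp_team | implicit_task[0..n-1]  | ordered_release[0..n-1] |
       +------------------+------------------------+-------------------------+

   implicit_task is the trailing array of struct gomp_team, so aiming
   ordered_release just past implicit_task[nthreads] keeps everything in
   one block and lets free_team release it with a single free.  Note also
   the inline cache of eight work shares: work_shares[0] is initialized
   for immediate use and work_shares[1..7] are threaded onto the
   allocation free list.  */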


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
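
/* Context for the above (drawn from elsewhere in this file):
   gomp_free_thread is not called directly; it runs as the destructor of
   the gomp_thread_destructor key created in initialize_team below.  A
   thread arms it by storing its gomp_thread pointer under that key,

       pthread_setspecific (gomp_thread_destructor, thr);

   as gomp_team_start and gomp_new_icv do, so the pool and any cached
   last_team are reclaimed when that thread exits.  */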

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (i == nthreads)
        goto do_release;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }
    }

  if (__builtin_expect (nthreads > old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
        gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (__builtin_expect (nthreads < old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }
}
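
/* Illustrative sketch (compiler side, not part of this file): in GCC 4.7
   a parallel region such as

       #pragma omp parallel num_threads (4)
       body ();

   is lowered to roughly the following calls into parallel.c, where
   GOMP_parallel_start ends up in gomp_new_team and gomp_team_start:

       GOMP_parallel_start (body_fn, &shared_data, 4);
       body_fn (&shared_data);      (the master runs its own share)
       GOMP_parallel_end ();        (calls gomp_team_end)

   body_fn and shared_data stand in for the compiler-generated outlined
   function and its argument record.  */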


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit tasks.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}
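
/* Design note (inferred from the code above): for a non-nested team the
   master may return from gomp_team_barrier_wait while workers are still
   on their way out of the barrier code, so freeing the team immediately
   would be unsafe.  Instead it is parked in pool->last_team and freed on
   a later gomp_team_end or when the pool itself is torn down.  Nested
   teams take the extra gomp_barrier_wait path above, whose
   gomp_barrier_wait_last counterparts guarantee every thread is done
   with the team; single-thread teams never had other threads; in both
   cases the immediate free_team is safe.  */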


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}