/* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
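
/* When the target lacks compiler-level TLS, each thread's struct
   gomp_thread is instead reached through pthread_getspecific on
   gomp_tls_key; gomp_thread () in libgomp.h hides the difference.  */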


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  bool nested;
};
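
/* Each new thread receives one of these blocks.  They are carved out
   of an alloca'd array on the master's stack in gomp_team_start; that
   storage stays valid because the master does not leave
   gomp_team_start until every new thread has copied its fields and
   reached the startup barrier.  */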


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
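      /* Non-nested worker: park in the pool.  Each iteration of the
         loop below serves one parallel region: dock on threads_dock
         until the master releases the team, run the region, then dock
         again.  A NULL thr->fn after undocking means this thread was
         not picked for the next team, so it exits.  */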
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

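  /* The team, its implicit tasks, and the ordered_release pointers all
     live in one allocation: the team structure is followed by
     implicit_task[NTHREADS] (a trailing array in struct gomp_team),
     which is in turn followed by ordered_release[NTHREADS]; see the
     assignment of team->ordered_release below.  */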
  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
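  /* work_shares[] embeds work_share_chunk (8) inline entries: entry 0
     is initialized for immediate use, and entries 1..7 are chained
     onto the allocation list for later work-sharing constructs in
     this team.  */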
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

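/* Thread-side helper for pool teardown: installed as thr->fn by
   gomp_free_thread, it signals completion via the dock barrier and
   then exits this thread.  */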
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_remaining_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

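/* For orientation (simplified; see the libgomp ABI documentation for
   the exact contract): the compiler outlines the body of

     #pragma omp parallel
       body;

   into a subfunction and emits roughly

     GOMP_parallel_start (body_fn, &data, num_threads);
     body_fn (&data);
     GOMP_parallel_end ();

   GOMP_parallel_start resolves the team size and calls gomp_team_start
   below; the master thread then runs the body itself.  */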
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (i == nthreads)
        goto do_release;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }
    }

  if (__builtin_expect (nthreads > old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

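      /* gomp_managed_threads counts every thread libgomp has created;
         it feeds the dynamic thread-count heuristics.  Update it
         atomically when sync builtins are available, otherwise under
         the global lock.  */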
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
        gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
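
  /* Once past this barrier, every member thread has picked up its
     arguments (from START_DATA or thr->fn/thr->data), so the alloca'd
     START_DATA array may safely go out of scope when we return.  */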

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (__builtin_expect (nthreads < old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
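      /* Cache the team for reuse by the next parallel region rather
         than freeing it; at most one team is kept per pool.  */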
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

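/* Create task state and ICVs for a thread that libgomp did not create
   (e.g. the initial thread the first time it touches an ICV), and
   register the destructor so the state is reclaimed at thread exit.  */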
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}
574