1 /* Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2012 2 Free Software Foundation, Inc. 3 Contributed by Richard Henderson <rth@redhat.com>. 4 5 This file is part of the GNU OpenMP Library (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26 /* This file handles the maintenance of threads in response to team 27 creation and termination. */ 28 29 #include "libgomp.h" 30 #include <stdlib.h> 31 #include <string.h> 32 33 /* This attribute contains PTHREAD_CREATE_DETACHED. */ 34 pthread_attr_t gomp_thread_attr; 35 36 /* This key is for the thread destructor. */ 37 pthread_key_t gomp_thread_destructor; 38 39 40 /* This is the libgomp per-thread data structure. */ 41 #ifdef HAVE_TLS 42 __thread struct gomp_thread gomp_tls_data; 43 #else 44 pthread_key_t gomp_tls_key; 45 #endif 46 47 48 /* This structure is used to communicate across pthread_create. 
*/ 49 50 struct gomp_thread_start_data 51 { 52 void (*fn) (void *); 53 void *fn_data; 54 struct gomp_team_state ts; 55 struct gomp_task *task; 56 struct gomp_thread_pool *thread_pool; 57 bool nested; 58 }; 59 60 61 /* This function is a pthread_create entry point. This contains the idle 62 loop in which a thread waits to be called up to become part of a team. */ 63 64 static void * 65 gomp_thread_start (void *xdata) 66 { 67 struct gomp_thread_start_data *data = xdata; 68 struct gomp_thread *thr; 69 struct gomp_thread_pool *pool; 70 void (*local_fn) (void *); 71 void *local_data; 72 73 #ifdef HAVE_TLS 74 thr = &gomp_tls_data; 75 #else 76 struct gomp_thread local_thr; 77 thr = &local_thr; 78 pthread_setspecific (gomp_tls_key, thr); 79 #endif 80 gomp_sem_init (&thr->release, 0); 81 82 /* Extract what we need from data. */ 83 local_fn = data->fn; 84 local_data = data->fn_data; 85 thr->thread_pool = data->thread_pool; 86 thr->ts = data->ts; 87 thr->task = data->task; 88 89 thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; 90 91 /* Make thread pool local. */ 92 pool = thr->thread_pool; 93 94 if (data->nested) 95 { 96 struct gomp_team *team = thr->ts.team; 97 struct gomp_task *task = thr->task; 98 99 gomp_barrier_wait (&team->barrier); 100 101 local_fn (local_data); 102 gomp_team_barrier_wait (&team->barrier); 103 gomp_finish_task (task); 104 gomp_barrier_wait_last (&team->barrier); 105 } 106 else 107 { 108 pool->threads[thr->ts.team_id] = thr; 109 110 gomp_barrier_wait (&pool->threads_dock); 111 do 112 { 113 struct gomp_team *team = thr->ts.team; 114 struct gomp_task *task = thr->task; 115 116 local_fn (local_data); 117 gomp_team_barrier_wait (&team->barrier); 118 gomp_finish_task (task); 119 120 gomp_barrier_wait (&pool->threads_dock); 121 122 local_fn = thr->fn; 123 local_data = thr->data; 124 thr->fn = NULL; 125 } 126 while (local_fn); 127 } 128 129 gomp_sem_destroy (&thr->release); 130 return NULL; 131 } 132 133 134 /* Create a new team data structure. 
*/ 135 136 struct gomp_team * 137 gomp_new_team (unsigned nthreads) 138 { 139 struct gomp_team *team; 140 size_t size; 141 int i; 142 143 size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) 144 + sizeof (team->implicit_task[0])); 145 team = gomp_malloc (size); 146 147 team->work_share_chunk = 8; 148 #ifdef HAVE_SYNC_BUILTINS 149 team->single_count = 0; 150 #else 151 gomp_mutex_init (&team->work_share_list_free_lock); 152 #endif 153 gomp_init_work_share (&team->work_shares[0], false, nthreads); 154 team->work_shares[0].next_alloc = NULL; 155 team->work_share_list_free = NULL; 156 team->work_share_list_alloc = &team->work_shares[1]; 157 for (i = 1; i < 7; i++) 158 team->work_shares[i].next_free = &team->work_shares[i + 1]; 159 team->work_shares[i].next_free = NULL; 160 161 team->nthreads = nthreads; 162 gomp_barrier_init (&team->barrier, nthreads); 163 164 gomp_sem_init (&team->master_release, 0); 165 team->ordered_release = (void *) &team->implicit_task[nthreads]; 166 team->ordered_release[0] = &team->master_release; 167 168 gomp_mutex_init (&team->task_lock); 169 team->task_queue = NULL; 170 team->task_count = 0; 171 team->task_running_count = 0; 172 173 return team; 174 } 175 176 177 /* Free a team data structure. */ 178 179 static void 180 free_team (struct gomp_team *team) 181 { 182 gomp_barrier_destroy (&team->barrier); 183 gomp_mutex_destroy (&team->task_lock); 184 free (team); 185 } 186 187 /* Allocate and initialize a thread pool. 
*/ 188 189 static struct gomp_thread_pool *gomp_new_thread_pool (void) 190 { 191 struct gomp_thread_pool *pool 192 = gomp_malloc (sizeof(struct gomp_thread_pool)); 193 pool->threads = NULL; 194 pool->threads_size = 0; 195 pool->threads_used = 0; 196 pool->last_team = NULL; 197 return pool; 198 } 199 200 static void 201 gomp_free_pool_helper (void *thread_pool) 202 { 203 struct gomp_thread_pool *pool 204 = (struct gomp_thread_pool *) thread_pool; 205 gomp_barrier_wait_last (&pool->threads_dock); 206 gomp_sem_destroy (&gomp_thread ()->release); 207 pthread_exit (NULL); 208 } 209 210 /* Free a thread pool and release its threads. */ 211 212 static void 213 gomp_free_thread (void *arg __attribute__((unused))) 214 { 215 struct gomp_thread *thr = gomp_thread (); 216 struct gomp_thread_pool *pool = thr->thread_pool; 217 if (pool) 218 { 219 if (pool->threads_used > 0) 220 { 221 int i; 222 for (i = 1; i < pool->threads_used; i++) 223 { 224 struct gomp_thread *nthr = pool->threads[i]; 225 nthr->fn = gomp_free_pool_helper; 226 nthr->data = pool; 227 } 228 /* This barrier undocks threads docked on pool->threads_dock. */ 229 gomp_barrier_wait (&pool->threads_dock); 230 /* And this waits till all threads have called gomp_barrier_wait_last 231 in gomp_free_pool_helper. */ 232 gomp_barrier_wait (&pool->threads_dock); 233 /* Now it is safe to destroy the barrier and free the pool. 
*/ 234 gomp_barrier_destroy (&pool->threads_dock); 235 236 #ifdef HAVE_SYNC_BUILTINS 237 __sync_fetch_and_add (&gomp_managed_threads, 238 1L - pool->threads_used); 239 #else 240 gomp_mutex_lock (&gomp_remaining_threads_lock); 241 gomp_managed_threads -= pool->threads_used - 1L; 242 gomp_mutex_unlock (&gomp_remaining_threads_lock); 243 #endif 244 } 245 free (pool->threads); 246 if (pool->last_team) 247 free_team (pool->last_team); 248 free (pool); 249 thr->thread_pool = NULL; 250 } 251 if (thr->task != NULL) 252 { 253 struct gomp_task *task = thr->task; 254 gomp_end_task (); 255 free (task); 256 } 257 } 258 259 /* Launch a team. */ 260 261 void 262 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, 263 struct gomp_team *team) 264 { 265 struct gomp_thread_start_data *start_data; 266 struct gomp_thread *thr, *nthr; 267 struct gomp_task *task; 268 struct gomp_task_icv *icv; 269 bool nested; 270 struct gomp_thread_pool *pool; 271 unsigned i, n, old_threads_used = 0; 272 pthread_attr_t thread_attr, *attr; 273 unsigned long nthreads_var; 274 275 thr = gomp_thread (); 276 nested = thr->ts.team != NULL; 277 if (__builtin_expect (thr->thread_pool == NULL, 0)) 278 { 279 thr->thread_pool = gomp_new_thread_pool (); 280 pthread_setspecific (gomp_thread_destructor, thr); 281 } 282 pool = thr->thread_pool; 283 task = thr->task; 284 icv = task ? &task->icv : &gomp_global_icv; 285 286 /* Always save the previous state, even if this isn't a nested team. 287 In particular, we should save any work share state from an outer 288 orphaned work share construct. 
*/ 289 team->prev_ts = thr->ts; 290 291 thr->ts.team = team; 292 thr->ts.team_id = 0; 293 ++thr->ts.level; 294 if (nthreads > 1) 295 ++thr->ts.active_level; 296 thr->ts.work_share = &team->work_shares[0]; 297 thr->ts.last_work_share = NULL; 298 #ifdef HAVE_SYNC_BUILTINS 299 thr->ts.single_count = 0; 300 #endif 301 thr->ts.static_trip = 0; 302 thr->task = &team->implicit_task[0]; 303 nthreads_var = icv->nthreads_var; 304 if (__builtin_expect (gomp_nthreads_var_list != NULL, 0) 305 && thr->ts.level < gomp_nthreads_var_list_len) 306 nthreads_var = gomp_nthreads_var_list[thr->ts.level]; 307 gomp_init_task (thr->task, task, icv); 308 team->implicit_task[0].icv.nthreads_var = nthreads_var; 309 310 if (nthreads == 1) 311 return; 312 313 i = 1; 314 315 /* We only allow the reuse of idle threads for non-nested PARALLEL 316 regions. This appears to be implied by the semantics of 317 threadprivate variables, but perhaps that's reading too much into 318 things. Certainly it does prevent any locking problems, since 319 only the initial program thread will modify gomp_threads. */ 320 if (!nested) 321 { 322 old_threads_used = pool->threads_used; 323 324 if (nthreads <= old_threads_used) 325 n = nthreads; 326 else if (old_threads_used == 0) 327 { 328 n = 0; 329 gomp_barrier_init (&pool->threads_dock, nthreads); 330 } 331 else 332 { 333 n = old_threads_used; 334 335 /* Increase the barrier threshold to make sure all new 336 threads arrive before the team is released. */ 337 gomp_barrier_reinit (&pool->threads_dock, nthreads); 338 } 339 340 /* Not true yet, but soon will be. We're going to release all 341 threads from the dock, and those that aren't part of the 342 team will exit. */ 343 pool->threads_used = nthreads; 344 345 /* Release existing idle threads. 
*/ 346 for (; i < n; ++i) 347 { 348 nthr = pool->threads[i]; 349 nthr->ts.team = team; 350 nthr->ts.work_share = &team->work_shares[0]; 351 nthr->ts.last_work_share = NULL; 352 nthr->ts.team_id = i; 353 nthr->ts.level = team->prev_ts.level + 1; 354 nthr->ts.active_level = thr->ts.active_level; 355 #ifdef HAVE_SYNC_BUILTINS 356 nthr->ts.single_count = 0; 357 #endif 358 nthr->ts.static_trip = 0; 359 nthr->task = &team->implicit_task[i]; 360 gomp_init_task (nthr->task, task, icv); 361 team->implicit_task[i].icv.nthreads_var = nthreads_var; 362 nthr->fn = fn; 363 nthr->data = data; 364 team->ordered_release[i] = &nthr->release; 365 } 366 367 if (i == nthreads) 368 goto do_release; 369 370 /* If necessary, expand the size of the gomp_threads array. It is 371 expected that changes in the number of threads are rare, thus we 372 make no effort to expand gomp_threads_size geometrically. */ 373 if (nthreads >= pool->threads_size) 374 { 375 pool->threads_size = nthreads + 1; 376 pool->threads 377 = gomp_realloc (pool->threads, 378 pool->threads_size 379 * sizeof (struct gomp_thread_data *)); 380 } 381 } 382 383 if (__builtin_expect (nthreads > old_threads_used, 0)) 384 { 385 long diff = (long) nthreads - (long) old_threads_used; 386 387 if (old_threads_used == 0) 388 --diff; 389 390 #ifdef HAVE_SYNC_BUILTINS 391 __sync_fetch_and_add (&gomp_managed_threads, diff); 392 #else 393 gomp_mutex_lock (&gomp_remaining_threads_lock); 394 gomp_managed_threads += diff; 395 gomp_mutex_unlock (&gomp_remaining_threads_lock); 396 #endif 397 } 398 399 attr = &gomp_thread_attr; 400 if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) 401 { 402 size_t stacksize; 403 pthread_attr_init (&thread_attr); 404 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED); 405 if (! 
pthread_attr_getstacksize (&gomp_thread_attr, &stacksize)) 406 pthread_attr_setstacksize (&thread_attr, stacksize); 407 attr = &thread_attr; 408 } 409 410 start_data = gomp_alloca (sizeof (struct gomp_thread_start_data) 411 * (nthreads-i)); 412 413 /* Launch new threads. */ 414 for (; i < nthreads; ++i, ++start_data) 415 { 416 pthread_t pt; 417 int err; 418 419 start_data->fn = fn; 420 start_data->fn_data = data; 421 start_data->ts.team = team; 422 start_data->ts.work_share = &team->work_shares[0]; 423 start_data->ts.last_work_share = NULL; 424 start_data->ts.team_id = i; 425 start_data->ts.level = team->prev_ts.level + 1; 426 start_data->ts.active_level = thr->ts.active_level; 427 #ifdef HAVE_SYNC_BUILTINS 428 start_data->ts.single_count = 0; 429 #endif 430 start_data->ts.static_trip = 0; 431 start_data->task = &team->implicit_task[i]; 432 gomp_init_task (start_data->task, task, icv); 433 team->implicit_task[i].icv.nthreads_var = nthreads_var; 434 start_data->thread_pool = pool; 435 start_data->nested = nested; 436 437 if (gomp_cpu_affinity != NULL) 438 gomp_init_thread_affinity (attr); 439 440 err = pthread_create (&pt, attr, gomp_thread_start, start_data); 441 if (err != 0) 442 gomp_fatal ("Thread creation failed: %s", strerror (err)); 443 } 444 445 if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) 446 pthread_attr_destroy (&thread_attr); 447 448 do_release: 449 gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock); 450 451 /* Decrease the barrier threshold to match the number of threads 452 that should arrive back at the end of this team. The extra 453 threads should be exiting. Note that we arrange for this test 454 to never be true for nested teams. 
*/ 455 if (__builtin_expect (nthreads < old_threads_used, 0)) 456 { 457 long diff = (long) nthreads - (long) old_threads_used; 458 459 gomp_barrier_reinit (&pool->threads_dock, nthreads); 460 461 #ifdef HAVE_SYNC_BUILTINS 462 __sync_fetch_and_add (&gomp_managed_threads, diff); 463 #else 464 gomp_mutex_lock (&gomp_remaining_threads_lock); 465 gomp_managed_threads += diff; 466 gomp_mutex_unlock (&gomp_remaining_threads_lock); 467 #endif 468 } 469 } 470 471 472 /* Terminate the current team. This is only to be called by the master 473 thread. We assume that we must wait for the other threads. */ 474 475 void 476 gomp_team_end (void) 477 { 478 struct gomp_thread *thr = gomp_thread (); 479 struct gomp_team *team = thr->ts.team; 480 481 /* This barrier handles all pending explicit threads. */ 482 gomp_team_barrier_wait (&team->barrier); 483 gomp_fini_work_share (thr->ts.work_share); 484 485 gomp_end_task (); 486 thr->ts = team->prev_ts; 487 488 if (__builtin_expect (thr->ts.team != NULL, 0)) 489 { 490 #ifdef HAVE_SYNC_BUILTINS 491 __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); 492 #else 493 gomp_mutex_lock (&gomp_remaining_threads_lock); 494 gomp_managed_threads -= team->nthreads - 1L; 495 gomp_mutex_unlock (&gomp_remaining_threads_lock); 496 #endif 497 /* This barrier has gomp_barrier_wait_last counterparts 498 and ensures the team can be safely destroyed. 
*/ 499 gomp_barrier_wait (&team->barrier); 500 } 501 502 if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0)) 503 { 504 struct gomp_work_share *ws = team->work_shares[0].next_alloc; 505 do 506 { 507 struct gomp_work_share *next_ws = ws->next_alloc; 508 free (ws); 509 ws = next_ws; 510 } 511 while (ws != NULL); 512 } 513 gomp_sem_destroy (&team->master_release); 514 #ifndef HAVE_SYNC_BUILTINS 515 gomp_mutex_destroy (&team->work_share_list_free_lock); 516 #endif 517 518 if (__builtin_expect (thr->ts.team != NULL, 0) 519 || __builtin_expect (team->nthreads == 1, 0)) 520 free_team (team); 521 else 522 { 523 struct gomp_thread_pool *pool = thr->thread_pool; 524 if (pool->last_team) 525 free_team (pool->last_team); 526 pool->last_team = team; 527 } 528 } 529 530 531 /* Constructors for this file. */ 532 533 static void __attribute__((constructor)) 534 initialize_team (void) 535 { 536 struct gomp_thread *thr; 537 538 #ifndef HAVE_TLS 539 static struct gomp_thread initial_thread_tls_data; 540 541 pthread_key_create (&gomp_tls_key, NULL); 542 pthread_setspecific (gomp_tls_key, &initial_thread_tls_data); 543 #endif 544 545 if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0) 546 gomp_fatal ("could not create thread pool destructor."); 547 548 #ifdef HAVE_TLS 549 thr = &gomp_tls_data; 550 #else 551 thr = &initial_thread_tls_data; 552 #endif 553 gomp_sem_init (&thr->release, 0); 554 } 555 556 static void __attribute__((destructor)) 557 team_destructor (void) 558 { 559 /* Without this dlclose on libgomp could lead to subsequent 560 crashes. */ 561 pthread_key_delete (gomp_thread_destructor); 562 } 563 564 struct gomp_task_icv * 565 gomp_new_icv (void) 566 { 567 struct gomp_thread *thr = gomp_thread (); 568 struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task)); 569 gomp_init_task (task, NULL, &gomp_global_icv); 570 thr->task = task; 571 pthread_setspecific (gomp_thread_destructor, thr); 572 return &task->icv; 573 } 574