/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  The
   creating thread fills it in and passes its address to the new thread,
   which copies out what it needs before the creator's stack frame (the
   data lives in an alloca'd array in gomp_team_start) goes away.  */

struct gomp_thread_start_data
{
  /* Outlined parallel-region body and its argument.  */
  void (*fn) (void *);
  void *fn_data;
  /* Initial team state for the new thread (team, team_id, level, ...).  */
  struct gomp_team_state ts;
  /* Implicit task the new thread starts executing.  */
  struct gomp_task *task;
  /* Pool the new thread docks in between teams (non-nested case).  */
  struct gomp_thread_pool *thread_pool;
  /* 1-based place number the thread was bound to; 0 if unbound.  */
  unsigned int place;
  /* True when this thread belongs to a nested team and should exit
     once the team completes instead of returning to the pool dock.  */
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.
   Nested-team threads run a single team body and exit; non-nested threads
   park on the pool's threads_dock barrier and are re-released with a new
   fn/data pair for each subsequent team, until fn is left NULL.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  /* No compiler TLS: keep the per-thread data on this thread's stack
     and publish it through the pthread key.  */
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  /* Publish this thread's release semaphore so ordered sections can
     signal it by team id.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      /* Rendezvous with the master before running the region; the
	 matching waits are in gomp_team_start/gomp_team_end.  */
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      /* Last-arrival wait lets the master destroy the team safely.  */
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Register in the pool, then dock until the master releases the
	 team.  After each region, dock again and pick up the next
	 fn/data the master stored in thr->fn/thr->data; a NULL fn
	 (set by gomp_free_pool_helper dispatch) ends the loop.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

/* Return the cached last team from the current thread's pool if it can
   be reused for a team of NTHREADS threads, detaching it from the pool;
   otherwise return NULL.  Only an outermost (non-nested) master may
   reuse the cache, since only it owns the pool.  */

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
	{
	  pool->last_team = NULL;
	  return last_team;
	}
    }
  return NULL;
}

/* Create a new team data structure.
 */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  /* Reuse the pool's cached team when its size matches; otherwise
     allocate one with trailing per-thread storage for both the
     ordered_release pointer array and the implicit_task array.  */
  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  /* (Re)initialize the per-team state that is not preserved across
     reuse.  work_shares[] is an inline array of 8 entries: entry 0 is
     the initial active work share, entries 1..7 seed the allocation
     free list (hence the loop to 7 below matching work_share_chunk).  */
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  /* ordered_release lives in the trailing storage, immediately after
     the nthreads implicit tasks; slot 0 is the master's semaphore.  */
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.
 */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

/* Team-body stub installed in each pooled thread's thr->fn by
   gomp_free_thread.  Runs on the pooled thread: acknowledges the pool
   shutdown on the dock barrier, tears down per-thread state, and
   terminates the thread (it must never return to the idle loop).  */

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  Also serves as the
   gomp_thread_destructor key destructor (see initialize_team), hence
   the unused ARG.  Must only run on the thread owning the pool.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Hand every docked thread the shutdown stub; they will pick
	     it up when released from the dock below.  */
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  Install the calling thread as master (team id 0),
   then recruit NTHREADS-1 workers: reuse docked pool threads where
   possible (non-nested only), honouring any proc_bind/places policy in
   FLAGS, and pthread_create the remainder.  On return all team members
   have been released into FN (DATA).  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  /* Place-computation helpers: S = threads per place (or places per
     subpartition), REST = remainder, P = current place (0-based),
     K = running count within the current place/subpartition.  */
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Resolve nthreads-var and bind-var ICVs for this nesting level;
     a proc_bind clause (low 3 bits of FLAGS) overrides bind-var.  */
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  /* NOTE(review): `struct gomp_thread_data` is not declared in
	     this file; only a pointer size is needed here, so any
	     pointer type works — confirm against libgomp.h.  */
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread_data *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      /* Advance P (and K/S) to the place for team member I,
		 mirroring the same stepping used when launching new
		 threads further below.  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      /* If the docked thread at slot I is not already on an
		 acceptable place, pick a suitable thread from the
		 affinity permutation table instead.  */
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      /* Lazily build AFFINITY_THR: bucket the remaining
			 old threads by place (chained intrusively
			 through thr->data) so they can be matched to
			 places out of order.  */
		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      /* For bind(true), any place in the subpartition
			 will do; scan for a non-empty bucket.  */
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  /* Pop the bucket head and put it at slot I.  */
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  /* Initialize team state for the reused thread; it reads these
	     after being released from the dock barrier below.  */
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  /* Account for the threads about to be created (minus one for the
     master when the pool starts empty).  */
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  /* With places, use a private attribute copy so per-thread affinity
     can be set on it; inherit the configured stack size.  */
  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  /* One start-data record per thread still to be created; the new
     threads copy out what they need before this frame is left.  */
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  /* Same place stepping as in the reuse loop above.  */
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  /* Skip slots already filled by reused threads during the
	     affinity permutation pass.  */
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  /* Release the team: nested workers rendezvous on the team barrier,
     pooled workers on the dock barrier.  */
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  /* Heap-allocated permutation table (alloca'd when len <= 64).  */
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      /* On cancellation, walk and finalize every work share that was
	 still live; mark unfinished list tails self-referencing so the
	 walk terminates.  */
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Pop back to the enclosing team's state saved by gomp_team_start.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free any work shares allocated beyond the team's inline array.  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  /* Nested or single-thread teams are freed immediately; otherwise
     cache the team on the pool for reuse by the next parallel region.  */
  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.
 */

/* Library constructor: set up the TLS fallback key (when compiler TLS
   is unavailable) and register gomp_free_thread as the per-thread
   destructor so pool resources are reclaimed on thread exit.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  /* Per-thread data for the initial (main) thread, published via the
     pthread key that substitutes for __thread storage.  */
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
#endif

/* Allocate and install a fresh implicit task (with default ICVs) for
   the current thread, registering it with the thread destructor key so
   it is freed on thread exit.  Returns a pointer to the task's ICVs.  */

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}