/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.
   WS is the work-share to fill in; START, END and INCR describe the
   iteration space; SCHED and CHUNK_SIZE carry the schedule clause's
   policy and modifier, which must be identical in every thread.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      /* Pre-scale by the increment so that adding ->chunk_size to ->next
         advances one whole chunk in the iteration direction.  */
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        /* ->mode != 0 records that every thread can overshoot ->end by a
           chunk without overflowing a long; presumably this lets the
           sync-builtin iterators skip overflow checks — see the
           gomp_iter_dynamic_next implementation to confirm.  */
        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.
  */

static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  /* Reset this thread's static-schedule trip counter for the new loop.  */
  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

  /* Grab the first chunk; lock-free when sync builtins are available,
     otherwise under the work-share lock.  */
#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* Entry point for schedule(runtime): dispatch on the ICV-recorded
   schedule, using run_sched_modifier as the chunk size.  */

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* NOTE(review): the work-share lock is taken BEFORE
     gomp_work_share_init_done, presumably so that claiming the first
     chunk and recording gomp_ordered_first happen atomically with
     respect to the other threads — confirm against work.c before
     reordering anything here.  */
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Same lock-before-init_done ordering as the ordered dynamic start.  */
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_modifier,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_modifier,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_modifier,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.
*/ 297 298 static bool 299 gomp_loop_static_next (long *istart, long *iend) 300 { 301 return !gomp_iter_static_next (istart, iend); 302 } 303 304 static bool 305 gomp_loop_dynamic_next (long *istart, long *iend) 306 { 307 bool ret; 308 309 #ifdef HAVE_SYNC_BUILTINS 310 ret = gomp_iter_dynamic_next (istart, iend); 311 #else 312 struct gomp_thread *thr = gomp_thread (); 313 gomp_mutex_lock (&thr->ts.work_share->lock); 314 ret = gomp_iter_dynamic_next_locked (istart, iend); 315 gomp_mutex_unlock (&thr->ts.work_share->lock); 316 #endif 317 318 return ret; 319 } 320 321 static bool 322 gomp_loop_guided_next (long *istart, long *iend) 323 { 324 bool ret; 325 326 #ifdef HAVE_SYNC_BUILTINS 327 ret = gomp_iter_guided_next (istart, iend); 328 #else 329 struct gomp_thread *thr = gomp_thread (); 330 gomp_mutex_lock (&thr->ts.work_share->lock); 331 ret = gomp_iter_guided_next_locked (istart, iend); 332 gomp_mutex_unlock (&thr->ts.work_share->lock); 333 #endif 334 335 return ret; 336 } 337 338 bool 339 GOMP_loop_runtime_next (long *istart, long *iend) 340 { 341 struct gomp_thread *thr = gomp_thread (); 342 343 switch (thr->ts.work_share->sched) 344 { 345 case GFS_STATIC: 346 case GFS_AUTO: 347 return gomp_loop_static_next (istart, iend); 348 case GFS_DYNAMIC: 349 return gomp_loop_dynamic_next (istart, iend); 350 case GFS_GUIDED: 351 return gomp_loop_guided_next (istart, iend); 352 default: 353 abort (); 354 } 355 } 356 357 /* The *_ordered_*_next routines are called when the thread completes 358 processing of the iteration block currently assigned to it. 359 360 Returns true if there is work remaining to be performed; *ISTART and 361 *IEND are filled with a new iteration block. Returns false if all work 362 has been assigned. 
  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  /* Unlike the bool-returning wrappers, the raw iterator result is kept
     as an int here: 0 means a new chunk was assigned to this thread.
     NOTE(review): the meaning of the other return values comes from the
     iterator's implementation — any non-negative value apparently also
     requires the ordered bookkeeping to advance; confirm against iter.c
     before changing the test below.  */
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  /* Advance or close out this thread's position in the ordered queue
     while still holding the work-share lock.  */
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.
  */

/* Common body for the GOMP_parallel_loop_*_start entry points: create
   the team and initialize its first work share before the threads are
   launched, so no thread needs to rendezvous on loop entry.  */

static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  /* schedule(runtime): take both the schedule and its chunk size from
     the current task's ICVs.  */
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var, icv->run_sched_modifier);
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  This first version synchronizes
   all threads; the nowait version does not.
  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
/* Targets without attribute alias get thin exported wrapper functions
   instead; each simply forwards to the static implementation above.  */
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif