/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

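	/* A nonzero ws->mode records that the iteration range sits far
	   enough from LONG_MAX/LONG_MIN that ws->next can be advanced past
	   ws->end by up to (nthreads + 1) chunks without overflowing; the
	   iterator (gomp_iter_dynamic_next in iter.c) can then use a plain
	   fetch-and-add instead of a compare-and-swap loop.  */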
	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

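/* As an illustration (a sketch, not compiled here): for a stand-alone
   loop such as

	#pragma omp for schedule(runtime)
	for (i = 0; i < n; i++)
	  body;

   the compiler emits code along the lines of

	long istart, iend, i;
	if (GOMP_loop_runtime_start (0, n, 1, &istart, &iend))
	  do
	    for (i = istart; i < iend; i++)
	      body;
	  while (GOMP_loop_runtime_next (&istart, &iend));
	GOMP_loop_end ();

   where each call fills [*ISTART, *IEND) with the next block of
   iterations assigned to this thread.  */
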
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The entry
   points without "nonmonotonic" in their name must always be monotonic,
   but the nonmonotonic ones could be changed to use work stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* Similar to the dynamic case, though the open question is how the chunk
   sizes could be decreased without central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
			 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
				     icv->run_sched_chunk_size,
				     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
				      icv->run_sched_chunk_size,
				      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
				     icv->run_sched_chunk_size,
				     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

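/* For illustration (a sketch, not compiled here): a loop with an ORDERED
   clause, e.g.

	#pragma omp for ordered schedule(dynamic)
	for (i = 0; i < n; i++)
	  {
	    ...
	    #pragma omp ordered
	      ordered_body;
	    ...
	  }

   expands along the lines of

	long istart, iend, i;
	if (GOMP_loop_ordered_dynamic_start (0, n, 1, 1, &istart, &iend))
	  do
	    for (i = istart; i < iend; i++)
	      {
		...
		GOMP_ordered_start ();
		ordered_body;
		GOMP_ordered_end ();
		...
	      }
	  while (GOMP_loop_ordered_dynamic_next (&istart, &iend));
	GOMP_loop_end ();

   with GOMP_ordered_start/GOMP_ordered_end defined in ordered.c.  */
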
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
				 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
					     icv->run_sched_chunk_size,
					     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
					     icv->run_sched_chunk_size,
					     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
					     0, istart, iend);
    default:
      abort ();
    }
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, the worksharing loop always iterates from 0 to COUNTS[0] - 1, and
   the remaining COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */

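/* For illustration (a sketch, not compiled here, and the exact expansion
   is up to the compiler): a doubly nested doacross loop such as

	#pragma omp for ordered(2) schedule(dynamic)
	for (i = 0; i < n; i++)
	  for (j = 0; j < m; j++)
	    {
	      #pragma omp ordered depend(sink: i - 1, j)
	      body;
	      #pragma omp ordered depend(source)
	    }

   passes COUNTS[] = { n, m } and expands along the lines of

	long counts[2] = { n, m };
	long istart, iend, i, j;
	if (GOMP_loop_doacross_dynamic_start (2, counts, 1, &istart, &iend))
	  do
	    for (i = istart; i < iend; i++)
	      for (j = 0; j < m; j++)
		{
		  GOMP_doacross_wait (i - 1, j);
		  body;
		  long cur[2] = { i, j };
		  GOMP_doacross_post (cur);
		}
	  while (GOMP_loop_dynamic_next (&istart, &iend));
	GOMP_loop_end ();

   with GOMP_doacross_post/GOMP_doacross_wait defined in ordered.c.  */
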
static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
				  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
					       icv->run_sched_chunk_size,
					       istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
					      0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

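/* For illustration (a sketch, not compiled here; SUBFN is a made-up name
   for the compiler-outlined body): a combined construct such as

	#pragma omp parallel for schedule(static) num_threads(4)
	for (i = 0; i < n; i++)
	  body;

   expands along the lines of

	void subfn (void *data)
	{
	  long istart, iend, i;
	  while (GOMP_loop_static_next (&istart, &iend))
	    for (i = istart; i < iend; i++)
	      body;
	  GOMP_loop_end_nowait ();
	}

	GOMP_parallel_loop_static (subfn, NULL, 4, 0, n, 1, 0, 0);

   Because the work share is pre-initialized, SUBFN can call the *_next
   routine directly without a separate *_start call, and the barrier in
   GOMP_parallel_end stands in for the one GOMP_loop_end would provide,
   hence GOMP_loop_end_nowait.  */
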
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
			  long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
	__attribute__((alias ("GOMP_parallel_loop_guided")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
					 unsigned num_threads, long start,
					 long end, long incr, long chunk_size,
					 unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
					unsigned num_threads, long start,
					long end, long incr, long chunk_size,
					unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_chunk_size,
			    flags);
  fn (data);
  GOMP_parallel_end ();
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
	__attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
	__attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
				      long chunk_size, long *istart,
				      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
				     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
					   istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif
889