/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
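  /* (For example, START = 5, END = 2, INCR = 1 is stored with
     ws->next == ws->end == 5, so the first *_next call finds no work.)  */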
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

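        /* A nonzero ws->mode lets gomp_iter_dynamic_next claim a chunk
           with a single fetch-and-add on ws->next instead of a
           compare-and-swap loop, because the checks below prove that
           ws->next cannot overflow past ws->end.  (ws->chunk_size was
           premultiplied by the increment above so a block can be claimed
           by adding it to ws->next directly.)  */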
        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

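/* As an illustration (a sketch of the code GCC emits, not part of this
   library), a loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   is expanded by the compiler into roughly

     long _s, _e;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &_s, &_e))
       do
         {
           long i;
           for (i = _s; i < _e; i++)
             body (i);
         }
       while (GOMP_loop_dynamic_next (&_s, &_e));
     GOMP_loop_end ();

   so each thread alternates between running a block and asking the
   work share for the next one.  */
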
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

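/* For example (again a sketch of compiler output, not part of this file),
   with  #pragma omp for ordered schedule(dynamic)  the generated loop body
   brackets its ordered region with GOMP_ordered_start () and
   GOMP_ordered_end (), and fetches blocks through the *_ordered_* entry
   points below, so the work share also tracks which thread owns the next
   position in the iteration order.  */
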
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_modifier,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_modifier,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_modifier,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration block assigned to the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

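/* Note that the static variant needs no locking: each thread computes its
   blocks purely from its own ts.static_trip counter and the immutable
   work-share bounds.  The dynamic and guided variants mutate shared state,
   so they use the atomic builtins when available and fall back to the
   work-share lock otherwise.  */
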
static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

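/* For a combined construct such as

     #pragma omp parallel for schedule(guided, 16)
     for (i = 0; i < n; i++)
       body (i);

   the compiler (again, a rough sketch) calls
   GOMP_parallel_loop_guided_start (subfn, data, 0, 0, n, 1, 16) and then
   runs subfn in every thread; because the work share was initialized
   here, each thread inside subfn can begin fetching blocks directly with
   GOMP_loop_guided_next, skipping the *_start synchronization.  */
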
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var, icv->run_sched_modifier);
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  This first version synchronizes
   all threads; the nowait version does not.  */

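/* (A `nowait' clause on the loop directive makes the compiler emit the
   _nowait variant, so threads fall through instead of waiting at the
   implicit barrier.)  */
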
void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif