1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU OpenMP Library (libgomp).
5 
6    Libgomp is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14    more details.
15 
16    Under Section 7 of GPL version 3, you are granted additional
17    permissions described in the GCC Runtime Library Exception, version
18    3.1, as published by the Free Software Foundation.
19 
20    You should have received a copy of the GNU General Public License and
21    a copy of the GCC Runtime Library Exception along with this program;
22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23    <http://www.gnu.org/licenses/>.  */
24 
25 /* This file handles the LOOP (FOR/DO) construct.  */
26 
27 #include <limits.h>
28 #include <stdlib.h>
29 #include "libgomp.h"
30 
31 typedef unsigned long long gomp_ull;
32 
33 /* Initialize the given work share construct from the given arguments.  */
34 
static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  /* ->mode: bit 1 (set at the bottom) records a downward loop; bit 0 is
     set below only for dynamic scheduling when the bounds arithmetic
     provably cannot wrap, presumably selecting a faster iterator path —
     see the iterator routines for how it is consumed.  */
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      /* Pre-scale the chunk by the increment so iterators can advance
	 ->next_ull by ->chunk_size_ull in one step.  */
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	/* Upward loops are expected to be the common case.  */
	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  Requires both nthreads and the
	       (scaled) chunk size to fit in the low half of gomp_ull so
	       the product below is exact.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      /* __LONG_LONG_MAX__ * 2ULL + 1 is ULLONG_MAX; the fast mode
		 applies when nthreads + 1 chunks past ->end_ull cannot
		 wrap around it.  */
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  /* Downward analogue of the test above: no wrap below zero.  */
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    /* Mark the loop as iterating downwards.  */
    ws->mode |= 2;
}
81 
82 /* The *_start routines are called when first encountering a loop construct
83    that is not bound directly to a parallel construct.  The first thread
84    that arrives will create the work-share construct; subsequent threads
85    will see the construct exists and allocate work from it.
86 
87    START, END, INCR are the bounds of the loop; due to the restrictions of
88    OpenMP, these values must be the same in every thread.  This is not
89    verified (nor is it entirely verifiable, since START is not necessarily
90    retained intact in the work-share data structure).  CHUNK_SIZE is the
91    scheduling parameter; again this must be identical in all threads.
92 
93    Returns true if there's any work for this thread to perform.  If so,
94    *ISTART and *IEND are filled with the bounds of the iteration block
95    allocated to this thread.  Returns false if all work was assigned to
96    other threads prior to this thread's arrival.  */
97 
98 static bool
99 gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
100 			    gomp_ull incr, gomp_ull chunk_size,
101 			    gomp_ull *istart, gomp_ull *iend)
102 {
103   struct gomp_thread *thr = gomp_thread ();
104 
105   thr->ts.static_trip = 0;
106   if (gomp_work_share_start (false))
107     {
108       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
109 			  GFS_STATIC, chunk_size);
110       gomp_work_share_init_done ();
111     }
112 
113   return !gomp_iter_ull_static_next (istart, iend);
114 }
115 
116 static bool
117 gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
118 			     gomp_ull incr, gomp_ull chunk_size,
119 			     gomp_ull *istart, gomp_ull *iend)
120 {
121   struct gomp_thread *thr = gomp_thread ();
122   bool ret;
123 
124   if (gomp_work_share_start (false))
125     {
126       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
127 			  GFS_DYNAMIC, chunk_size);
128       gomp_work_share_init_done ();
129     }
130 
131 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
132   ret = gomp_iter_ull_dynamic_next (istart, iend);
133 #else
134   gomp_mutex_lock (&thr->ts.work_share->lock);
135   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
136   gomp_mutex_unlock (&thr->ts.work_share->lock);
137 #endif
138 
139   return ret;
140 }
141 
142 static bool
143 gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
144 			    gomp_ull incr, gomp_ull chunk_size,
145 			    gomp_ull *istart, gomp_ull *iend)
146 {
147   struct gomp_thread *thr = gomp_thread ();
148   bool ret;
149 
150   if (gomp_work_share_start (false))
151     {
152       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
153 			  GFS_GUIDED, chunk_size);
154       gomp_work_share_init_done ();
155     }
156 
157 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
158   ret = gomp_iter_ull_guided_next (istart, iend);
159 #else
160   gomp_mutex_lock (&thr->ts.work_share->lock);
161   ret = gomp_iter_ull_guided_next_locked (istart, iend);
162   gomp_mutex_unlock (&thr->ts.work_share->lock);
163 #endif
164 
165   return ret;
166 }
167 
168 bool
169 GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
170 			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
171 {
172   struct gomp_task_icv *icv = gomp_icv (false);
173   switch (icv->run_sched_var)
174     {
175     case GFS_STATIC:
176       return gomp_loop_ull_static_start (up, start, end, incr,
177 					 icv->run_sched_modifier,
178 					 istart, iend);
179     case GFS_DYNAMIC:
180       return gomp_loop_ull_dynamic_start (up, start, end, incr,
181 					  icv->run_sched_modifier,
182 					  istart, iend);
183     case GFS_GUIDED:
184       return gomp_loop_ull_guided_start (up, start, end, incr,
185 					 icv->run_sched_modifier,
186 					 istart, iend);
187     case GFS_AUTO:
188       /* For now map to schedule(static), later on we could play with feedback
189 	 driven choice.  */
190       return gomp_loop_ull_static_start (up, start, end, incr,
191 					 0, istart, iend);
192     default:
193       abort ();
194     }
195 }
196 
197 /* The *_ordered_*_start routines are similar.  The only difference is that
198    this work-share construct is initialized to expect an ORDERED section.  */
199 
200 static bool
201 gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
202 				    gomp_ull incr, gomp_ull chunk_size,
203 				    gomp_ull *istart, gomp_ull *iend)
204 {
205   struct gomp_thread *thr = gomp_thread ();
206 
207   thr->ts.static_trip = 0;
208   if (gomp_work_share_start (true))
209     {
210       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
211 			  GFS_STATIC, chunk_size);
212       gomp_ordered_static_init ();
213       gomp_work_share_init_done ();
214     }
215 
216   return !gomp_iter_ull_static_next (istart, iend);
217 }
218 
219 static bool
220 gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
221 				     gomp_ull incr, gomp_ull chunk_size,
222 				     gomp_ull *istart, gomp_ull *iend)
223 {
224   struct gomp_thread *thr = gomp_thread ();
225   bool ret;
226 
227   if (gomp_work_share_start (true))
228     {
229       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
230 			  GFS_DYNAMIC, chunk_size);
231       gomp_mutex_lock (&thr->ts.work_share->lock);
232       gomp_work_share_init_done ();
233     }
234   else
235     gomp_mutex_lock (&thr->ts.work_share->lock);
236 
237   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
238   if (ret)
239     gomp_ordered_first ();
240   gomp_mutex_unlock (&thr->ts.work_share->lock);
241 
242   return ret;
243 }
244 
245 static bool
246 gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
247 				    gomp_ull incr, gomp_ull chunk_size,
248 				    gomp_ull *istart, gomp_ull *iend)
249 {
250   struct gomp_thread *thr = gomp_thread ();
251   bool ret;
252 
253   if (gomp_work_share_start (true))
254     {
255       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
256 			  GFS_GUIDED, chunk_size);
257       gomp_mutex_lock (&thr->ts.work_share->lock);
258       gomp_work_share_init_done ();
259     }
260   else
261     gomp_mutex_lock (&thr->ts.work_share->lock);
262 
263   ret = gomp_iter_ull_guided_next_locked (istart, iend);
264   if (ret)
265     gomp_ordered_first ();
266   gomp_mutex_unlock (&thr->ts.work_share->lock);
267 
268   return ret;
269 }
270 
271 bool
272 GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
273 				     gomp_ull incr, gomp_ull *istart,
274 				     gomp_ull *iend)
275 {
276   struct gomp_task_icv *icv = gomp_icv (false);
277   switch (icv->run_sched_var)
278     {
279     case GFS_STATIC:
280       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
281 						 icv->run_sched_modifier,
282 						 istart, iend);
283     case GFS_DYNAMIC:
284       return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
285 						  icv->run_sched_modifier,
286 						  istart, iend);
287     case GFS_GUIDED:
288       return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
289 						 icv->run_sched_modifier,
290 						 istart, iend);
291     case GFS_AUTO:
292       /* For now map to schedule(static), later on we could play with feedback
293 	 driven choice.  */
294       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
295 						 0, istart, iend);
296     default:
297       abort ();
298     }
299 }
300 
301 /* The *_next routines are called when the thread completes processing of
302    the iteration block currently assigned to it.  If the work-share
303    construct is bound directly to a parallel construct, then the iteration
304    bounds may have been set up before the parallel.  In which case, this
305    may be the first iteration for the thread.
306 
307    Returns true if there is work remaining to be performed; *ISTART and
308    *IEND are filled with a new iteration block.  Returns false if all work
309    has been assigned.  */
310 
311 static bool
312 gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
313 {
314   return !gomp_iter_ull_static_next (istart, iend);
315 }
316 
317 static bool
318 gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
319 {
320   bool ret;
321 
322 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
323   ret = gomp_iter_ull_dynamic_next (istart, iend);
324 #else
325   struct gomp_thread *thr = gomp_thread ();
326   gomp_mutex_lock (&thr->ts.work_share->lock);
327   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
328   gomp_mutex_unlock (&thr->ts.work_share->lock);
329 #endif
330 
331   return ret;
332 }
333 
334 static bool
335 gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
336 {
337   bool ret;
338 
339 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
340   ret = gomp_iter_ull_guided_next (istart, iend);
341 #else
342   struct gomp_thread *thr = gomp_thread ();
343   gomp_mutex_lock (&thr->ts.work_share->lock);
344   ret = gomp_iter_ull_guided_next_locked (istart, iend);
345   gomp_mutex_unlock (&thr->ts.work_share->lock);
346 #endif
347 
348   return ret;
349 }
350 
351 bool
352 GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
353 {
354   struct gomp_thread *thr = gomp_thread ();
355 
356   switch (thr->ts.work_share->sched)
357     {
358     case GFS_STATIC:
359     case GFS_AUTO:
360       return gomp_loop_ull_static_next (istart, iend);
361     case GFS_DYNAMIC:
362       return gomp_loop_ull_dynamic_next (istart, iend);
363     case GFS_GUIDED:
364       return gomp_loop_ull_guided_next (istart, iend);
365     default:
366       abort ();
367     }
368 }
369 
370 /* The *_ordered_*_next routines are called when the thread completes
371    processing of the iteration block currently assigned to it.
372 
373    Returns true if there is work remaining to be performed; *ISTART and
374    *IEND are filled with a new iteration block.  Returns false if all work
375    has been assigned.  */
376 
377 static bool
378 gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
379 {
380   struct gomp_thread *thr = gomp_thread ();
381   int test;
382 
383   gomp_ordered_sync ();
384   gomp_mutex_lock (&thr->ts.work_share->lock);
385   test = gomp_iter_ull_static_next (istart, iend);
386   if (test >= 0)
387     gomp_ordered_static_next ();
388   gomp_mutex_unlock (&thr->ts.work_share->lock);
389 
390   return test == 0;
391 }
392 
393 static bool
394 gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
395 {
396   struct gomp_thread *thr = gomp_thread ();
397   bool ret;
398 
399   gomp_ordered_sync ();
400   gomp_mutex_lock (&thr->ts.work_share->lock);
401   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
402   if (ret)
403     gomp_ordered_next ();
404   else
405     gomp_ordered_last ();
406   gomp_mutex_unlock (&thr->ts.work_share->lock);
407 
408   return ret;
409 }
410 
411 static bool
412 gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
413 {
414   struct gomp_thread *thr = gomp_thread ();
415   bool ret;
416 
417   gomp_ordered_sync ();
418   gomp_mutex_lock (&thr->ts.work_share->lock);
419   ret = gomp_iter_ull_guided_next_locked (istart, iend);
420   if (ret)
421     gomp_ordered_next ();
422   else
423     gomp_ordered_last ();
424   gomp_mutex_unlock (&thr->ts.work_share->lock);
425 
426   return ret;
427 }
428 
429 bool
430 GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
431 {
432   struct gomp_thread *thr = gomp_thread ();
433 
434   switch (thr->ts.work_share->sched)
435     {
436     case GFS_STATIC:
437     case GFS_AUTO:
438       return gomp_loop_ull_ordered_static_next (istart, iend);
439     case GFS_DYNAMIC:
440       return gomp_loop_ull_ordered_dynamic_next (istart, iend);
441     case GFS_GUIDED:
442       return gomp_loop_ull_ordered_guided_next (istart, iend);
443     default:
444       abort ();
445     }
446 }
447 
448 /* We use static functions above so that we're sure that the "runtime"
449    function can defer to the proper routine without interposition.  We
450    export the static function with a strong alias when possible, or with
451    a wrapper function otherwise.  */
452 
453 #ifdef HAVE_ATTRIBUTE_ALIAS
454 extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
455 	__attribute__((alias ("gomp_loop_ull_static_start")));
456 extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
457 	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
458 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
459 	__attribute__((alias ("gomp_loop_ull_guided_start")));
460 
461 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
462 	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
463 extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
464 	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
465 extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
466 	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
467 
468 extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
469 	__attribute__((alias ("gomp_loop_ull_static_next")));
470 extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
471 	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
472 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
473 	__attribute__((alias ("gomp_loop_ull_guided_next")));
474 
475 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
476 	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
477 extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
478 	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
479 extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
480 	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
481 #else
/* Exported *_start entry points for targets without attribute alias
   support; each merely forwards to the static implementation above.  */
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}
535 
/* Exported *_next entry points for targets without attribute alias
   support; each merely forwards to the static implementation above.  */
bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
571 #endif
572