/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
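  /* A note on ws->mode (an assumption drawn from how the iterator code
     elsewhere in libgomp consumes it, not stated in this file): bit 0 is
     set below when the overflow-unchecked fast path for dynamic scheduling
     is safe to use, and bit 1 records a downward-counting loop.  The
     "cheap overflow protection" below requires nthreads and the magnitude
     of the scaled chunk size to be below 1 << 31 on LP64, so
     (nthreads + 1) * chunk_size cannot itself wrap and the comparison
     against end_ull is exact.  */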
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

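/* As an illustrative sketch only (none of this appears in libgomp itself),
   the code a compiler emits for something like
     #pragma omp for schedule(dynamic, 4)
   with a 64-bit unsigned iteration variable is expected to drive these
   entry points roughly as follows; `n' and `body' are placeholders:

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 4, &istart, &iend))
       do
	 for (i = istart; i < iend; i++)
	   body (i);
       while (GOMP_loop_ull_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   GOMP_loop_end (defined in loop.c) supplies the closing barrier.  */
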
static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_chunk_size,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
	 feedback-driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
		     gomp_ull incr, long sched, gomp_ull chunk_size,
		     gomp_ull *istart, gomp_ull *iend,
		     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
	  if (size > (sizeof (struct gomp_work_share)
		      - offsetof (struct gomp_work_share,
				  inline_ordered_team_ids)))
	    thr->ts.work_share->ordered_team_ids
	      = gomp_malloc_cleared (size);
	  else
	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
	  *mem = (void *) thr->ts.work_share->ordered_team_ids;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	*mem = (void *) thr->ts.work_share->ordered_team_ids;
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

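/* Again purely as an illustrative sketch (not generated from this file),
   for
     #pragma omp for ordered schedule(dynamic)
   compiler-generated code is expected to look roughly like the following,
   with GOMP_ordered_start/GOMP_ordered_end (from ordered.c) bracketing the
   ordered region in the body; `n' and the helper names are placeholders:

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_ordered_dynamic_start (true, 0, n, 1, 1,
					       &istart, &iend))
       do
	 for (i = istart; i < iend; i++)
	   {
	     before_ordered (i);
	     GOMP_ordered_start ();
	     ordered_part (i);
	     GOMP_ordered_end ();
	   }
       while (GOMP_loop_ull_ordered_dynamic_next (&istart, &iend));
     GOMP_loop_end ();  */
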
static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
	 feedback-driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, long sched, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend,
			     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   and the other COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */

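/* One more illustrative sketch (placeholders only, nothing here is emitted
   by this library): for a two-level doacross nest such as
     #pragma omp for ordered(2) schedule(static)
   the compiler passes the per-level iteration counts in COUNTS and drives
   the construct roughly like this:

     gomp_ull counts[2] = { count0, count1 };
     gomp_ull istart, iend;
     if (GOMP_loop_ull_doacross_static_start (2, counts, 0, &istart, &iend))
       do
	 run_chunk (istart, iend);
       while (GOMP_loop_ull_static_next (&istart, &iend));
     GOMP_loop_end ();

   where run_chunk executes the flattened iterations [istart, iend) and is
   expected to call GOMP_doacross_ull_wait before consuming a cross-iteration
   dependence and GOMP_doacross_ull_post after satisfying one (both live in
   ordered.c).  */
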
static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
						   icv->run_sched_chunk_size,
						   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
	 feedback-driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
			      long sched, gomp_ull chunk_size,
			      gomp_ull *istart, gomp_ull *iend,
			      uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull chunk_size,
					  gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
					 gomp_ull incr, gomp_ull chunk_size,
					 gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
						gomp_ull end, gomp_ull incr,
						gomp_ull *istart,
						gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
					       istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
					       gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif