xref: /netbsd/external/gpl3/gcc/dist/libgomp/loop_ull.c (revision f0fbc68b)
/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
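
/* Editorial note (an assumption based on how the iterators use MODE):
   the low bit set above records that the dynamic-schedule fast path,
   which grabs a chunk with a single compare-and-swap and no wraparound
   checks, is safe because the arithmetic cannot overflow; bit 1 marks
   a downward (!UP) loop.  */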

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
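
/* A sketch of the calling pattern (editorial example; the exact code GCC
   emits for "#pragma omp for schedule(dynamic, chunk)" with an unsigned
   long long iterator may differ in detail):

     gomp_ull istart, iend, i;
     if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, chunk, &istart, &iend))
       do
	 for (i = istart; i < iend; i++)
	   body (i);
       while (GOMP_loop_ull_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   with GOMP_loop_end_nowait () used instead when the loop has a nowait
   clause.  */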

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_chunk_size,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
		     gomp_ull incr, long sched, gomp_ull chunk_size,
		     gomp_ull *istart, gomp_ull *iend,
		     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
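      /* Editorial note (an assumption about the compiler contract): a
         non-NULL MEM passes in the number of bytes of zeroed scratch
         space the caller wants; it is served from the work share's
         inline_ordered_team_ids area when it fits and is sufficiently
         aligned, otherwise heap-allocated, and the pointer is handed
         back through *MEM.  */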
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
	  if (sizeof (struct gomp_work_share)
	      <= INLINE_ORDERED_TEAM_IDS_OFF
	      || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
	      || size > (sizeof (struct gomp_work_share)
			- INLINE_ORDERED_TEAM_IDS_OFF))
	    *mem
	      = (void *) (thr->ts.work_share->ordered_team_ids
			  = gomp_malloc_cleared (size));
	  else
	    *mem = memset (((char *) thr->ts.work_share)
			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	{
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
	  else
	    {
	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
	      p += __alignof__ (long long) - 1;
	      p &= ~(__alignof__ (long long) - 1);
	      *mem = (void *) p;
	    }
	}
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, long sched, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend,
			     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

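  /* Descriptive note added editorially: the scratch area handed back in
     *MEM starts right after the NTHREADS entries of ordered_team_ids and
     is rounded up to long long alignment.  */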
  if (mem)
    {
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
   and other COUNTS array elements tell the library number of iterations
   in the ordered inner loops.  */
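
/* Editorial sketch of the usual lowering (an assumption; the doacross
   synchronization itself goes through GOMP_doacross_ull_post and
   GOMP_doacross_ull_wait):

     #pragma omp for ordered(2) schedule(dynamic)
     for (i = 0; i < M; i++)
       for (j = 0; j < N; j++)
	 { ... ordered depend(sink)/depend(source) ... }

   starts the work share with NCOUNTS == 2 and COUNTS[] == { M, N }: the
   work-sharing loop runs over 0 .. M-1, and N only describes the inner
   ordered loop for dependence tracking.  */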

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
						   icv->run_sched_chunk_size,
						   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
			      long sched, gomp_ull chunk_size,
			      gomp_ull *istart, gomp_ull *iend,
			      uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull chunk_size,
					  gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
					 gomp_ull incr, gomp_ull chunk_size,
					 gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
						gomp_ull end, gomp_ull incr,
						gomp_ull *istart,
						gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
					       istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
					       gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif