/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
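
/* A note on WS->MODE, as a sketch of the intent rather than a contract
   (the bits are consumed by the iterators in iter_ull.c): bit 0 set means
   the dynamic fast path may grab a chunk with a single atomic fetch-and-add,
   because even NTHREADS + 1 concurrent additions of the scaled chunk size to
   ->next_ull cannot wrap around the unsigned long long range before the
   comparison against ->end_ull; bit 1 set records a downward-iterating loop.
   For example, with 8 threads, chunk_size 4 and incr 1 on an upward loop,
   the test above is roughly end_ull < ULLONG_MAX - 9 * 4, which only fails
   for loops ending near the very top of the 64-bit range.  */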

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
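
/* As a purely illustrative sketch (not part of libgomp itself),
   compiler-generated code for something like
   "#pragma omp for schedule(dynamic, 4)" over unsigned long long iterations
   typically drives these entry points along these lines, where body ()
   stands for the user's loop body and the exact lowering depends on the
   compiler version:

     unsigned long long istart, iend;
     if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 4, &istart, &iend))
       do
	 {
	   for (unsigned long long i = istart; i < iend; i++)
	     body (i);
	 }
       while (GOMP_loop_ull_dynamic_next (&istart, &iend));
     GOMP_loop_end ();  */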

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_chunk_size,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}
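
/* For example (mirroring the mapping above): under schedule(runtime) with
   the run-sched ICV set from OMP_SCHEDULE="guided,7", gomp_adjust_sched
   returns GFS_GUIDED and stores 7 in *CHUNK_SIZE; if the ICV says "auto",
   it degrades to GFS_STATIC with a chunk size of 0, matching
   GOMP_loop_ull_runtime_start's treatment of GFS_AUTO.  */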

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
		     gomp_ull incr, long sched, gomp_ull chunk_size,
		     gomp_ull *istart, gomp_ull *iend,
		     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
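	  /* Byte offset of the inline_ordered_team_ids array inside
	     struct gomp_work_share, rounded up to the alignment of
	     long long; the SIZE bytes requested through *MEM are carved
	     out of that inline space when they fit, otherwise they are
	     heap-allocated below.  */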
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
	  if (size > (sizeof (struct gomp_work_share)
		      - INLINE_ORDERED_TEAM_IDS_OFF))
	    *mem
	      = (void *) (thr->ts.work_share->ordered_team_ids
			  = gomp_malloc_cleared (size));
	  else
	    *mem = memset (((char *) thr->ts.work_share)
			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	{
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
	  else
	    {
	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
	      p += __alignof__ (long long) - 1;
	      p &= ~(__alignof__ (long long) - 1);
	      *mem = (void *) p;
	    }
	}
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
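
/* Illustrative usage note (not a definition of the ABI): inside the chunk
   returned here, compiler-generated code for "#pragma omp for ordered"
   brackets the ordered region of each iteration with GOMP_ordered_start ()
   and GOMP_ordered_end (), which is what the ordered bookkeeping set up by
   these routines synchronizes with.  */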

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, long sched, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend,
			     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, the worksharing loop always iterates from 0 to COUNTS[0] - 1, and
   the remaining COUNTS array elements tell the library the number of
   iterations of the ordered inner loops.  */
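
/* Purely illustrative sketch (not part of this file's API contract): for a
   nest such as "#pragma omp for ordered(2)", COUNTS[0] and COUNTS[1] hold
   the iteration counts of the two loops, the work share hands out blocks of
   the outermost range 0 .. COUNTS[0] - 1, and cross-iteration dependences in
   the body are expressed with calls along the lines of
   GOMP_doacross_ull_wait (for "ordered depend(sink: ...)") and
   GOMP_doacross_ull_post (for "ordered depend(source)").  */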

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
						   icv->run_sched_chunk_size,
						   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
			      long sched, gomp_ull chunk_size,
			      gomp_ull *istart, gomp_ull *iend,
			      uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull chunk_size,
					  gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
					 gomp_ull incr, gomp_ull chunk_size,
					 gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
						gomp_ull end, gomp_ull incr,
						gomp_ull *istart,
						gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
					       istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
					       gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif