/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
                    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
                    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
                ? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
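  /* Bit 0 of ws->mode, when set below, lets the dynamic iterator claim
     chunks with a plain atomic add instead of a compare-and-swap loop;
     bit 1 records a downward-counting loop.  */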
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (up, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size_ull)
                                  < 1ULL << (sizeof (gomp_ull)
                                             * __CHAR_BIT__ / 2 - 1), 1))
              ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
                                        - (nthreads + 1) * ws->chunk_size_ull);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
                                   < 1ULL << (sizeof (gomp_ull)
                                              * __CHAR_BIT__ / 2 - 1), 1))
          ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
                                    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

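#if 0
/* Illustrative sketch only, never compiled: roughly how a compiler might
   lower "#pragma omp for schedule(dynamic, 4)" over an unsigned long long
   induction variable onto these entry points.  The function name is
   hypothetical; GOMP_loop_end (from loop.c) closes the work share with
   the implicit barrier.  */
static void
example_dynamic_loop (gomp_ull n)
{
  gomp_ull istart, iend, i;
  if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 4, &istart, &iend))
    do
      for (i = istart; i < iend; i++)
        /* ... loop body ... */;
    while (GOMP_loop_ull_dynamic_next (&istart, &iend));
  GOMP_loop_end ();
}
#endif
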
static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
                                          icv->run_sched_chunk_size,
                                          istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

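/* For example, with OMP_SCHEDULE="guided,7" the GFS_RUNTIME (or GFS_AUTO,
   i.e. nonmonotonic runtime) cases above yield GFS_GUIDED with *CHUNK_SIZE
   set to 7, while OMP_SCHEDULE="auto" falls back to GFS_STATIC with a
   chunk size of 0.  */
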
bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
                     gomp_ull incr, long sched, gomp_ull chunk_size,
                     gomp_ull *istart, gomp_ull *iend,
                     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
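      /* On entry *MEM holds the number of scratch bytes the construct
         needs; on exit it is replaced by a pointer to zeroed memory for
         them, allocated separately when the request does not fit in the
         work share's inline inline_ordered_team_ids buffer.  */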
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
          if (size > (sizeof (struct gomp_work_share)
                      - offsetof (struct gomp_work_share,
                                  inline_ordered_team_ids)))
            thr->ts.work_share->ordered_team_ids
              = gomp_malloc_cleared (size);
          else
            memset (thr->ts.work_share->ordered_team_ids, '\0', size);
          *mem = (void *) thr->ts.work_share->ordered_team_ids;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        *mem = (void *) thr->ts.work_share->ordered_team_ids;
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

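#if 0
/* Illustrative sketch only, never compiled: an ordered loop such as
   "#pragma omp for ordered schedule(dynamic)" pairs these entry points
   with GOMP_ordered_start/GOMP_ordered_end (from ordered.c) around the
   ordered region in the loop body.  The function name is hypothetical.  */
static void
example_ordered_loop (gomp_ull n)
{
  gomp_ull istart, iend, i;
  if (GOMP_loop_ull_ordered_dynamic_start (true, 0, n, 1, 1, &istart, &iend))
    do
      for (i = istart; i < iend; i++)
        {
          /* ... unordered part of the body ... */
          GOMP_ordered_start ();
          /* ... ordered region, executed in iteration order ... */
          GOMP_ordered_end ();
        }
    while (GOMP_loop_ull_ordered_dynamic_next (&istart, &iend));
  GOMP_loop_end ();
}
#endif
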
static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, long sched, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend,
                             uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
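  /* Here *MEM on entry holds the number of extra scratch bytes requested;
     folding it into ORDERED makes gomp_work_share_start reserve that much
     room past the ordered_team_ids array, and a suitably aligned pointer
     into it is handed back through *MEM below.  */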
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   while the remaining COUNTS array elements tell the library the number of
   iterations of the ordered inner loops.  */

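/* For example, "#pragma omp for ordered(2)" over i in [0, 8) and j in
   [0, 16) starts with NCOUNTS == 2 and COUNTS == { 8, 16 }; the loop body
   then posts completed iterations with GOMP_doacross_ull_post and waits
   for cross-iteration dependences with GOMP_doacross_ull_wait (both in
   ordered.c).  */
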
static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
                                                   icv->run_sched_chunk_size,
                                                   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
                              long sched, gomp_ull chunk_size,
                              gomp_ull *istart, gomp_ull *iend,
                              uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

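  /* Unlike GOMP_loop_ull_ordered_start, the extra scratch area requested
     through *MEM lives in the doacross descriptor, where
     gomp_doacross_ull_init reserved it above.  */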
  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull chunk_size,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
                                         gomp_ull incr, gomp_ull chunk_size,
                                         gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                                gomp_ull end, gomp_ull incr,
                                                gomp_ull *istart,
                                                gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
                                               istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
                                               gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif