/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
                    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
                    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
                ? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (up, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size_ull)
                                  < 1ULL << (sizeof (gomp_ull)
                                             * __CHAR_BIT__ / 2 - 1), 1))
              ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
                                        - (nthreads + 1) * ws->chunk_size_ull);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
                                   < 1ULL << (sizeof (gomp_ull)
                                              * __CHAR_BIT__ / 2 - 1), 1))
          ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
                                    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
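
/* As an illustration only (a rough sketch of the call sequence the compiler
   emits, not code in this library): a worksharing loop such as

     #pragma omp for schedule(runtime)
     for (unsigned long long i = 0; i < n; i += step)
       body (i);

   is lowered to something along the lines of the following, where body, n
   and step are placeholders:

     unsigned long long istart, iend;
     if (GOMP_loop_ull_runtime_start (true, 0, n, step, &istart, &iend))
       do
         for (unsigned long long i = istart; i < iend; i += step)
           body (i);
       while (GOMP_loop_ull_runtime_next (&istart, &iend));
     GOMP_loop_end ();  */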

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
                                          icv->run_sched_chunk_size,
                                          istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
                                         icv->run_sched_chunk_size,
                                         istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
                                         0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
                     gomp_ull incr, long sched, gomp_ull chunk_size,
                     gomp_ull *istart, gomp_ull *iend,
                     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (size > (sizeof (struct gomp_work_share)
                      - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
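
/* For illustration only (a rough sketch, not code in this library): when the
   loop has an ordered clause, the compiler uses the *_ordered_* entry points
   and brackets the ordered region with GOMP_ordered_start/GOMP_ordered_end,
   roughly:

     unsigned long long istart, iend;
     if (GOMP_loop_ull_ordered_runtime_start (true, 0, n, 1, &istart, &iend))
       do
         for (unsigned long long i = istart; i < iend; i++)
           {
             GOMP_ordered_start ();
             ordered_body (i);
             GOMP_ordered_end ();
           }
       while (GOMP_loop_ull_ordered_runtime_next (&istart, &iend));
     GOMP_loop_end ();

   where n and ordered_body are placeholders.  */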

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
                                                 icv->run_sched_chunk_size,
                                                 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
                                                 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, long sched, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend,
                             uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                          sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   while the other COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */
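
/* For illustration only (a rough sketch, not code in this library): for a
   loop nest with ordered(1), e.g.

     #pragma omp for ordered(1) schedule(runtime)
     for (unsigned long long i = 0; i < n; i++)

   the compiler passes the per-dimension iteration counts in COUNTS (here
   ncounts == 1 and counts[0] == n) and starts the loop with

     GOMP_loop_ull_doacross_runtime_start (1, counts, &istart, &iend);

   while the depend(sink:...) and depend(source) points in the body are
   implemented with GOMP_doacross_ull_wait and GOMP_doacross_ull_post.  */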

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
                                                   icv->run_sched_chunk_size,
                                                   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
                                                  icv->run_sched_chunk_size,
                                                  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
                                                  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
                              long sched, gomp_ull chunk_size,
                              gomp_ull *istart, gomp_ull *iend,
                              uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                          sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */
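
/* For illustration only (a rough sketch, not code in this library): with a
   schedule fixed at compile time the matching start/next pair is called
   directly, e.g. for schedule(static, 4) roughly

     unsigned long long istart, iend;
     if (GOMP_loop_ull_static_start (true, 0, n, 1, 4, &istart, &iend))
       do
         for (unsigned long long i = istart; i < iend; i++)
           body (i);
       while (GOMP_loop_ull_static_next (&istart, &iend));
     GOMP_loop_end ();

   where n and body are placeholders.  */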

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
        __attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
        __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
        __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
        __attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
        __attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
        __attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
        __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
        __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                             gomp_ull incr, gomp_ull chunk_size,
                             gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
                            gomp_ull incr, gomp_ull chunk_size,
                            gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull chunk_size,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
                                      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
                                         gomp_ull incr, gomp_ull chunk_size,
                                         gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
                                     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                          gomp_ull end, gomp_ull incr,
                                          gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
                                                gomp_ull end, gomp_ull incr,
                                                gomp_ull *istart,
                                                gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
                                    gomp_ull incr, gomp_ull chunk_size,
                                    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
                                             istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
                                      gomp_ull chunk_size, gomp_ull *istart,
                                      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
                                               istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
                                     gomp_ull chunk_size, gomp_ull *istart,
                                     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
                                              istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
                                               gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif