1 /* Copyright (C) 2015-2020 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the taskloop construct. It is included twice, once
27 for the long and once for unsigned long long variant. */
28
/* Called when encountering a taskloop construct.  Splits the iteration
   space [START, END) with stride STEP into chunks (per the grainsize or
   num_tasks clause encoded in FLAGS/NUM_TASKS) and runs FN on a copy of
   DATA for each chunk, either inline or as deferred tasks.  */
32
void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
    early_return:
      /* Reduction without nogroup: DATA begins with the two TYPE-sized
	 begin/end slots followed by a pointer to the reduction data
	 block.  */
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }

#ifdef TYPE_is_long
  /* Signed (long) variant: direction comes from the sign of STEP.
     Compute the iteration count N by rounding (END - START) / STEP
     away from zero; an empty range takes the early return above.  */
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Unsigned (unsigned long long) variant: STEP wraps, so the loop
     direction is carried in the GOMP_TASK_FLAG_UP bit instead.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* TASK_STEP is how far START advances per generated task
     (iterations-per-task * STEP).  When N doesn't divide evenly, tasks
     0 .. NFIRST get one extra iteration; the `i == nfirst' checks in
     the loops below drop TASK_STEP back by STEP after the last of the
     larger chunks.  */
  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* grainsize clause: NUM_TASKS actually carries the grainsize;
	 derive the task count from it.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* N may not fit in unsigned long here; clamp the task count to
	 ~0UL and handle that case in the fallback branch below.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
	{
	  /* All iterations fit into a single task.  */
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      /* Remainder: the first NFIRST + 1 tasks each take
		 grainsize + 1 iterations.  */
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  /* Fewer tasks than grainsize (or clamped count): split N as
	     evenly as possible over NUM_TASKS tasks.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      /* num_tasks clause, or default (one task per team thread); cap
	 the task count at the iteration count and split evenly.  */
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* nogroup clause: no implicit taskgroup, but still honour
	 cancellation of an enclosing taskgroup before generating
	 anything.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      /* Open the implicit taskgroup around the generated tasks and, if
	 requested, register the task reductions described by the pointer
	 stored after the begin/end slots in DATA.  */
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  /* Clamp the priority clause value to the max-task-priority ICV.  */
  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Undeferred path: if(0), no team, inside a final task, or the team's
     queue is already heavily loaded -- run every chunk inline on this
     thread.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* With a copy constructor, first copy-construct one argument
	     block per task into a single aligned stack buffer, then run
	     FN on each block in a second pass.  */
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      /* Run CPYFN with the chunk's task as the current task.  */
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      /* The first two TYPE-sized slots of the argument block
		 carry this chunk's begin and end.  */
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step -= step;
	      fn (arg);
	      arg += arg_size;
	      /* Orphan any children this inline task created before its
		 stack frame goes away.  */
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	/* No copy constructor: DATA itself is reused for every chunk;
	   only the begin/end slots are rewritten between calls.  */
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step -= step;
	    fn (data);
	    /* Orphan children before this stack task is destroyed.  */
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      /* Deferred path: allocate one gomp_task per chunk, then enqueue
	 them all under the team's task lock and wake idle threads.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  /* The argument block lives right after the task structure,
	     aligned up to ARG_ALIGN.  */
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  /* Kind is UNDEFERRED while CPYFN runs on this thread; flipped
	     to WAITING below once the argument block is ready.  */
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step -= step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  /* NOTE(review): relies on GOMP_TASK_FLAG_FINAL being bit 1 so
	     the shift yields exactly 0 or 1 -- confirm against the flag
	     definitions in the shared headers.  */
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  (Skipped when CPYFN ran: copy constructors have already
	 executed, so the tasks must run to balance the destructors.)  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      /* Discard all not-yet-queued tasks and close the implicit
		 taskgroup, if any.  */
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  /* Children and taskgroup queues are filled at the front, the
	     team queue at the back.  */
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake at most NUM_TASKS idle threads, discounting this thread if
	 it is not in a tied task.  */
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Close the implicit taskgroup: waits for all generated tasks.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}
382