/* Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
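
/* Including hashtab.h above instantiates a hash table specialized for
   hash_entry_type.  Entries hash and compare on the depend address
   alone, so every depend clause naming the same address lands in the
   same slot, with entries chained through the next/prev links of
   gomp_task_depend_entry.  */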

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depend clauses within this task name the same
	     address, all but the first one are redundant.  As inout/out
	     entries come first, if any of them is inout/out, it will win,
	     which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from an
		 earlier depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain; after the first redundant_out entry, all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     depend[0]: number of depend elements.
     depend[1]: number of depend elements of type "out".
     depend[2..N+1]: address of [1..N]th depend element.  */
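
/* For example (an illustrative sketch of the encoding described above,
   not a normative statement beyond it):

     #pragma omp task depend(out: x) depend(in: y)

   would be lowered so that

     depend[0] = (void *) (uintptr_t) 2;    two depend elements
     depend[1] = (void *) (uintptr_t) 1;    one of them is out/inout
     depend[2] = &x;                        out elements come first
     depend[3] = &y;

   with GOMP_TASK_FLAG_DEPEND set in FLAGS.  */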

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

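/* taskloop.c is written as a template parameterized by the macros below;
   including it twice instantiates the taskloop entry points once for
   signed "long" iteration variables (GOMP_taskloop) and once for
   "unsigned long long" ones (GOMP_taskloop_ull).  */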
#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop

static inline void
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
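
/* Viewed together with the requeuing sites below, TTASK->state acts as
   a small state machine (this is a summary of the surrounding code, not
   authoritative documentation): a task starts as
   GOMP_TARGET_TASK_READY_TO_RUN; the thread that ran gomp_target_task_fn
   moves it to GOMP_TARGET_TASK_RUNNING while the asynchronous phase is
   in flight; GOMP_PLUGIN_target_task_completion always records
   GOMP_TARGET_TASK_FINISHED, but requeues the task itself only when the
   state had already left READY_TO_RUN.  */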

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
	 are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}

/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|	      +--- last_parent_depends_on
	|	      |
	V	      V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static inline void
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static inline void
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}

/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static inline void
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static inline void
priority_queue_downgrade_task (enum priority_queue_type type,
			       struct priority_queue *head,
			       struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}

/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}

/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If parent is gone already, the hash table is freed and nothing
     will use the hash table anymore, no need to remove anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so we need a release
	 barrier here to ensure memory written by child_task->fn above
	 is flushed before the 0 is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}

/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 choose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything but the dependencies
	 into account here.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}

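/* Called when encountering the start of a taskgroup construct.  As an
   illustrative sketch of the expected lowering, a

     #pragma omp taskgroup
     { ... structured block ... }

   region is bracketed by a GOMP_taskgroup_start ()/GOMP_taskgroup_end ()
   pair around the structured block.  */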
void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all child tasks of
     the taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}

int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)