1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of tasks in response to task
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
34 
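/* The dependence hash tables used below are instantiated from the generic
   hashtab.h template: the includer supplies hash_entry_type together with
   the inline htab_alloc/htab_free/htab_hash/htab_eq helpers, and hashtab.h
   defines the table operations in terms of them.  Entries hash and compare
   on the depended-on address, so all still-pending dependences on one
   address end up chained off the same slot (see gomp_task_handle_depend).  */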
35 typedef struct gomp_task_depend_entry *hash_entry_type;
36 
37 static inline void *
38 htab_alloc (size_t size)
39 {
40   return gomp_malloc (size);
41 }
42 
43 static inline void
44 htab_free (void *ptr)
45 {
46   free (ptr);
47 }
48 
49 #include "hashtab.h"
50 
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
53 {
54   return hash_pointer (element->addr);
55 }
56 
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
59 {
60   return x->addr == y->addr;
61 }
62 
63 /* Create a new task data structure.  */
64 
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 		struct gomp_task_icv *prev_icv)
68 {
69   /* It would seem that using memset here would be a win, but it turns
70      out that partially filling gomp_task allows us to keep the
71      overhead of task creation low.  In the nqueens-1.c test, for a
72      sufficiently large N, we drop the overhead from 5-6% to 1%.
73 
74      Note, the nqueens-1.c test in serial mode is a good test to
75      benchmark the overhead of creating tasks as there are millions of
76      tiny tasks created that all run undeferred.  */
77   task->parent = parent_task;
78   priority_queue_init (&task->children_queue);
79   task->taskgroup = NULL;
80   task->dependers = NULL;
81   task->depend_hash = NULL;
82   task->taskwait = NULL;
83   task->depend_count = 0;
84   task->completion_sem = NULL;
85   task->deferred_p = false;
86   task->icv = *prev_icv;
87   task->kind = GOMP_TASK_IMPLICIT;
88   task->in_tied_task = false;
89   task->final_task = false;
90   task->copy_ctors_done = false;
91   task->parent_depends_on = false;
92 }
93 
94 /* Clean up a task, after completing it.  */
95 
96 void
97 gomp_end_task (void)
98 {
99   struct gomp_thread *thr = gomp_thread ();
100   struct gomp_task *task = thr->task;
101 
102   gomp_finish_task (task);
103   thr->task = task->parent;
104 }
105 
106 /* Clear the parent field of every task in LIST.  */
107 
108 static inline void
109 gomp_clear_parent_in_list (struct priority_list *list)
110 {
111   struct priority_node *p = list->tasks;
112   if (p)
113     do
114       {
115 	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
116 	p = p->next;
117       }
118     while (p != list->tasks);
119 }
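
/* Note: a priority_list keeps its tasks on a circular doubly-linked list
   of priority_nodes, which is why the walk above stops once P wraps back
   around to LIST->tasks.  */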
120 
121 /* Splay tree version of gomp_clear_parent_in_list.
122 
123    Clear the parent field of every task in NODE within SP, and free
124    the node when done.  */
125 
126 static void
127 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
128 {
129   if (!node)
130     return;
131   prio_splay_tree_node left = node->left, right = node->right;
132   gomp_clear_parent_in_list (&node->key.l);
133 #if _LIBGOMP_CHECKING_
134   memset (node, 0xaf, sizeof (*node));
135 #endif
136   /* No need to remove the node from the tree.  We're nuking
137      everything, so just free the nodes and our caller can clear the
138      entire splay tree.  */
139   free (node);
140   gomp_clear_parent_in_tree (sp, left);
141   gomp_clear_parent_in_tree (sp, right);
142 }
143 
144 /* Clear the parent field of every task in Q and remove every task
145    from Q.  */
146 
147 static inline void
148 gomp_clear_parent (struct priority_queue *q)
149 {
150   if (priority_queue_multi_p (q))
151     {
152       gomp_clear_parent_in_tree (&q->t, q->t.root);
153       /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 	 No need to remove anything.  We can just nuke everything.  */
155       q->t.root = NULL;
156     }
157   else
158     gomp_clear_parent_in_list (&q->l);
159 }
160 
161 /* Helper function for GOMP_task and gomp_create_target_task.
162 
163    For a TASK with in/out dependencies, fill in the various dependency
164    queues.  PARENT is the parent of said task.  DEPEND is as in
165    GOMP_task.  */
166 
167 static void
168 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
169 			 void **depend)
170 {
171   size_t ndepend = (uintptr_t) depend[0];
172   size_t i;
173   hash_entry_type ent;
174 
175   if (ndepend)
176     {
177       /* depend[0] is total # */
178       size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
179       /* ndepend - nout is # of in: */
180       for (i = 0; i < ndepend; i++)
181 	{
182 	  task->depend[i].addr = depend[2 + i];
183 	  task->depend[i].is_in = i >= nout;
184 	}
185     }
186   else
187     {
188       ndepend = (uintptr_t) depend[1]; /* total # */
189       size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
190       size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
191       /* For now we treat mutexinoutset like out, which is compliant, but
192 	 inefficient.  */
193       size_t nin = (uintptr_t) depend[4]; /* # of in: */
194       /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195       size_t normal = nout + nmutexinoutset + nin;
196       size_t n = 0;
197       for (i = normal; i < ndepend; i++)
198 	{
199 	  void **d = (void **) (uintptr_t) depend[5 + i];
200 	  switch ((uintptr_t) d[1])
201 	    {
202 	    case GOMP_DEPEND_OUT:
203 	    case GOMP_DEPEND_INOUT:
204 	    case GOMP_DEPEND_MUTEXINOUTSET:
205 	      break;
206 	    case GOMP_DEPEND_IN:
207 	      continue;
208 	    default:
209 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
210 			  (int) (uintptr_t) d[1]);
211 	    }
212 	  task->depend[n].addr = d[0];
213 	  task->depend[n++].is_in = 0;
214 	}
215       for (i = 0; i < normal; i++)
216 	{
217 	  task->depend[n].addr = depend[5 + i];
218 	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
219 	}
220       for (i = normal; i < ndepend; i++)
221 	{
222 	  void **d = (void **) (uintptr_t) depend[5 + i];
223 	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
224 	    continue;
225 	  task->depend[n].addr = d[0];
226 	  task->depend[n++].is_in = 1;
227 	}
228     }
229   task->depend_count = ndepend;
230   task->num_dependees = 0;
231   if (parent->depend_hash == NULL)
232     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
233   for (i = 0; i < ndepend; i++)
234     {
235       task->depend[i].next = NULL;
236       task->depend[i].prev = NULL;
237       task->depend[i].task = task;
238       task->depend[i].redundant = false;
239       task->depend[i].redundant_out = false;
240 
241       hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
242 					      &task->depend[i], INSERT);
243       hash_entry_type out = NULL, last = NULL;
244       if (*slot)
245 	{
246 	  /* If the same task has several depend clauses on the same address,
247 	     all but the first one are redundant.  As inout/out come first, if
248 	     any of them is inout/out, it will win, which is the right semantics.  */
249 	  if ((*slot)->task == task)
250 	    {
251 	      task->depend[i].redundant = true;
252 	      continue;
253 	    }
254 	  for (ent = *slot; ent; ent = ent->next)
255 	    {
256 	      if (ent->redundant_out)
257 		break;
258 
259 	      last = ent;
260 
261 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
262 	      if (task->depend[i].is_in && ent->is_in)
263 		continue;
264 
265 	      if (!ent->is_in)
266 		out = ent;
267 
268 	      struct gomp_task *tsk = ent->task;
269 	      if (tsk->dependers == NULL)
270 		{
271 		  tsk->dependers
272 		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
273 				   + 6 * sizeof (struct gomp_task *));
274 		  tsk->dependers->n_elem = 1;
275 		  tsk->dependers->allocated = 6;
276 		  tsk->dependers->elem[0] = task;
277 		  task->num_dependees++;
278 		  continue;
279 		}
280 	      /* We already have some other dependency on tsk from an earlier
281 		 depend clause.  */
282 	      else if (tsk->dependers->n_elem
283 		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
284 			   == task))
285 		continue;
286 	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
287 		{
288 		  tsk->dependers->allocated
289 		    = tsk->dependers->allocated * 2 + 2;
290 		  tsk->dependers
291 		    = gomp_realloc (tsk->dependers,
292 				    sizeof (struct gomp_dependers_vec)
293 				    + (tsk->dependers->allocated
294 				       * sizeof (struct gomp_task *)));
295 		}
296 	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
297 	      task->num_dependees++;
298 	    }
299 	  task->depend[i].next = *slot;
300 	  (*slot)->prev = &task->depend[i];
301 	}
302       *slot = &task->depend[i];
303 
304       /* There is no need to store more than one depend({,in}out:) task per
305 	 address in the hash table chain for the purpose of creation of
306 	 deferred tasks, because each out depends on all earlier outs, thus it
307 	 is enough to record just the last depend({,in}out:).  For depend(in:),
308 	 we need to keep all of the previous ones not terminated yet, because
309 	 a later depend({,in}out:) might need to depend on all of them.  So, if
310 	 the new task's clause is depend({,in}out:), we know there is at most
311 	 one other depend({,in}out:) clause in the list (out).  For
312 	 non-deferred tasks we want to see all outs, so they are moved to the
313 	 end of the chain; after the first redundant_out entry, all following
314 	 entries should be redundant_out.  */
315       if (!task->depend[i].is_in && out)
316 	{
317 	  if (out != last)
318 	    {
319 	      out->next->prev = out->prev;
320 	      out->prev->next = out->next;
321 	      out->next = last->next;
322 	      out->prev = last;
323 	      last->next = out;
324 	      if (out->next)
325 		out->next->prev = out;
326 	    }
327 	  out->redundant_out = true;
328 	}
329     }
330 }
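
/* Illustrative sketch (hypothetical tasks, not part of the library): if a
   task T1 is created with depend(out: x) and a sibling T2 is then created
   with depend(in: x), gomp_task_handle_depend for T2 finds T1's entry for
   &x in the parent's depend_hash, appends T2 to T1->dependers and bumps
   T2->num_dependees to 1.  T2 is therefore not placed on any run queue by
   GOMP_task; it is only queued once gomp_task_run_post_handle_dependers
   drops its num_dependees back to zero after T1 completes.  */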
331 
332 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
333    false, then we must not delay in executing the task.  If UNTIED is true,
334    then the task may be executed by any member of the team.
335 
336    DEPEND is an array containing:
337      if depend[0] is non-zero, then:
338 	depend[0]: number of depend elements.
339 	depend[1]: number of depend elements of type "out/inout".
340 	depend[2..N+1]: address of [1..N]th depend element.
341      otherwise, when depend[0] is zero, then:
342 	depend[1]: number of depend elements.
343 	depend[2]: number of depend elements of type "out/inout".
344 	depend[3]: number of depend elements of type "mutexinoutset".
345 	depend[4]: number of depend elements of type "in".
346 	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 		   omp_depend_t objects.  */
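
/* As an illustrative sketch of the first (legacy) layout above, a
   hypothetical construct such as

	#pragma omp task depend(out: x) depend(in: y, z)

   might be lowered by the compiler to roughly

	depend[0] = (void *) 3;    // total number of depend elements
	depend[1] = (void *) 1;    // number of out/inout elements
	depend[2] = &x;            // out/inout addresses come first
	depend[3] = &y;
	depend[4] = &z;            // remaining addresses are "in" elements

   With the second layout, depend[0] is zero and the counts follow in
   depend[1..4] as described above.  */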
349 
350 void
351 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
352 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
353 	   void **depend, int priority_arg, void *detach)
354 {
355   struct gomp_thread *thr = gomp_thread ();
356   struct gomp_team *team = thr->ts.team;
357   int priority = 0;
358 
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361      tied to one thread all the time.  This means UNTIED tasks must be
362      tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
363      might be running on a different thread than FN.  */
364   if (cpyfn)
365     if_clause = false;
366   flags &= ~GOMP_TASK_FLAG_UNTIED;
367 #endif
368 
369   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
370   if (__builtin_expect (gomp_cancel_var, 0) && team)
371     {
372       if (gomp_team_barrier_cancelled (&team->barrier))
373 	return;
374       if (thr->task->taskgroup)
375 	{
376 	  if (thr->task->taskgroup->cancelled)
377 	    return;
378 	  if (thr->task->taskgroup->workshare
379 	      && thr->task->taskgroup->prev
380 	      && thr->task->taskgroup->prev->cancelled)
381 	    return;
382 	}
383     }
384 
385   if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
386     {
387       priority = priority_arg;
388       if (priority > gomp_max_task_priority_var)
389 	priority = gomp_max_task_priority_var;
390     }
391 
392   if (!if_clause || team == NULL
393       || (thr->task && thr->task->final_task)
394       || team->task_count > 64 * team->nthreads)
395     {
396       struct gomp_task task;
397       gomp_sem_t completion_sem;
398 
399       /* If there are depend clauses and earlier deferred sibling tasks
400 	 with depend clauses, check whether there is a dependency on any of
401 	 them; if so, we need to wait for those tasks.  There is no need to handle
402 	 depend clauses for non-deferred tasks other than this, because
403 	 the parent task is suspended until the child task finishes and thus
404 	 it can't start further child tasks.  */
405       if ((flags & GOMP_TASK_FLAG_DEPEND)
406 	  && thr->task && thr->task->depend_hash)
407 	gomp_task_maybe_wait_for_dependencies (depend);
408 
409       gomp_init_task (&task, thr->task, gomp_icv (false));
410       task.kind = GOMP_TASK_UNDEFERRED;
411       task.final_task = (thr->task && thr->task->final_task)
412 			|| (flags & GOMP_TASK_FLAG_FINAL);
413       task.priority = priority;
414 
415       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
416 	{
417 	  gomp_sem_init (&completion_sem, 0);
418 	  task.completion_sem = &completion_sem;
419 	  *(void **) detach = &task;
420 	  if (data)
421 	    *(void **) data = &task;
422 
423 	  gomp_debug (0, "Thread %d: new event: %p\n",
424 		      thr->ts.team_id, &task);
425 	}
426 
427       if (thr->task)
428 	{
429 	  task.in_tied_task = thr->task->in_tied_task;
430 	  task.taskgroup = thr->task->taskgroup;
431 	}
432       thr->task = &task;
433       if (__builtin_expect (cpyfn != NULL, 0))
434 	{
435 	  char buf[arg_size + arg_align - 1];
436 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
437 				& ~(uintptr_t) (arg_align - 1));
438 	  cpyfn (arg, data);
439 	  fn (arg);
440 	}
441       else
442 	fn (data);
443 
444       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
445 	{
446 	  gomp_sem_wait (&completion_sem);
447 	  gomp_sem_destroy (&completion_sem);
448 	}
449 
450       /* Access to "children" is normally done inside a task_lock
451 	 mutex region, but the only way this particular task.children
452 	 can be set is if this thread's task work function (fn)
453 	 creates children.  So since the setter is *this* thread, we
454 	 need no barriers here when testing for non-NULL.  We can have
455 	 task.children set by the current thread then changed by a
456 	 child thread, but seeing a stale non-NULL value is not a
457 	 problem.  Once past the task_lock acquisition, this thread
458 	 will see the real value of task.children.  */
459       if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
460 	{
461 	  gomp_mutex_lock (&team->task_lock);
462 	  gomp_clear_parent (&task.children_queue);
463 	  gomp_mutex_unlock (&team->task_lock);
464 	}
465       gomp_end_task ();
466     }
467   else
468     {
469       struct gomp_task *task;
470       struct gomp_task *parent = thr->task;
471       struct gomp_taskgroup *taskgroup = parent->taskgroup;
472       char *arg;
473       bool do_wake;
474       size_t depend_size = 0;
475 
476       if (flags & GOMP_TASK_FLAG_DEPEND)
477 	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
478 		       * sizeof (struct gomp_task_depend_entry));
479       task = gomp_malloc (sizeof (*task) + depend_size
480 			  + arg_size + arg_align - 1);
481       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
482 		      & ~(uintptr_t) (arg_align - 1));
483       gomp_init_task (task, parent, gomp_icv (false));
484       task->priority = priority;
485       task->kind = GOMP_TASK_UNDEFERRED;
486       task->in_tied_task = parent->in_tied_task;
487       task->taskgroup = taskgroup;
488       task->deferred_p = true;
489       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
490 	{
491 	  task->detach_team = team;
492 
493 	  *(void **) detach = task;
494 	  if (data)
495 	    *(void **) data = task;
496 
497 	  gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
498 	}
499       thr->task = task;
500       if (cpyfn)
501 	{
502 	  cpyfn (arg, data);
503 	  task->copy_ctors_done = true;
504 	}
505       else
506 	memcpy (arg, data, arg_size);
507       thr->task = parent;
508       task->kind = GOMP_TASK_WAITING;
509       task->fn = fn;
510       task->fn_data = arg;
511       task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
512       gomp_mutex_lock (&team->task_lock);
513       /* If parallel or taskgroup has been cancelled, don't start new
514 	 tasks.  */
515       if (__builtin_expect (gomp_cancel_var, 0)
516 	  && !task->copy_ctors_done)
517 	{
518 	  if (gomp_team_barrier_cancelled (&team->barrier))
519 	    {
520 	    do_cancel:
521 	      gomp_mutex_unlock (&team->task_lock);
522 	      gomp_finish_task (task);
523 	      free (task);
524 	      return;
525 	    }
526 	  if (taskgroup)
527 	    {
528 	      if (taskgroup->cancelled)
529 		goto do_cancel;
530 	      if (taskgroup->workshare
531 		  && taskgroup->prev
532 		  && taskgroup->prev->cancelled)
533 		goto do_cancel;
534 	    }
535 	}
536       if (taskgroup)
537 	taskgroup->num_children++;
538       if (depend_size)
539 	{
540 	  gomp_task_handle_depend (task, parent, depend);
541 	  if (task->num_dependees)
542 	    {
543 	      /* Tasks that depend on other tasks are not put into the
544 		 various waiting queues, so we are done for now.  Said
545 		 tasks are instead put into the queues via
546 		 gomp_task_run_post_handle_dependers() after their
547 		 dependencies have been satisfied.  After which, they
548 		 can be picked up by the various scheduling
549 		 points.  */
550 	      gomp_mutex_unlock (&team->task_lock);
551 	      return;
552 	    }
553 	}
554 
555       priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
556 			     task, priority,
557 			     PRIORITY_INSERT_BEGIN,
558 			     /*adjust_parent_depends_on=*/false,
559 			     task->parent_depends_on);
560       if (taskgroup)
561 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
562 			       task, priority,
563 			       PRIORITY_INSERT_BEGIN,
564 			       /*adjust_parent_depends_on=*/false,
565 			       task->parent_depends_on);
566 
567       priority_queue_insert (PQ_TEAM, &team->task_queue,
568 			     task, priority,
569 			     PRIORITY_INSERT_END,
570 			     /*adjust_parent_depends_on=*/false,
571 			     task->parent_depends_on);
572 
573       ++team->task_count;
574       ++team->task_queued_count;
575       gomp_team_barrier_set_task_pending (&team->barrier);
576       do_wake = team->task_running_count + !parent->in_tied_task
577 		< team->nthreads;
578       gomp_mutex_unlock (&team->task_lock);
579       if (do_wake)
580 	gomp_team_barrier_wake (&team->barrier, 1);
581     }
582 }
583 
584 ialias (GOMP_taskgroup_start)
585 ialias (GOMP_taskgroup_end)
586 ialias (GOMP_taskgroup_reduction_register)
587 
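/* Instantiate the taskloop entry points by textually including taskloop.c
   twice: once with TYPE == long to provide GOMP_taskloop, and once with
   TYPE == unsigned long long (and GOMP_taskloop redefined) to provide
   GOMP_taskloop_ull.  */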
588 #define TYPE long
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
592 #undef TYPE
593 #undef UTYPE
594 #undef TYPE_is_long
595 
596 #define TYPE unsigned long long
597 #define UTYPE TYPE
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
600 #undef TYPE
601 #undef UTYPE
602 #undef GOMP_taskloop
603 
604 static void inline
605 priority_queue_move_task_first (enum priority_queue_type type,
606 				struct priority_queue *head,
607 				struct gomp_task *task)
608 {
609 #if _LIBGOMP_CHECKING_
610   if (!priority_queue_task_in_queue_p (type, head, task))
611     gomp_fatal ("Attempt to move first missing task %p", task);
612 #endif
613   struct priority_list *list;
614   if (priority_queue_multi_p (head))
615     {
616       list = priority_queue_lookup_priority (head, task->priority);
617 #if _LIBGOMP_CHECKING_
618       if (!list)
619 	gomp_fatal ("Unable to find priority %d", task->priority);
620 #endif
621     }
622   else
623     list = &head->l;
624   priority_list_remove (list, task_to_priority_node (type, task), 0);
625   priority_list_insert (type, list, task, task->priority,
626 			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
627 			task->parent_depends_on);
628 }
629 
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631    with team->task_lock held, or is executed in the thread that called
632    gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633    run before it acquires team->task_lock.  */
634 
635 static void
636 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
637 {
638   struct gomp_task *parent = task->parent;
639   if (parent)
640     priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
641 				    task);
642 
643   struct gomp_taskgroup *taskgroup = task->taskgroup;
644   if (taskgroup)
645     priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
646 				    task);
647 
648   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
649 			 PRIORITY_INSERT_BEGIN, false,
650 			 task->parent_depends_on);
651   task->kind = GOMP_TASK_WAITING;
652   if (parent && parent->taskwait)
653     {
654       if (parent->taskwait->in_taskwait)
655 	{
656 	  /* One more task has had its dependencies met.
657 	     Inform any waiters.  */
658 	  parent->taskwait->in_taskwait = false;
659 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
660 	}
661       else if (parent->taskwait->in_depend_wait)
662 	{
663 	  /* One more task has had its dependencies met.
664 	     Inform any waiters.  */
665 	  parent->taskwait->in_depend_wait = false;
666 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
667 	}
668     }
669   if (taskgroup && taskgroup->in_taskgroup_wait)
670     {
671       /* One more task has had its dependencies met.
672 	 Inform any waiters.  */
673       taskgroup->in_taskgroup_wait = false;
674       gomp_sem_post (&taskgroup->taskgroup_sem);
675     }
676 
677   ++team->task_queued_count;
678   gomp_team_barrier_set_task_pending (&team->barrier);
679   /* I'm afraid this can't be done after releasing team->task_lock,
680      as gomp_target_task_completion is run from an unrelated thread and
681      therefore, in between gomp_mutex_unlock and gomp_team_barrier_wake,
682      the team could already be gone.  */
683   if (team->nthreads > team->task_running_count)
684     gomp_team_barrier_wake (&team->barrier, 1);
685 }
686 
687 /* Signal that a target task TTASK has completed the asynchronously
688    running phase and should be requeued as a task to handle the
689    variable unmapping.  */
690 
691 void
692 GOMP_PLUGIN_target_task_completion (void *data)
693 {
694   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
695   struct gomp_task *task = ttask->task;
696   struct gomp_team *team = ttask->team;
697 
698   gomp_mutex_lock (&team->task_lock);
699   if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
700     {
701       ttask->state = GOMP_TARGET_TASK_FINISHED;
702       gomp_mutex_unlock (&team->task_lock);
703       return;
704     }
705   ttask->state = GOMP_TARGET_TASK_FINISHED;
706   gomp_target_task_completion (team, task);
707   gomp_mutex_unlock (&team->task_lock);
708 }
709 
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
711 
712 /* Called for nowait target tasks.  */
713 
714 bool
715 gomp_create_target_task (struct gomp_device_descr *devicep,
716 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
717 			 size_t *sizes, unsigned short *kinds,
718 			 unsigned int flags, void **depend, void **args,
719 			 enum gomp_target_task_state state)
720 {
721   struct gomp_thread *thr = gomp_thread ();
722   struct gomp_team *team = thr->ts.team;
723 
724   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
725   if (__builtin_expect (gomp_cancel_var, 0) && team)
726     {
727       if (gomp_team_barrier_cancelled (&team->barrier))
728 	return true;
729       if (thr->task->taskgroup)
730 	{
731 	  if (thr->task->taskgroup->cancelled)
732 	    return true;
733 	  if (thr->task->taskgroup->workshare
734 	      && thr->task->taskgroup->prev
735 	      && thr->task->taskgroup->prev->cancelled)
736 	    return true;
737 	}
738     }
739 
740   struct gomp_target_task *ttask;
741   struct gomp_task *task;
742   struct gomp_task *parent = thr->task;
743   struct gomp_taskgroup *taskgroup = parent->taskgroup;
744   bool do_wake;
745   size_t depend_size = 0;
746   uintptr_t depend_cnt = 0;
747   size_t tgt_align = 0, tgt_size = 0;
748 
749   if (depend != NULL)
750     {
751       depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
752       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
753     }
754   if (fn)
755     {
756       /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
757 	 are firstprivate on the target task.  */
758       size_t i;
759       for (i = 0; i < mapnum; i++)
760 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
761 	  {
762 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
763 	    if (tgt_align < align)
764 	      tgt_align = align;
765 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
766 	    tgt_size += sizes[i];
767 	  }
768       if (tgt_align)
769 	tgt_size += tgt_align - 1;
770       else
771 	tgt_size = 0;
772     }
773 
774   task = gomp_malloc (sizeof (*task) + depend_size
775 		      + sizeof (*ttask)
776 		      + mapnum * (sizeof (void *) + sizeof (size_t)
777 				  + sizeof (unsigned short))
778 		      + tgt_size);
779   gomp_init_task (task, parent, gomp_icv (false));
780   task->priority = 0;
781   task->kind = GOMP_TASK_WAITING;
782   task->in_tied_task = parent->in_tied_task;
783   task->taskgroup = taskgroup;
784   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
785   ttask->devicep = devicep;
786   ttask->fn = fn;
787   ttask->mapnum = mapnum;
788   ttask->args = args;
789   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
790   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
791   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
792   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
793   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
794   if (tgt_align)
795     {
796       char *tgt = (char *) &ttask->kinds[mapnum];
797       size_t i;
798       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
799       if (al)
800 	tgt += tgt_align - al;
801       tgt_size = 0;
802       for (i = 0; i < mapnum; i++)
803 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
804 	  {
805 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
806 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
807 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
808 	    ttask->hostaddrs[i] = tgt + tgt_size;
809 	    tgt_size = tgt_size + sizes[i];
810 	  }
811     }
812   ttask->flags = flags;
813   ttask->state = state;
814   ttask->task = task;
815   ttask->team = team;
816   task->fn = NULL;
817   task->fn_data = ttask;
818   task->final_task = 0;
819   gomp_mutex_lock (&team->task_lock);
820   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
821   if (__builtin_expect (gomp_cancel_var, 0))
822     {
823       if (gomp_team_barrier_cancelled (&team->barrier))
824 	{
825 	do_cancel:
826 	  gomp_mutex_unlock (&team->task_lock);
827 	  gomp_finish_task (task);
828 	  free (task);
829 	  return true;
830 	}
831       if (taskgroup)
832 	{
833 	  if (taskgroup->cancelled)
834 	    goto do_cancel;
835 	  if (taskgroup->workshare
836 	      && taskgroup->prev
837 	      && taskgroup->prev->cancelled)
838 	    goto do_cancel;
839 	}
840     }
841   if (depend_size)
842     {
843       gomp_task_handle_depend (task, parent, depend);
844       if (task->num_dependees)
845 	{
846 	  if (taskgroup)
847 	    taskgroup->num_children++;
848 	  gomp_mutex_unlock (&team->task_lock);
849 	  return true;
850 	}
851     }
852   if (state == GOMP_TARGET_TASK_DATA)
853     {
854       gomp_task_run_post_handle_depend_hash (task);
855       gomp_mutex_unlock (&team->task_lock);
856       gomp_finish_task (task);
857       free (task);
858       return false;
859     }
860   if (taskgroup)
861     taskgroup->num_children++;
862   /* For async offloading, if we don't need to wait for dependencies,
863      run gomp_target_task_fn right away, essentially scheduling the
864      mapping part of the task in the current thread.  */
865   if (devicep != NULL
866       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
867     {
868       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
869 			     PRIORITY_INSERT_END,
870 			     /*adjust_parent_depends_on=*/false,
871 			     task->parent_depends_on);
872       if (taskgroup)
873 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
874 			       task, 0, PRIORITY_INSERT_END,
875 			       /*adjust_parent_depends_on=*/false,
876 			       task->parent_depends_on);
877       task->pnode[PQ_TEAM].next = NULL;
878       task->pnode[PQ_TEAM].prev = NULL;
879       task->kind = GOMP_TASK_TIED;
880       ++team->task_count;
881       gomp_mutex_unlock (&team->task_lock);
882 
883       thr->task = task;
884       gomp_target_task_fn (task->fn_data);
885       thr->task = parent;
886 
887       gomp_mutex_lock (&team->task_lock);
888       task->kind = GOMP_TASK_ASYNC_RUNNING;
889       /* If GOMP_PLUGIN_target_task_completion has run already
890 	 in between gomp_target_task_fn and the mutex lock,
891 	 perform the requeuing here.  */
892       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
893 	gomp_target_task_completion (team, task);
894       else
895 	ttask->state = GOMP_TARGET_TASK_RUNNING;
896       gomp_mutex_unlock (&team->task_lock);
897       return true;
898     }
899   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
900 			 PRIORITY_INSERT_BEGIN,
901 			 /*adjust_parent_depends_on=*/false,
902 			 task->parent_depends_on);
903   if (taskgroup)
904     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
905 			   PRIORITY_INSERT_BEGIN,
906 			   /*adjust_parent_depends_on=*/false,
907 			   task->parent_depends_on);
908   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
909 			 PRIORITY_INSERT_END,
910 			 /*adjust_parent_depends_on=*/false,
911 			 task->parent_depends_on);
912   ++team->task_count;
913   ++team->task_queued_count;
914   gomp_team_barrier_set_task_pending (&team->barrier);
915   do_wake = team->task_running_count + !parent->in_tied_task
916 	    < team->nthreads;
917   gomp_mutex_unlock (&team->task_lock);
918   if (do_wake)
919     gomp_team_barrier_wake (&team->barrier, 1);
920   return true;
921 }
922 
923 /* Given a parent_depends_on task in LIST, move it to the front of its
924    priority so it is run as soon as possible.
925 
926    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
927 
928    We rearrange the queue such that all parent_depends_on tasks are
929    first, and last_parent_depends_on points to the last such task we
930    rearranged.  For example, given the following tasks in a queue
931    where PD[123] are the parent_depends_on tasks:
932 
933 	task->children
934 	|
935 	V
936 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
937 
938 	We rearrange such that:
939 
940 	task->children
941 	|	       +--- last_parent_depends_on
942 	|	       |
943 	V	       V
944 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
945 
946 static void inline
947 priority_list_upgrade_task (struct priority_list *list,
948 			    struct priority_node *node)
949 {
950   struct priority_node *last_parent_depends_on
951     = list->last_parent_depends_on;
952   if (last_parent_depends_on)
953     {
954       node->prev->next = node->next;
955       node->next->prev = node->prev;
956       node->prev = last_parent_depends_on;
957       node->next = last_parent_depends_on->next;
958       node->prev->next = node;
959       node->next->prev = node;
960     }
961   else if (node != list->tasks)
962     {
963       node->prev->next = node->next;
964       node->next->prev = node->prev;
965       node->prev = list->tasks->prev;
966       node->next = list->tasks;
967       list->tasks = node;
968       node->prev->next = node;
969       node->next->prev = node;
970     }
971   list->last_parent_depends_on = node;
972 }
973 
974 /* Given a parent_depends_on TASK in its parent's children_queue, move
975    it to the front of its priority so it is run as soon as possible.
976 
977    PARENT is passed as an optimization.
978 
979    (This function could be defined in priority_queue.c, but we want it
980    inlined, and putting it in priority_queue.h is not an option, given
981    that gomp_task has not been properly defined at that point).  */
982 
983 static void inline
984 priority_queue_upgrade_task (struct gomp_task *task,
985 			     struct gomp_task *parent)
986 {
987   struct priority_queue *head = &parent->children_queue;
988   struct priority_node *node = &task->pnode[PQ_CHILDREN];
989 #if _LIBGOMP_CHECKING_
990   if (!task->parent_depends_on)
991     gomp_fatal ("priority_queue_upgrade_task: task must be a "
992 		"parent_depends_on task");
993   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
994     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
995 #endif
996   if (priority_queue_multi_p (head))
997     {
998       struct priority_list *list
999 	= priority_queue_lookup_priority (head, task->priority);
1000       priority_list_upgrade_task (list, node);
1001     }
1002   else
1003     priority_list_upgrade_task (&head->l, node);
1004 }
1005 
1006 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1007    the way in LIST so that other tasks can be considered for
1008    execution.  LIST contains tasks of type TYPE.
1009 
1010    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1011    if applicable.  */
1012 
1013 static void inline
1014 priority_list_downgrade_task (enum priority_queue_type type,
1015 			      struct priority_list *list,
1016 			      struct gomp_task *child_task)
1017 {
1018   struct priority_node *node = task_to_priority_node (type, child_task);
1019   if (list->tasks == node)
1020     list->tasks = node->next;
1021   else if (node->next != list->tasks)
1022     {
1023       /* The task in NODE is about to become TIED and TIED tasks
1024 	 cannot come before WAITING tasks.  If we're about to
1025 	 leave the queue in such an indeterminate state, rewire
1026 	 things appropriately.  However, a TIED task at the end is
1027 	 perfectly fine.  */
1028       struct gomp_task *next_task = priority_node_to_task (type, node->next);
1029       if (next_task->kind == GOMP_TASK_WAITING)
1030 	{
1031 	  /* Remove from list.  */
1032 	  node->prev->next = node->next;
1033 	  node->next->prev = node->prev;
1034 	  /* Rewire at the end.  */
1035 	  node->next = list->tasks;
1036 	  node->prev = list->tasks->prev;
1037 	  list->tasks->prev->next = node;
1038 	  list->tasks->prev = node;
1039 	}
1040     }
1041 
1042   /* If the current task is the last_parent_depends_on for its
1043      priority, adjust last_parent_depends_on appropriately.  */
1044   if (__builtin_expect (child_task->parent_depends_on, 0)
1045       && list->last_parent_depends_on == node)
1046     {
1047       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1048       if (node->prev != node
1049 	  && prev_child->kind == GOMP_TASK_WAITING
1050 	  && prev_child->parent_depends_on)
1051 	list->last_parent_depends_on = node->prev;
1052       else
1053 	{
1054 	  /* There are no more parent_depends_on entries waiting
1055 	     to run, clear the list.  */
1056 	  list->last_parent_depends_on = NULL;
1057 	}
1058     }
1059 }
1060 
1061 /* Given a TASK in HEAD that is about to be executed, move it out of
1062    the way so that other tasks can be considered for execution.  HEAD
1063    contains tasks of type TYPE.
1064 
1065    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1066    if applicable.
1067 
1068    (This function could be defined in priority_queue.c, but we want it
1069    inlined, and putting it in priority_queue.h is not an option, given
1070    that gomp_task has not been properly defined at that point).  */
1071 
1072 static void inline
1073 priority_queue_downgrade_task (enum priority_queue_type type,
1074 			       struct priority_queue *head,
1075 			       struct gomp_task *task)
1076 {
1077 #if _LIBGOMP_CHECKING_
1078   if (!priority_queue_task_in_queue_p (type, head, task))
1079     gomp_fatal ("Attempt to downgrade missing task %p", task);
1080 #endif
1081   if (priority_queue_multi_p (head))
1082     {
1083       struct priority_list *list
1084 	= priority_queue_lookup_priority (head, task->priority);
1085       priority_list_downgrade_task (type, list, task);
1086     }
1087   else
1088     priority_list_downgrade_task (type, &head->l, task);
1089 }
1090 
1091 /* Setup CHILD_TASK to execute.  This is done by setting the task to
1092    TIED, and updating all relevant queues so that CHILD_TASK is no
1093    longer chosen for scheduling.  Also, remove CHILD_TASK from the
1094    overall team task queue entirely.
1095 
1096    Return TRUE if task or its containing taskgroup has been
1097    cancelled.  */
1098 
1099 static inline bool
1100 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1101 		   struct gomp_team *team)
1102 {
1103 #if _LIBGOMP_CHECKING_
1104   if (child_task->parent)
1105     priority_queue_verify (PQ_CHILDREN,
1106 			   &child_task->parent->children_queue, true);
1107   if (child_task->taskgroup)
1108     priority_queue_verify (PQ_TASKGROUP,
1109 			   &child_task->taskgroup->taskgroup_queue, false);
1110   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1111 #endif
1112 
1113   /* Task is about to go tied, move it out of the way.  */
1114   if (parent)
1115     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1116 				   child_task);
1117 
1118   /* Task is about to go tied, move it out of the way.  */
1119   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1120   if (taskgroup)
1121     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1122 				   child_task);
1123 
1124   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1125 			 MEMMODEL_RELAXED);
1126   child_task->pnode[PQ_TEAM].next = NULL;
1127   child_task->pnode[PQ_TEAM].prev = NULL;
1128   child_task->kind = GOMP_TASK_TIED;
1129 
1130   if (--team->task_queued_count == 0)
1131     gomp_team_barrier_clear_task_pending (&team->barrier);
1132   if (__builtin_expect (gomp_cancel_var, 0)
1133       && !child_task->copy_ctors_done)
1134     {
1135       if (gomp_team_barrier_cancelled (&team->barrier))
1136 	return true;
1137       if (taskgroup)
1138 	{
1139 	  if (taskgroup->cancelled)
1140 	    return true;
1141 	  if (taskgroup->workshare
1142 	      && taskgroup->prev
1143 	      && taskgroup->prev->cancelled)
1144 	    return true;
1145 	}
1146     }
1147   return false;
1148 }
1149 
1150 static void
1151 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1152 {
1153   struct gomp_task *parent = child_task->parent;
1154   size_t i;
1155 
1156   for (i = 0; i < child_task->depend_count; i++)
1157     if (!child_task->depend[i].redundant)
1158       {
1159 	if (child_task->depend[i].next)
1160 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1161 	if (child_task->depend[i].prev)
1162 	  child_task->depend[i].prev->next = child_task->depend[i].next;
1163 	else
1164 	  {
1165 	    hash_entry_type *slot
1166 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1167 				NO_INSERT);
1168 	    if (*slot != &child_task->depend[i])
1169 	      abort ();
1170 	    if (child_task->depend[i].next)
1171 	      *slot = child_task->depend[i].next;
1172 	    else
1173 	      htab_clear_slot (parent->depend_hash, slot);
1174 	  }
1175       }
1176 }
1177 
1178 /* After a CHILD_TASK has been run, adjust the dependency queue for
1179    each task that depends on CHILD_TASK, to record the fact that there
1180    is one less dependency to worry about.  If a task that depended on
1181    CHILD_TASK now has no dependencies, place it in the various queues
1182    so it gets scheduled to run.
1183 
1184    TEAM is the team to which CHILD_TASK belongs.  */
1185 
1186 static size_t
1187 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1188 				     struct gomp_team *team)
1189 {
1190   struct gomp_task *parent = child_task->parent;
1191   size_t i, count = child_task->dependers->n_elem, ret = 0;
1192   for (i = 0; i < count; i++)
1193     {
1194       struct gomp_task *task = child_task->dependers->elem[i];
1195 
1196       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1197 	 TASK's remaining dependencies.  Once TASK has no other
1198 	 dependencies, put it into the various queues so it will get
1199 	 scheduled for execution.  */
1200       if (--task->num_dependees != 0)
1201 	continue;
1202 
1203       struct gomp_taskgroup *taskgroup = task->taskgroup;
1204       if (parent)
1205 	{
1206 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1207 				 task, task->priority,
1208 				 PRIORITY_INSERT_BEGIN,
1209 				 /*adjust_parent_depends_on=*/true,
1210 				 task->parent_depends_on);
1211 	  if (parent->taskwait)
1212 	    {
1213 	      if (parent->taskwait->in_taskwait)
1214 		{
1215 		  /* One more task has had its dependencies met.
1216 		     Inform any waiters.  */
1217 		  parent->taskwait->in_taskwait = false;
1218 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1219 		}
1220 	      else if (parent->taskwait->in_depend_wait)
1221 		{
1222 		  /* One more task has had its dependencies met.
1223 		     Inform any waiters.  */
1224 		  parent->taskwait->in_depend_wait = false;
1225 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1226 		}
1227 	    }
1228 	}
1229       if (taskgroup)
1230 	{
1231 	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1232 				 task, task->priority,
1233 				 PRIORITY_INSERT_BEGIN,
1234 				 /*adjust_parent_depends_on=*/false,
1235 				 task->parent_depends_on);
1236 	  if (taskgroup->in_taskgroup_wait)
1237 	    {
1238 	      /* One more task has had its dependencies met.
1239 		 Inform any waiters.  */
1240 	      taskgroup->in_taskgroup_wait = false;
1241 	      gomp_sem_post (&taskgroup->taskgroup_sem);
1242 	    }
1243 	}
1244       priority_queue_insert (PQ_TEAM, &team->task_queue,
1245 			     task, task->priority,
1246 			     PRIORITY_INSERT_END,
1247 			     /*adjust_parent_depends_on=*/false,
1248 			     task->parent_depends_on);
1249       ++team->task_count;
1250       ++team->task_queued_count;
1251       ++ret;
1252     }
1253   free (child_task->dependers);
1254   child_task->dependers = NULL;
1255   if (ret > 1)
1256     gomp_team_barrier_set_task_pending (&team->barrier);
1257   return ret;
1258 }
1259 
1260 static inline size_t
1261 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1262 				  struct gomp_team *team)
1263 {
1264   if (child_task->depend_count == 0)
1265     return 0;
1266 
1267   /* If the parent is gone already, the hash table has been freed and
1268      nothing will use it anymore, so there is no need to remove anything from it.  */
1269   if (child_task->parent != NULL)
1270     gomp_task_run_post_handle_depend_hash (child_task);
1271 
1272   if (child_task->dependers == NULL)
1273     return 0;
1274 
1275   return gomp_task_run_post_handle_dependers (child_task, team);
1276 }
1277 
1278 /* Remove CHILD_TASK from its parent.  */
1279 
1280 static inline void
1281 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1282 {
1283   struct gomp_task *parent = child_task->parent;
1284   if (parent == NULL)
1285     return;
1286 
1287   /* If this was the last task the parent was depending on,
1288      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1289      clean up and return.  */
1290   if (__builtin_expect (child_task->parent_depends_on, 0)
1291       && --parent->taskwait->n_depend == 0
1292       && parent->taskwait->in_depend_wait)
1293     {
1294       parent->taskwait->in_depend_wait = false;
1295       gomp_sem_post (&parent->taskwait->taskwait_sem);
1296     }
1297 
1298   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1299 			     child_task, MEMMODEL_RELEASE)
1300       && parent->taskwait && parent->taskwait->in_taskwait)
1301     {
1302       parent->taskwait->in_taskwait = false;
1303       gomp_sem_post (&parent->taskwait->taskwait_sem);
1304     }
1305   child_task->pnode[PQ_CHILDREN].next = NULL;
1306   child_task->pnode[PQ_CHILDREN].prev = NULL;
1307 }
1308 
1309 /* Remove CHILD_TASK from its taskgroup.  */
1310 
1311 static inline void
1312 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1313 {
1314   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1315   if (taskgroup == NULL)
1316     return;
1317   bool empty = priority_queue_remove (PQ_TASKGROUP,
1318 				      &taskgroup->taskgroup_queue,
1319 				      child_task, MEMMODEL_RELAXED);
1320   child_task->pnode[PQ_TASKGROUP].next = NULL;
1321   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1322   if (taskgroup->num_children > 1)
1323     --taskgroup->num_children;
1324   else
1325     {
1326       /* We access taskgroup->num_children in GOMP_taskgroup_end
1327 	 outside of the task lock mutex region, so we
1328 	 need a release barrier here to ensure that memory
1329 	 written by child_task->fn above is flushed
1330 	 before the zero is written.  */
1331       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1332     }
1333   if (empty && taskgroup->in_taskgroup_wait)
1334     {
1335       taskgroup->in_taskgroup_wait = false;
1336       gomp_sem_post (&taskgroup->taskgroup_sem);
1337     }
1338 }
1339 
1340 void
1341 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1342 {
1343   struct gomp_thread *thr = gomp_thread ();
1344   struct gomp_team *team = thr->ts.team;
1345   struct gomp_task *task = thr->task;
1346   struct gomp_task *child_task = NULL;
1347   struct gomp_task *to_free = NULL;
1348   int do_wake = 0;
1349 
1350   gomp_mutex_lock (&team->task_lock);
1351   if (gomp_barrier_last_thread (state))
1352     {
1353       if (team->task_count == 0)
1354 	{
1355 	  gomp_team_barrier_done (&team->barrier, state);
1356 	  gomp_mutex_unlock (&team->task_lock);
1357 	  gomp_team_barrier_wake (&team->barrier, 0);
1358 	  return;
1359 	}
1360       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1361     }
1362 
1363   while (1)
1364     {
1365       bool cancelled = false;
1366 
1367       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1368 	{
1369 	  bool ignored;
1370 	  child_task
1371 	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1372 					PQ_IGNORED, NULL,
1373 					&ignored);
1374 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1375 					 team);
1376 	  if (__builtin_expect (cancelled, 0))
1377 	    {
1378 	      if (to_free)
1379 		{
1380 		  gomp_finish_task (to_free);
1381 		  free (to_free);
1382 		  to_free = NULL;
1383 		}
1384 	      goto finish_cancelled;
1385 	    }
1386 	  team->task_running_count++;
1387 	  child_task->in_tied_task = true;
1388 	}
1389       else if (team->task_count == 0
1390 	       && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1391 	{
1392 	  gomp_team_barrier_done (&team->barrier, state);
1393 	  gomp_mutex_unlock (&team->task_lock);
1394 	  gomp_team_barrier_wake (&team->barrier, 0);
1395 	  if (to_free)
1396 	    {
1397 	      gomp_finish_task (to_free);
1398 	      free (to_free);
1399 	    }
1400 	  return;
1401 	}
1402       gomp_mutex_unlock (&team->task_lock);
1403       if (do_wake)
1404 	{
1405 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1406 	  do_wake = 0;
1407 	}
1408       if (to_free)
1409 	{
1410 	  gomp_finish_task (to_free);
1411 	  free (to_free);
1412 	  to_free = NULL;
1413 	}
1414       if (child_task)
1415 	{
1416 	  thr->task = child_task;
1417 	  if (__builtin_expect (child_task->fn == NULL, 0))
1418 	    {
1419 	      if (gomp_target_task_fn (child_task->fn_data))
1420 		{
1421 		  thr->task = task;
1422 		  gomp_mutex_lock (&team->task_lock);
1423 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1424 		  team->task_running_count--;
1425 		  struct gomp_target_task *ttask
1426 		    = (struct gomp_target_task *) child_task->fn_data;
1427 		  /* If GOMP_PLUGIN_target_task_completion has run already
1428 		     in between gomp_target_task_fn and the mutex lock,
1429 		     perform the requeuing here.  */
1430 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1431 		    gomp_target_task_completion (team, child_task);
1432 		  else
1433 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1434 		  child_task = NULL;
1435 		  continue;
1436 		}
1437 	    }
1438 	  else
1439 	    child_task->fn (child_task->fn_data);
1440 	  thr->task = task;
1441 	}
1442       else
1443 	return;
1444       gomp_mutex_lock (&team->task_lock);
1445       if (child_task)
1446 	{
1447 	  if (child_task->detach_team)
1448 	    {
1449 	      assert (child_task->detach_team == team);
1450 	      child_task->kind = GOMP_TASK_DETACHED;
1451 	      ++team->task_detach_count;
1452 	      --team->task_running_count;
1453 	      gomp_debug (0,
1454 			  "thread %d: task with event %p finished without "
1455 			  "completion event fulfilled in team barrier\n",
1456 			  thr->ts.team_id, child_task);
1457 	      child_task = NULL;
1458 	      continue;
1459 	    }
1460 
1461 	 finish_cancelled:;
1462 	  size_t new_tasks
1463 	    = gomp_task_run_post_handle_depend (child_task, team);
1464 	  gomp_task_run_post_remove_parent (child_task);
1465 	  gomp_clear_parent (&child_task->children_queue);
1466 	  gomp_task_run_post_remove_taskgroup (child_task);
1467 	  to_free = child_task;
1468 	  if (!cancelled)
1469 	    team->task_running_count--;
1470 	  child_task = NULL;
1471 	  if (new_tasks > 1)
1472 	    {
1473 	      do_wake = team->nthreads - team->task_running_count;
1474 	      if (do_wake > new_tasks)
1475 		do_wake = new_tasks;
1476 	    }
1477 	  --team->task_count;
1478 	}
1479     }
1480 }
1481 
1482 /* Called when encountering a taskwait directive.
1483 
1484    Wait for all children of the current task.  */
1485 
1486 void
1487 GOMP_taskwait (void)
1488 {
1489   struct gomp_thread *thr = gomp_thread ();
1490   struct gomp_team *team = thr->ts.team;
1491   struct gomp_task *task = thr->task;
1492   struct gomp_task *child_task = NULL;
1493   struct gomp_task *to_free = NULL;
1494   struct gomp_taskwait taskwait;
1495   int do_wake = 0;
1496 
1497   /* The acquire barrier on load of task->children here synchronizes
1498      with the write of a NULL in gomp_task_run_post_remove_parent.  It is
1499      not necessary that we synchronize with other non-NULL writes at
1500      this point, but we must ensure that all writes to memory by a
1501      child thread task work function are seen before we exit from
1502      GOMP_taskwait.  */
1503   if (task == NULL
1504       || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1505     return;
1506 
1507   memset (&taskwait, 0, sizeof (taskwait));
1508   bool child_q = false;
1509   gomp_mutex_lock (&team->task_lock);
1510   while (1)
1511     {
1512       bool cancelled = false;
1513       if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1514 	{
1515 	  bool destroy_taskwait = task->taskwait != NULL;
1516 	  task->taskwait = NULL;
1517 	  gomp_mutex_unlock (&team->task_lock);
1518 	  if (to_free)
1519 	    {
1520 	      gomp_finish_task (to_free);
1521 	      free (to_free);
1522 	    }
1523 	  if (destroy_taskwait)
1524 	    gomp_sem_destroy (&taskwait.taskwait_sem);
1525 	  return;
1526 	}
1527       struct gomp_task *next_task
1528 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1529 				    PQ_TEAM, &team->task_queue, &child_q);
1530       if (next_task->kind == GOMP_TASK_WAITING)
1531 	{
1532 	  child_task = next_task;
1533 	  cancelled
1534 	    = gomp_task_run_pre (child_task, task, team);
1535 	  if (__builtin_expect (cancelled, 0))
1536 	    {
1537 	      if (to_free)
1538 		{
1539 		  gomp_finish_task (to_free);
1540 		  free (to_free);
1541 		  to_free = NULL;
1542 		}
1543 	      goto finish_cancelled;
1544 	    }
1545 	}
1546       else
1547 	{
1548 	/* All tasks we are waiting for are either running in other
1549 	   threads, are detached and waiting for their completion event to
1550 	   be fulfilled, or have not had their
1551 	   dependencies met (so they're not even in the queue).  Wait
1552 	   for them.  */
1553 	  if (task->taskwait == NULL)
1554 	    {
1555 	      taskwait.in_depend_wait = false;
1556 	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1557 	      task->taskwait = &taskwait;
1558 	    }
1559 	  taskwait.in_taskwait = true;
1560 	}
1561       gomp_mutex_unlock (&team->task_lock);
1562       if (do_wake)
1563 	{
1564 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1565 	  do_wake = 0;
1566 	}
1567       if (to_free)
1568 	{
1569 	  gomp_finish_task (to_free);
1570 	  free (to_free);
1571 	  to_free = NULL;
1572 	}
1573       if (child_task)
1574 	{
1575 	  thr->task = child_task;
1576 	  if (__builtin_expect (child_task->fn == NULL, 0))
1577 	    {
1578 	      if (gomp_target_task_fn (child_task->fn_data))
1579 		{
1580 		  thr->task = task;
1581 		  gomp_mutex_lock (&team->task_lock);
1582 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1583 		  struct gomp_target_task *ttask
1584 		    = (struct gomp_target_task *) child_task->fn_data;
1585 		  /* If GOMP_PLUGIN_target_task_completion has run already
1586 		     in between gomp_target_task_fn and the mutex lock,
1587 		     perform the requeuing here.  */
1588 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1589 		    gomp_target_task_completion (team, child_task);
1590 		  else
1591 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1592 		  child_task = NULL;
1593 		  continue;
1594 		}
1595 	    }
1596 	  else
1597 	    child_task->fn (child_task->fn_data);
1598 	  thr->task = task;
1599 	}
1600       else
1601 	gomp_sem_wait (&taskwait.taskwait_sem);
1602       gomp_mutex_lock (&team->task_lock);
1603       if (child_task)
1604 	{
1605 	  if (child_task->detach_team)
1606 	    {
1607 	      assert (child_task->detach_team == team);
1608 	      child_task->kind = GOMP_TASK_DETACHED;
1609 	      ++team->task_detach_count;
1610 	      gomp_debug (0,
1611 			  "thread %d: task with event %p finished without "
1612 			  "completion event fulfilled in taskwait\n",
1613 			  thr->ts.team_id, child_task);
1614 	      child_task = NULL;
1615 	      continue;
1616 	    }
1617 
1618 	 finish_cancelled:;
1619 	  size_t new_tasks
1620 	    = gomp_task_run_post_handle_depend (child_task, team);
1621 
1622 	  if (child_q)
1623 	    {
1624 	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1625 				     child_task, MEMMODEL_RELAXED);
1626 	      child_task->pnode[PQ_CHILDREN].next = NULL;
1627 	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1628 	    }
1629 
1630 	  gomp_clear_parent (&child_task->children_queue);
1631 
1632 	  gomp_task_run_post_remove_taskgroup (child_task);
1633 
1634 	  to_free = child_task;
1635 	  child_task = NULL;
1636 	  team->task_count--;
1637 	  if (new_tasks > 1)
1638 	    {
1639 	      do_wake = team->nthreads - team->task_running_count
1640 			- !task->in_tied_task;
1641 	      if (do_wake > new_tasks)
1642 		do_wake = new_tasks;
1643 	    }
1644 	}
1645     }
1646 }
1647 
1648 /* Called when encountering a taskwait directive with depend clause(s).
1649    Wait as if it were a mergeable included task construct with an empty body.  */
1650 
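/* Illustrative sketch (not part of this file): `#pragma omp taskwait
   depend (in: x)' is lowered into a call to GOMP_taskwait_depend with a
   DEPEND array describing that single `in' dependence, so only sibling
   tasks with a conflicting dependence on x are waited for, rather than
   all children as in plain GOMP_taskwait.  */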
1651 void
1652 GOMP_taskwait_depend (void **depend)
1653 {
1654   struct gomp_thread *thr = gomp_thread ();
1655   struct gomp_team *team = thr->ts.team;
1656 
1657   /* If parallel or taskgroup has been cancelled, return early.  */
1658   if (__builtin_expect (gomp_cancel_var, 0) && team)
1659     {
1660       if (gomp_team_barrier_cancelled (&team->barrier))
1661 	return;
1662       if (thr->task->taskgroup)
1663 	{
1664 	  if (thr->task->taskgroup->cancelled)
1665 	    return;
1666 	  if (thr->task->taskgroup->workshare
1667 	      && thr->task->taskgroup->prev
1668 	      && thr->task->taskgroup->prev->cancelled)
1669 	    return;
1670 	}
1671     }
1672 
1673   if (thr->task && thr->task->depend_hash)
1674     gomp_task_maybe_wait_for_dependencies (depend);
1675 }
1676 
1677 /* An undeferred task is about to run.  Wait for all tasks that this
1678    undeferred task depends on.
1679 
1680    This is done by first putting all known ready dependencies
1681    (dependencies that have their own dependencies met) at the top of
1682    the scheduling queues.  Then we iterate through these imminently
1683    ready tasks (and possibly other high priority tasks), and run them.
1684    If we run out of ready dependencies to execute, we either wait for
1685    the remaining dependencies to finish, or wait for them to get
1686    scheduled so we can run them.
1687 
1688    DEPEND is as in GOMP_task.  */
1689 
1690 void
1691 gomp_task_maybe_wait_for_dependencies (void **depend)
1692 {
1693   struct gomp_thread *thr = gomp_thread ();
1694   struct gomp_task *task = thr->task;
1695   struct gomp_team *team = thr->ts.team;
1696   struct gomp_task_depend_entry elem, *ent = NULL;
1697   struct gomp_taskwait taskwait;
1698   size_t orig_ndepend = (uintptr_t) depend[0];
1699   size_t nout = (uintptr_t) depend[1];
1700   size_t ndepend = orig_ndepend;
1701   size_t normal = ndepend;
1702   size_t n = 2;
1703   size_t i;
1704   size_t num_awaited = 0;
1705   struct gomp_task *child_task = NULL;
1706   struct gomp_task *to_free = NULL;
1707   int do_wake = 0;
1708 
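  /* Decode the DEPEND array; the layout is the same as for GOMP_task.
     If depend[0] is non-zero, it is the number of depend entries and
     depend[1] the number of out/inout entries, with the addresses
     starting at depend[2].  Otherwise depend[1] is the total count,
     depend[2] the number of out/inout entries, depend[3] the number of
     mutexinoutset entries, depend[4] the number of in entries, the
     addresses start at depend[5], and any remaining entries are
     pointers to omp_depend_t objects.  */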
1709   if (ndepend == 0)
1710     {
1711       ndepend = nout;
1712       nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1713       normal = nout + (uintptr_t) depend[4];
1714       n = 5;
1715     }
1716   gomp_mutex_lock (&team->task_lock);
1717   for (i = 0; i < ndepend; i++)
1718     {
1719       elem.addr = depend[i + n];
1720       elem.is_in = i >= nout;
1721       if (__builtin_expect (i >= normal, 0))
1722 	{
1723 	  void **d = (void **) elem.addr;
1724 	  switch ((uintptr_t) d[1])
1725 	    {
1726 	    case GOMP_DEPEND_IN:
1727 	      break;
1728 	    case GOMP_DEPEND_OUT:
1729 	    case GOMP_DEPEND_INOUT:
1730 	    case GOMP_DEPEND_MUTEXINOUTSET:
1731 	      elem.is_in = 0;
1732 	      break;
1733 	    default:
1734 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
1735 			  (int) (uintptr_t) d[1]);
1736 	    }
1737 	  elem.addr = d[0];
1738 	}
1739       ent = htab_find (task->depend_hash, &elem);
1740       for (; ent; ent = ent->next)
1741 	if (elem.is_in && ent->is_in)
1742 	  continue;
1743 	else
1744 	  {
1745 	    struct gomp_task *tsk = ent->task;
1746 	    if (!tsk->parent_depends_on)
1747 	      {
1748 		tsk->parent_depends_on = true;
1749 		++num_awaited;
1750 		/* If dependency TSK itself has no dependencies and is
1751 		   ready to run, move it up front so that we run it as
1752 		   soon as possible.  */
1753 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1754 		  priority_queue_upgrade_task (tsk, task);
1755 	      }
1756 	  }
1757     }
1758   if (num_awaited == 0)
1759     {
1760       gomp_mutex_unlock (&team->task_lock);
1761       return;
1762     }
1763 
1764   memset (&taskwait, 0, sizeof (taskwait));
1765   taskwait.n_depend = num_awaited;
1766   gomp_sem_init (&taskwait.taskwait_sem, 0);
1767   task->taskwait = &taskwait;
1768 
1769   while (1)
1770     {
1771       bool cancelled = false;
1772       if (taskwait.n_depend == 0)
1773 	{
1774 	  task->taskwait = NULL;
1775 	  gomp_mutex_unlock (&team->task_lock);
1776 	  if (to_free)
1777 	    {
1778 	      gomp_finish_task (to_free);
1779 	      free (to_free);
1780 	    }
1781 	  gomp_sem_destroy (&taskwait.taskwait_sem);
1782 	  return;
1783 	}
1784 
1785       /* Theoretically, when we have multiple priorities, we should
1786 	 choose between the highest priority item in
1787 	 task->children_queue and team->task_queue here, so we should
1788 	 use priority_queue_next_task().  However, since we are
1789 	 running an undeferred task, perhaps that makes all tasks it
1790 	 depends on undeferred, thus a priority of INF?  This would
1791 	 make it unnecessary to take anything but the dependencies
1792 	 into account here.
1793 
1794 	 On the other hand, if we want to use priority_queue_next_task(),
1795 	 care should be taken to only use priority_queue_remove()
1796 	 below if the task was actually removed from the children
1797 	 queue.  */
1798       bool ignored;
1799       struct gomp_task *next_task
1800 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1801 				    PQ_IGNORED, NULL, &ignored);
1802 
1803       if (next_task->kind == GOMP_TASK_WAITING)
1804 	{
1805 	  child_task = next_task;
1806 	  cancelled
1807 	    = gomp_task_run_pre (child_task, task, team);
1808 	  if (__builtin_expect (cancelled, 0))
1809 	    {
1810 	      if (to_free)
1811 		{
1812 		  gomp_finish_task (to_free);
1813 		  free (to_free);
1814 		  to_free = NULL;
1815 		}
1816 	      goto finish_cancelled;
1817 	    }
1818 	}
1819       else
1820 	/* All tasks we are waiting for are either running in other
1821 	   threads, or they are tasks that have not had their
1822 	   dependencies met (so they're not even in the queue).  Wait
1823 	   for them.  */
1824 	taskwait.in_depend_wait = true;
1825       gomp_mutex_unlock (&team->task_lock);
1826       if (do_wake)
1827 	{
1828 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1829 	  do_wake = 0;
1830 	}
1831       if (to_free)
1832 	{
1833 	  gomp_finish_task (to_free);
1834 	  free (to_free);
1835 	  to_free = NULL;
1836 	}
1837       if (child_task)
1838 	{
1839 	  thr->task = child_task;
1840 	  if (__builtin_expect (child_task->fn == NULL, 0))
1841 	    {
1842 	      if (gomp_target_task_fn (child_task->fn_data))
1843 		{
1844 		  thr->task = task;
1845 		  gomp_mutex_lock (&team->task_lock);
1846 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1847 		  struct gomp_target_task *ttask
1848 		    = (struct gomp_target_task *) child_task->fn_data;
1849 		  /* If GOMP_PLUGIN_target_task_completion has run already
1850 		     in between gomp_target_task_fn and the mutex lock,
1851 		     perform the requeuing here.  */
1852 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1853 		    gomp_target_task_completion (team, child_task);
1854 		  else
1855 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1856 		  child_task = NULL;
1857 		  continue;
1858 		}
1859 	    }
1860 	  else
1861 	    child_task->fn (child_task->fn_data);
1862 	  thr->task = task;
1863 	}
1864       else
1865 	gomp_sem_wait (&taskwait.taskwait_sem);
1866       gomp_mutex_lock (&team->task_lock);
1867       if (child_task)
1868 	{
1869 	 finish_cancelled:;
1870 	  size_t new_tasks
1871 	    = gomp_task_run_post_handle_depend (child_task, team);
1872 	  if (child_task->parent_depends_on)
1873 	    --taskwait.n_depend;
1874 
1875 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1876 				 child_task, MEMMODEL_RELAXED);
1877 	  child_task->pnode[PQ_CHILDREN].next = NULL;
1878 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1879 
1880 	  gomp_clear_parent (&child_task->children_queue);
1881 	  gomp_task_run_post_remove_taskgroup (child_task);
1882 	  to_free = child_task;
1883 	  child_task = NULL;
1884 	  team->task_count--;
1885 	  if (new_tasks > 1)
1886 	    {
1887 	      do_wake = team->nthreads - team->task_running_count
1888 			- !task->in_tied_task;
1889 	      if (do_wake > new_tasks)
1890 		do_wake = new_tasks;
1891 	    }
1892 	}
1893     }
1894 }
1895 
1896 /* Called when encountering a taskyield directive.  */
1897 
1898 void
1899 GOMP_taskyield (void)
1900 {
1901   /* Nothing at the moment.  */
1902 }
1903 
1904 static inline struct gomp_taskgroup *
1905 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1906 {
1907   struct gomp_taskgroup *taskgroup
1908     = gomp_malloc (sizeof (struct gomp_taskgroup));
1909   taskgroup->prev = prev;
1910   priority_queue_init (&taskgroup->taskgroup_queue);
1911   taskgroup->reductions = prev ? prev->reductions : NULL;
1912   taskgroup->in_taskgroup_wait = false;
1913   taskgroup->cancelled = false;
1914   taskgroup->workshare = false;
1915   taskgroup->num_children = 0;
1916   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1917   return taskgroup;
1918 }
1919 
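/* Called on entry to a `#pragma omp taskgroup' region; the matching
   GOMP_taskgroup_end below waits until all tasks generated in the
   region, and their descendant tasks, have completed.  Illustrative
   sketch (not part of this file; `work' is a placeholder name):

	#pragma omp taskgroup
	{
	  #pragma omp task
	  work ();
	}

   The compiler emits GOMP_taskgroup_start () at the start of the region
   and GOMP_taskgroup_end () at its end.  */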
1920 void
1921 GOMP_taskgroup_start (void)
1922 {
1923   struct gomp_thread *thr = gomp_thread ();
1924   struct gomp_team *team = thr->ts.team;
1925   struct gomp_task *task = thr->task;
1926 
1927   /* If team is NULL, all tasks are executed as
1928      GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
1929      taskgroup and their descendant tasks will be finished
1930      by the time GOMP_taskgroup_end is called.  */
1931   if (team == NULL)
1932     return;
1933   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1934 }
1935 
1936 void
1937 GOMP_taskgroup_end (void)
1938 {
1939   struct gomp_thread *thr = gomp_thread ();
1940   struct gomp_team *team = thr->ts.team;
1941   struct gomp_task *task = thr->task;
1942   struct gomp_taskgroup *taskgroup;
1943   struct gomp_task *child_task = NULL;
1944   struct gomp_task *to_free = NULL;
1945   int do_wake = 0;
1946 
1947   if (team == NULL)
1948     return;
1949   taskgroup = task->taskgroup;
1950   if (__builtin_expect (taskgroup == NULL, 0)
1951       && thr->ts.level == 0)
1952     {
1953       /* This can happen if GOMP_taskgroup_start is called when
1954 	 thr->ts.team == NULL, but inside of the taskgroup there
1955 	 is #pragma omp target nowait that creates an implicit
1956 	 team with a single thread.  In this case, we want to wait
1957 	 for all outstanding tasks in this team.  */
1958       gomp_team_barrier_wait (&team->barrier);
1959       return;
1960     }
1961 
1962   /* The acquire barrier on load of taskgroup->num_children here
1963      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1964      It is not necessary that we synchronize with other non-0 writes at
1965      this point, but we must ensure that all writes to memory by a
1966      child thread task work function are seen before we exit from
1967      GOMP_taskgroup_end.  */
1968   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1969     goto finish;
1970 
1971   bool unused;
1972   gomp_mutex_lock (&team->task_lock);
1973   while (1)
1974     {
1975       bool cancelled = false;
1976       if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1977 				  MEMMODEL_RELAXED))
1978 	{
1979 	  if (taskgroup->num_children)
1980 	    {
1981 	      if (priority_queue_empty_p (&task->children_queue,
1982 					  MEMMODEL_RELAXED))
1983 		goto do_wait;
1984 	      child_task
1985 		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1986 					    PQ_TEAM, &team->task_queue,
1987 					    &unused);
1988 	    }
1989 	  else
1990 	    {
1991 	      gomp_mutex_unlock (&team->task_lock);
1992 	      if (to_free)
1993 		{
1994 		  gomp_finish_task (to_free);
1995 		  free (to_free);
1996 		}
1997 	      goto finish;
1998 	    }
1999 	}
2000       else
2001 	child_task
2002 	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2003 				      PQ_TEAM, &team->task_queue, &unused);
2004       if (child_task->kind == GOMP_TASK_WAITING)
2005 	{
2006 	  cancelled
2007 	    = gomp_task_run_pre (child_task, child_task->parent, team);
2008 	  if (__builtin_expect (cancelled, 0))
2009 	    {
2010 	      if (to_free)
2011 		{
2012 		  gomp_finish_task (to_free);
2013 		  free (to_free);
2014 		  to_free = NULL;
2015 		}
2016 	      goto finish_cancelled;
2017 	    }
2018 	}
2019       else
2020 	{
2021 	  child_task = NULL;
2022 	 do_wait:
2023 	/* All tasks we are waiting for are either running in other
2024 	   threads, or they are tasks that have not had their
2025 	   dependencies met (so they're not even in the queue).  Wait
2026 	   for them.  */
2027 	  taskgroup->in_taskgroup_wait = true;
2028 	}
2029       gomp_mutex_unlock (&team->task_lock);
2030       if (do_wake)
2031 	{
2032 	  gomp_team_barrier_wake (&team->barrier, do_wake);
2033 	  do_wake = 0;
2034 	}
2035       if (to_free)
2036 	{
2037 	  gomp_finish_task (to_free);
2038 	  free (to_free);
2039 	  to_free = NULL;
2040 	}
2041       if (child_task)
2042 	{
2043 	  thr->task = child_task;
2044 	  if (__builtin_expect (child_task->fn == NULL, 0))
2045 	    {
2046 	      if (gomp_target_task_fn (child_task->fn_data))
2047 		{
2048 		  thr->task = task;
2049 		  gomp_mutex_lock (&team->task_lock);
2050 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2051 		  struct gomp_target_task *ttask
2052 		    = (struct gomp_target_task *) child_task->fn_data;
2053 		  /* If GOMP_PLUGIN_target_task_completion has run already
2054 		     in between gomp_target_task_fn and the mutex lock,
2055 		     perform the requeuing here.  */
2056 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2057 		    gomp_target_task_completion (team, child_task);
2058 		  else
2059 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
2060 		  child_task = NULL;
2061 		  continue;
2062 		}
2063 	    }
2064 	  else
2065 	    child_task->fn (child_task->fn_data);
2066 	  thr->task = task;
2067 	}
2068       else
2069 	gomp_sem_wait (&taskgroup->taskgroup_sem);
2070       gomp_mutex_lock (&team->task_lock);
2071       if (child_task)
2072 	{
2073 	  if (child_task->detach_team)
2074 	    {
2075 	      assert (child_task->detach_team == team);
2076 	      child_task->kind = GOMP_TASK_DETACHED;
2077 	      ++team->task_detach_count;
2078 	      gomp_debug (0,
2079 			  "thread %d: task with event %p finished without "
2080 			  "completion event fulfilled in taskgroup\n",
2081 			  thr->ts.team_id, child_task);
2082 	      child_task = NULL;
2083 	      continue;
2084 	    }
2085 
2086 	 finish_cancelled:;
2087 	  size_t new_tasks
2088 	    = gomp_task_run_post_handle_depend (child_task, team);
2089 	  gomp_task_run_post_remove_parent (child_task);
2090 	  gomp_clear_parent (&child_task->children_queue);
2091 	  gomp_task_run_post_remove_taskgroup (child_task);
2092 	  to_free = child_task;
2093 	  child_task = NULL;
2094 	  team->task_count--;
2095 	  if (new_tasks > 1)
2096 	    {
2097 	      do_wake = team->nthreads - team->task_running_count
2098 			- !task->in_tied_task;
2099 	      if (do_wake > new_tasks)
2100 		do_wake = new_tasks;
2101 	    }
2102 	}
2103     }
2104 
2105  finish:
2106   task->taskgroup = taskgroup->prev;
2107   gomp_sem_destroy (&taskgroup->taskgroup_sem);
2108   free (taskgroup);
2109 }
2110 
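/* Register the chain of task reduction descriptors in DATA (the layout
   is described above GOMP_taskgroup_reduction_register below).  For each
   descriptor, either reuse the private copies allocated earlier by
   another thread (ORIG, for worksharing task reductions) or allocate and
   zero a block of NTHREADS private copies, then build a hash table, also
   containing the entries of any previously registered chain OLD, that
   maps the address of each original list item to its descriptor entry.  */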
2111 static inline __attribute__((always_inline)) void
2112 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2113 			 unsigned nthreads)
2114 {
2115   size_t total_cnt = 0;
2116   uintptr_t *d = data;
2117   struct htab *old_htab = NULL, *new_htab;
2118   do
2119     {
2120       if (__builtin_expect (orig != NULL, 0))
2121 	{
2122 	  /* For worksharing task reductions, memory has been allocated
2123 	     already by some other thread that encountered the construct
2124 	     earlier.  */
2125 	  d[2] = orig[2];
2126 	  d[6] = orig[6];
2127 	  orig = (uintptr_t *) orig[4];
2128 	}
2129       else
2130 	{
2131 	  size_t sz = d[1] * nthreads;
2132 	  /* Should use omp_alloc if d[3] is not -1.  */
2133 	  void *ptr = gomp_aligned_alloc (d[2], sz);
2134 	  memset (ptr, '\0', sz);
2135 	  d[2] = (uintptr_t) ptr;
2136 	  d[6] = d[2] + sz;
2137 	}
2138       d[5] = 0;
2139       total_cnt += d[0];
2140       if (d[4] == 0)
2141 	{
2142 	  d[4] = (uintptr_t) old;
2143 	  break;
2144 	}
2145       else
2146 	d = (uintptr_t *) d[4];
2147     }
2148   while (1);
2149   if (old && old[5])
2150     {
2151       old_htab = (struct htab *) old[5];
2152       total_cnt += htab_elements (old_htab);
2153     }
2154   new_htab = htab_create (total_cnt);
2155   if (old_htab)
2156     {
2157       /* Copy old hash table, like in htab_expand.  */
2158       hash_entry_type *p, *olimit;
2159       new_htab->n_elements = htab_elements (old_htab);
2160       olimit = old_htab->entries + old_htab->size;
2161       p = old_htab->entries;
2162       do
2163 	{
2164 	  hash_entry_type x = *p;
2165 	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2166 	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2167 	  p++;
2168 	}
2169       while (p < olimit);
2170     }
2171   d = data;
2172   do
2173     {
2174       size_t j;
2175       for (j = 0; j < d[0]; ++j)
2176 	{
2177 	  uintptr_t *p = d + 7 + j * 3;
2178 	  p[2] = (uintptr_t) d;
2179 	  /* Ugly hack: hash_entry_type is defined for the task dependencies,
2180 	     which hash on their first element, a pointer.  We need to hash
2181 	     here also on the first sizeof (uintptr_t) bytes, which likewise
2182 	     contain a pointer.  Hide the cast from the compiler.  */
2183 	  hash_entry_type n;
2184 	  __asm ("" : "=g" (n) : "0" (p));
2185 	  *htab_find_slot (&new_htab, n, INSERT) = n;
2186 	}
2187       if (d[4] == (uintptr_t) old)
2188 	break;
2189       else
2190 	d = (uintptr_t *) d[4];
2191     }
2192   while (1);
2193   d[5] = (uintptr_t) new_htab;
2194 }
2195 
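/* Create an artificial team consisting of the current thread only, with
   its own implicit task.  Used by the task reduction code below when a
   construct that needs a team (e.g. an orphaned taskgroup with task
   reductions) is encountered while not under any parallel region.  */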
2196 static void
2197 gomp_create_artificial_team (void)
2198 {
2199   struct gomp_thread *thr = gomp_thread ();
2200   struct gomp_task_icv *icv;
2201   struct gomp_team *team = gomp_new_team (1);
2202   struct gomp_task *task = thr->task;
2203   icv = task ? &task->icv : &gomp_global_icv;
2204   team->prev_ts = thr->ts;
2205   thr->ts.team = team;
2206   thr->ts.team_id = 0;
2207   thr->ts.work_share = &team->work_shares[0];
2208   thr->ts.last_work_share = NULL;
2209 #ifdef HAVE_SYNC_BUILTINS
2210   thr->ts.single_count = 0;
2211 #endif
2212   thr->ts.static_trip = 0;
2213   thr->task = &team->implicit_task[0];
2214   gomp_init_task (thr->task, NULL, icv);
2215   if (task)
2216     {
2217       thr->task = task;
2218       gomp_end_task ();
2219       free (task);
2220       thr->task = &team->implicit_task[0];
2221     }
2222 #ifdef LIBGOMP_USE_PTHREADS
2223   else
2224     pthread_setspecific (gomp_thread_destructor, thr);
2225 #endif
2226 }
2227 
2228 /* The format of data is:
2229    data[0]	cnt
2230    data[1]	size
2231    data[2]	alignment (on output array pointer)
2232    data[3]	allocator (-1 if malloc allocator)
2233    data[4]	next pointer
2234    data[5]	used internally (htab pointer)
2235    data[6]	used internally (end of array)
2236    cnt times
2237    ent[0]	address
2238    ent[1]	offset
2239    ent[2]	used internally (pointer to data[0])
2240    The entries are sorted by increasing offset, so that a binary
2241    search can be performed.  Normally, data[8] is 0; the exception is
2242    worksharing construct task reductions in cancellable parallel,
2243    where at offset 0 there should be space for a pointer and an integer
2244    which are used internally.  */
2245 
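/* Illustrative sketch (not from this file; the compiler-generated layout
   may differ): for `#pragma omp taskgroup task_reduction (+: x)' with a
   single 8-byte variable x, DATA might be a one-element chain with
   cnt 1, size 8, alignment 8, allocator -1, followed by one entry whose
   address field is &x and whose offset field is 0; the registration
   below then allocates nthreads * size bytes of zeroed private storage
   for it.  */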
2246 void
2247 GOMP_taskgroup_reduction_register (uintptr_t *data)
2248 {
2249   struct gomp_thread *thr = gomp_thread ();
2250   struct gomp_team *team = thr->ts.team;
2251   struct gomp_task *task;
2252   unsigned nthreads;
2253   if (__builtin_expect (team == NULL, 0))
2254     {
2255       /* The task reduction code needs a team and task, so for
2256 	 orphaned taskgroups just create the implicit team.  */
2257       gomp_create_artificial_team ();
2258       ialias_call (GOMP_taskgroup_start) ();
2259       team = thr->ts.team;
2260     }
2261   nthreads = team->nthreads;
2262   task = thr->task;
2263   gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2264   task->taskgroup->reductions = data;
2265 }
2266 
2267 void
2268 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2269 {
2270   uintptr_t *d = data;
2271   htab_free ((struct htab *) data[5]);
2272   do
2273     {
2274       gomp_aligned_free ((void *) d[2]);
2275       d = (uintptr_t *) d[4];
2276     }
2277   while (d && !d[5]);
2278 }
2279 ialias (GOMP_taskgroup_reduction_unregister)
2280 
2281 /* For i = 0 to cnt-1, remap ptrs[i], which is either the address of the
2282    original list item or the address of a previously remapped original
2283    list item, to the address of the private copy, and store that back to
2284    ptrs[i].  For i < cntorig, additionally set ptrs[cnt+i] to the address
2285    of the original list item.  */
2286 
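/* The remapping first looks each pointer up in the hash table built by
   gomp_reduction_register; a pointer that is not a registered original
   list item must point into one of the allocated per-thread blocks
   (i.e. it was remapped before), so the matching descriptor is found by
   an address range check and, if the original list item is also needed,
   by a binary search over the recorded offsets.  */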
2287 void
2288 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2289 {
2290   struct gomp_thread *thr = gomp_thread ();
2291   struct gomp_task *task = thr->task;
2292   unsigned id = thr->ts.team_id;
2293   uintptr_t *data = task->taskgroup->reductions;
2294   uintptr_t *d;
2295   struct htab *reduction_htab = (struct htab *) data[5];
2296   size_t i;
2297   for (i = 0; i < cnt; ++i)
2298     {
2299       hash_entry_type ent, n;
2300       __asm ("" : "=g" (ent) : "0" (ptrs + i));
2301       n = htab_find (reduction_htab, ent);
2302       if (n)
2303 	{
2304 	  uintptr_t *p;
2305 	  __asm ("" : "=g" (p) : "0" (n));
2306 	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2307 	     p[1] is the offset within the allocated chunk for each
2308 	     thread, p[2] is the array registered with
2309 	     GOMP_taskgroup_reduction_register, d[2] is the base of the
2310 	     allocated memory and d[1] is the size of the allocated chunk
2311 	     for one thread.  */
2312 	  d = (uintptr_t *) p[2];
2313 	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2314 	  if (__builtin_expect (i < cntorig, 0))
2315 	    ptrs[cnt + i] = (void *) p[0];
2316 	  continue;
2317 	}
2318       d = data;
2319       while (d != NULL)
2320 	{
2321 	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2322 	    break;
2323 	  d = (uintptr_t *) d[4];
2324 	}
2325       if (d == NULL)
2326 	gomp_fatal ("couldn't find matching task_reduction or reduction with "
2327 		    "task modifier for %p", ptrs[i]);
2328       uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2329       ptrs[i] = (void *) (d[2] + id * d[1] + off);
2330       if (__builtin_expect (i < cntorig, 0))
2331 	{
2332 	  size_t lo = 0, hi = d[0] - 1;
2333 	  while (lo <= hi)
2334 	    {
2335 	      size_t m = (lo + hi) / 2;
2336 	      if (d[7 + 3 * m + 1] < off)
2337 		lo = m + 1;
2338 	      else if (d[7 + 3 * m + 1] == off)
2339 		{
2340 		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
2341 		  break;
2342 		}
2343 	      else
2344 		hi = m - 1;
2345 	    }
2346 	  if (lo > hi)
2347 	    gomp_fatal ("couldn't find matching task_reduction or reduction "
2348 			"with task modifier for %p", ptrs[i]);
2349 	}
2350     }
2351 }
2352 
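/* Register the task reduction descriptors in DATA for NTHREADS threads
   and attach them to a freshly created taskgroup that has no previous
   taskgroup; used for `reduction' clauses with the `task' modifier on
   parallel constructs.  */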
2353 struct gomp_taskgroup *
2354 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2355 {
2356   struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2357   gomp_reduction_register (data, NULL, NULL, nthreads);
2358   taskgroup->reductions = data;
2359   return taskgroup;
2360 }
2361 
2362 void
2363 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2364 {
2365   struct gomp_thread *thr = gomp_thread ();
2366   struct gomp_team *team = thr->ts.team;
2367   struct gomp_task *task = thr->task;
2368   unsigned nthreads = team->nthreads;
2369   gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2370   task->taskgroup->reductions = data;
2371 }
2372 
2373 void
2374 gomp_workshare_taskgroup_start (void)
2375 {
2376   struct gomp_thread *thr = gomp_thread ();
2377   struct gomp_team *team = thr->ts.team;
2378   struct gomp_task *task;
2379 
2380   if (team == NULL)
2381     {
2382       gomp_create_artificial_team ();
2383       team = thr->ts.team;
2384     }
2385   task = thr->task;
2386   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2387   task->taskgroup->workshare = true;
2388 }
2389 
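/* Called at the end of a worksharing construct with task reductions:
   end the taskgroup started by gomp_workshare_taskgroup_start, free the
   reduction storage (only thread 0 unregisters the whole chain, the
   other threads free just their hash table), and, unless the construct
   was cancelled, wait on the team barrier.  */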
2390 void
2391 GOMP_workshare_task_reduction_unregister (bool cancelled)
2392 {
2393   struct gomp_thread *thr = gomp_thread ();
2394   struct gomp_task *task = thr->task;
2395   struct gomp_team *team = thr->ts.team;
2396   uintptr_t *data = task->taskgroup->reductions;
2397   ialias_call (GOMP_taskgroup_end) ();
2398   if (thr->ts.team_id == 0)
2399     ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2400   else
2401     htab_free ((struct htab *) data[5]);
2402 
2403   if (!cancelled)
2404     gomp_team_barrier_wait (&team->barrier);
2405 }
2406 
2407 int
2408 omp_in_final (void)
2409 {
2410   struct gomp_thread *thr = gomp_thread ();
2411   return thr->task && thr->task->final_task;
2412 }
2413 
2414 ialias (omp_in_final)
2415 
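/* Fulfill the completion event of a task created with a `detach'
   clause, allowing that task to complete.  Illustrative sketch (not
   part of this file; `start_async_work' is a placeholder name):

	omp_event_handle_t ev;
	#pragma omp task detach (ev)
	start_async_work (ev);

   where the asynchronous completion callback eventually calls
   omp_fulfill_event (ev), possibly from a thread outside the team.
   If the detached task has already finished running, this call also
   performs the bookkeeping normally done at task completion: releasing
   dependent tasks and waking threads waiting on the team barrier.  */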
2416 void
2417 omp_fulfill_event (omp_event_handle_t event)
2418 {
2419   struct gomp_task *task = (struct gomp_task *) event;
2420   if (!task->deferred_p)
2421     {
2422       if (gomp_sem_getcount (task->completion_sem) > 0)
2423 	gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2424 
2425       gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2426 		  task);
2427       gomp_sem_post (task->completion_sem);
2428       return;
2429     }
2430 
2431   struct gomp_team *team = __atomic_load_n (&task->detach_team,
2432 					    MEMMODEL_RELAXED);
2433   if (!team)
2434     gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2435 		"been fulfilled!\n", task);
2436 
2437   gomp_mutex_lock (&team->task_lock);
2438   if (task->kind != GOMP_TASK_DETACHED)
2439     {
2440       /* The task has not finished running yet.  */
2441       gomp_debug (0,
2442 		  "omp_fulfill_event: %p event fulfilled for unfinished "
2443 		  "task\n", task);
2444       __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2445       gomp_mutex_unlock (&team->task_lock);
2446       return;
2447     }
2448 
2449   gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2450 	      task);
2451   size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2452   gomp_task_run_post_remove_parent (task);
2453   gomp_clear_parent (&task->children_queue);
2454   gomp_task_run_post_remove_taskgroup (task);
2455   team->task_count--;
2456   team->task_detach_count--;
2457 
2458   int do_wake = 0;
2459   bool shackled_thread_p = team == gomp_thread ()->ts.team;
2460   if (new_tasks > 0)
2461     {
2462       /* Wake up threads to run new tasks.  */
2463       gomp_team_barrier_set_task_pending (&team->barrier);
2464       do_wake = team->nthreads - team->task_running_count;
2465       if (do_wake > new_tasks)
2466 	do_wake = new_tasks;
2467     }
2468 
2469   if (!shackled_thread_p
2470       && !do_wake
2471       && team->task_detach_count == 0
2472       && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2473     /* Ensure that at least one thread is woken up to signal that the
2474        barrier can finish.  */
2475     do_wake = 1;
2476 
2477   /* If we are running in an unshackled thread, the team might vanish before
2478      gomp_team_barrier_wake is run if we release the lock first, so keep the
2479      lock for the call in that case.  */
2480   if (shackled_thread_p)
2481     gomp_mutex_unlock (&team->task_lock);
2482   if (do_wake)
2483     gomp_team_barrier_wake (&team->barrier, do_wake);
2484   if (!shackled_thread_p)
2485     gomp_mutex_unlock (&team->task_lock);
2486 
2487   gomp_finish_task (task);
2488   free (task);
2489 }
2490 
2491 ialias (omp_fulfill_event)
2492