1 /* Copyright (C) 2007-2019 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of tasks in response to task
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33 
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35 
36 static inline void *
37 htab_alloc (size_t size)
38 {
39   return gomp_malloc (size);
40 }
41 
42 static inline void
43 htab_free (void *ptr)
44 {
45   free (ptr);
46 }
47 
48 #include "hashtab.h"
49 
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53   return hash_pointer (element->addr);
54 }
55 
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59   return x->addr == y->addr;
60 }
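/* Descriptive note (an assumption about hashtab.h's parameterization, kept
   here for orientation): the generic hash table implementation included
   above is specialized by the including file.  hash_entry_type, htab_alloc
   and htab_free defined before the inclusion, together with htab_hash and
   htab_eq here, make it a table of gomp_task_depend_entry chains keyed by
   the dependence address.  */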
61 
62 /* Create a new task data structure.  */
63 
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 		struct gomp_task_icv *prev_icv)
67 {
68   /* It would seem that using memset here would be a win, but it turns
69      out that partially filling gomp_task allows us to keep the
70      overhead of task creation low.  In the nqueens-1.c test, for a
71      sufficiently large N, we drop the overhead from 5-6% to 1%.
72 
73      Note, the nqueens-1.c test in serial mode is a good test to
74      benchmark the overhead of creating tasks as there are millions of
75      tiny tasks created that all run undeferred.  */
76   task->parent = parent_task;
77   task->icv = *prev_icv;
78   task->kind = GOMP_TASK_IMPLICIT;
79   task->taskwait = NULL;
80   task->in_tied_task = false;
81   task->final_task = false;
82   task->copy_ctors_done = false;
83   task->parent_depends_on = false;
84   priority_queue_init (&task->children_queue);
85   task->taskgroup = NULL;
86   task->dependers = NULL;
87   task->depend_hash = NULL;
88   task->depend_count = 0;
89 }
90 
91 /* Clean up a task, after completing it.  */
92 
93 void
94 gomp_end_task (void)
95 {
96   struct gomp_thread *thr = gomp_thread ();
97   struct gomp_task *task = thr->task;
98 
99   gomp_finish_task (task);
100   thr->task = task->parent;
101 }
102 
103 /* Clear the parent field of every task in LIST.  */
104 
105 static inline void
106 gomp_clear_parent_in_list (struct priority_list *list)
107 {
108   struct priority_node *p = list->tasks;
109   if (p)
110     do
111       {
112 	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113 	p = p->next;
114       }
115     while (p != list->tasks);
116 }
117 
118 /* Splay tree version of gomp_clear_parent_in_list.
119 
120    Clear the parent field of every task in NODE within SP, and free
121    the node when done.  */
122 
123 static void
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125 {
126   if (!node)
127     return;
128   prio_splay_tree_node left = node->left, right = node->right;
129   gomp_clear_parent_in_list (&node->key.l);
130 #if _LIBGOMP_CHECKING_
131   memset (node, 0xaf, sizeof (*node));
132 #endif
133   /* No need to remove the node from the tree.  We're nuking
134      everything, so just free the nodes and our caller can clear the
135      entire splay tree.  */
136   free (node);
137   gomp_clear_parent_in_tree (sp, left);
138   gomp_clear_parent_in_tree (sp, right);
139 }
140 
141 /* Clear the parent field of every task in Q and remove every task
142    from Q.  */
143 
144 static inline void
145 gomp_clear_parent (struct priority_queue *q)
146 {
147   if (priority_queue_multi_p (q))
148     {
149       gomp_clear_parent_in_tree (&q->t, q->t.root);
150       /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 	 No need to remove anything.  We can just nuke everything.  */
152       q->t.root = NULL;
153     }
154   else
155     gomp_clear_parent_in_list (&q->l);
156 }
157 
158 /* Helper function for GOMP_task and gomp_create_target_task.
159 
160    For a TASK with in/out dependencies, fill in the various dependency
161    queues.  PARENT is the parent of said task.  DEPEND is as in
162    GOMP_task.  */
163 
164 static void
165 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166 			 void **depend)
167 {
168   size_t ndepend = (uintptr_t) depend[0];
169   size_t i;
170   hash_entry_type ent;
171 
172   if (ndepend)
173     {
174       /* depend[0] is total # */
175       size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
176       /* ndepend - nout is # of in: */
177       for (i = 0; i < ndepend; i++)
178 	{
179 	  task->depend[i].addr = depend[2 + i];
180 	  task->depend[i].is_in = i >= nout;
181 	}
182     }
183   else
184     {
185       ndepend = (uintptr_t) depend[1]; /* total # */
186       size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
187       size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
188       /* For now we treat mutexinoutset like out, which is compliant, but
189 	 inefficient.  */
190       size_t nin = (uintptr_t) depend[4]; /* # of in: */
191       /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
192       size_t normal = nout + nmutexinoutset + nin;
193       size_t n = 0;
194       for (i = normal; i < ndepend; i++)
195 	{
196 	  void **d = (void **) (uintptr_t) depend[5 + i];
197 	  switch ((uintptr_t) d[1])
198 	    {
199 	    case GOMP_DEPEND_OUT:
200 	    case GOMP_DEPEND_INOUT:
201 	    case GOMP_DEPEND_MUTEXINOUTSET:
202 	      break;
203 	    case GOMP_DEPEND_IN:
204 	      continue;
205 	    default:
206 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
207 			  (int) (uintptr_t) d[1]);
208 	    }
209 	  task->depend[n].addr = d[0];
210 	  task->depend[n++].is_in = 0;
211 	}
212       for (i = 0; i < normal; i++)
213 	{
214 	  task->depend[n].addr = depend[5 + i];
215 	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
216 	}
217       for (i = normal; i < ndepend; i++)
218 	{
219 	  void **d = (void **) (uintptr_t) depend[5 + i];
220 	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
221 	    continue;
222 	  task->depend[n].addr = d[0];
223 	  task->depend[n++].is_in = 1;
224 	}
225     }
226   task->depend_count = ndepend;
227   task->num_dependees = 0;
228   if (parent->depend_hash == NULL)
229     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
230   for (i = 0; i < ndepend; i++)
231     {
232       task->depend[i].next = NULL;
233       task->depend[i].prev = NULL;
234       task->depend[i].task = task;
235       task->depend[i].redundant = false;
236       task->depend[i].redundant_out = false;
237 
238       hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
239 					      &task->depend[i], INSERT);
240       hash_entry_type out = NULL, last = NULL;
241       if (*slot)
242 	{
243 	  /* If multiple depend clauses of the same task name the same address,
244 	     all but the first one are redundant.  As inout/out come first, if
245 	     any of them is inout/out, it will win, which is the right semantics.  */
246 	  if ((*slot)->task == task)
247 	    {
248 	      task->depend[i].redundant = true;
249 	      continue;
250 	    }
251 	  for (ent = *slot; ent; ent = ent->next)
252 	    {
253 	      if (ent->redundant_out)
254 		break;
255 
256 	      last = ent;
257 
258 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
259 	      if (task->depend[i].is_in && ent->is_in)
260 		continue;
261 
262 	      if (!ent->is_in)
263 		out = ent;
264 
265 	      struct gomp_task *tsk = ent->task;
266 	      if (tsk->dependers == NULL)
267 		{
268 		  tsk->dependers
269 		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
270 				   + 6 * sizeof (struct gomp_task *));
271 		  tsk->dependers->n_elem = 1;
272 		  tsk->dependers->allocated = 6;
273 		  tsk->dependers->elem[0] = task;
274 		  task->num_dependees++;
275 		  continue;
276 		}
277 	      /* We already have some other dependency on tsk from earlier
278 		 depend clause.  */
279 	      else if (tsk->dependers->n_elem
280 		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
281 			   == task))
282 		continue;
283 	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
284 		{
285 		  tsk->dependers->allocated
286 		    = tsk->dependers->allocated * 2 + 2;
287 		  tsk->dependers
288 		    = gomp_realloc (tsk->dependers,
289 				    sizeof (struct gomp_dependers_vec)
290 				    + (tsk->dependers->allocated
291 				       * sizeof (struct gomp_task *)));
292 		}
293 	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
294 	      task->num_dependees++;
295 	    }
296 	  task->depend[i].next = *slot;
297 	  (*slot)->prev = &task->depend[i];
298 	}
299       *slot = &task->depend[i];
300 
301       /* There is no need to store more than one depend({,in}out:) task per
302 	 address in the hash table chain for the purpose of creation of
303 	 deferred tasks, because each out depends on all earlier outs, thus it
304 	 is enough to record just the last depend({,in}out:).  For depend(in:),
305 	 we need to keep all of the previous ones not terminated yet, because
306 	 a later depend({,in}out:) might need to depend on all of them.  So, if
307 	 the new task's clause is depend({,in}out:), we know there is at most
308 	 one other depend({,in}out:) clause in the list (out).  For
309 	 non-deferred tasks we want to see all outs, so they are moved to the
310 	 end of the chain; after the first redundant_out entry all following
311 	 entries should be redundant_out.  */
312       if (!task->depend[i].is_in && out)
313 	{
314 	  if (out != last)
315 	    {
316 	      out->next->prev = out->prev;
317 	      out->prev->next = out->next;
318 	      out->next = last->next;
319 	      out->prev = last;
320 	      last->next = out;
321 	      if (out->next)
322 		out->next->prev = out;
323 	    }
324 	  out->redundant_out = true;
325 	}
326     }
327 }
328 
329 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
330    false, then we must not delay in executing the task.  If UNTIED is true,
331    then the task may be executed by any member of the team.
332 
333    DEPEND is an array containing:
334      if depend[0] is non-zero, then:
335 	depend[0]: number of depend elements.
336 	depend[1]: number of depend elements of type "out/inout".
337 	depend[2..N+1]: address of [1..N]th depend element.
338      otherwise, when depend[0] is zero, then:
339 	depend[1]: number of depend elements.
340 	depend[2]: number of depend elements of type "out/inout".
341 	depend[3]: number of depend elements of type "mutexinoutset".
342 	depend[4]: number of depend elements of type "in".
343 	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
344 	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
345 		   omp_depend_t objects.  */
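/* A hedged illustration of the first format above (assumed layout, not
   verified compiler output): for "depend(out: x) depend(in: y, z)" the
   array would look like

     void *depend[5];
     depend[0] = (void *) (uintptr_t) 3;   total number of depend elements
     depend[1] = (void *) (uintptr_t) 1;   number of out/inout elements
     depend[2] = &x;                       out/inout addresses come first
     depend[3] = &y;
     depend[4] = &z;                       then the in addresses

   x, y and z stand for arbitrary objects named in the depend clauses.  */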
346 
347 void
348 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
349 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
350 	   void **depend, int priority)
351 {
352   struct gomp_thread *thr = gomp_thread ();
353   struct gomp_team *team = thr->ts.team;
354 
355 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
356   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
357      tied to one thread all the time.  This means UNTIED tasks must be
358      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
359      might be running on a different thread than FN.  */
360   if (cpyfn)
361     if_clause = false;
362   flags &= ~GOMP_TASK_FLAG_UNTIED;
363 #endif
364 
365   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
366   if (__builtin_expect (gomp_cancel_var, 0) && team)
367     {
368       if (gomp_team_barrier_cancelled (&team->barrier))
369 	return;
370       if (thr->task->taskgroup)
371 	{
372 	  if (thr->task->taskgroup->cancelled)
373 	    return;
374 	  if (thr->task->taskgroup->workshare
375 	      && thr->task->taskgroup->prev
376 	      && thr->task->taskgroup->prev->cancelled)
377 	    return;
378 	}
379     }
380 
381   if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
382     priority = 0;
383   else if (priority > gomp_max_task_priority_var)
384     priority = gomp_max_task_priority_var;
385 
386   if (!if_clause || team == NULL
387       || (thr->task && thr->task->final_task)
388       || team->task_count > 64 * team->nthreads)
389     {
390       struct gomp_task task;
391 
392       /* If there are depend clauses and earlier deferred sibling tasks
393 	 with depend clauses, check if there isn't a dependency.  If there
394 	 is, we need to wait for them.  There is no need to handle
395 	 depend clauses for non-deferred tasks other than this, because
396 	 the parent task is suspended until the child task finishes and thus
397 	 it can't start further child tasks.  */
398       if ((flags & GOMP_TASK_FLAG_DEPEND)
399 	  && thr->task && thr->task->depend_hash)
400 	gomp_task_maybe_wait_for_dependencies (depend);
401 
402       gomp_init_task (&task, thr->task, gomp_icv (false));
403       task.kind = GOMP_TASK_UNDEFERRED;
404       task.final_task = (thr->task && thr->task->final_task)
405 			|| (flags & GOMP_TASK_FLAG_FINAL);
406       task.priority = priority;
407       if (thr->task)
408 	{
409 	  task.in_tied_task = thr->task->in_tied_task;
410 	  task.taskgroup = thr->task->taskgroup;
411 	}
412       thr->task = &task;
413       if (__builtin_expect (cpyfn != NULL, 0))
414 	{
415 	  char buf[arg_size + arg_align - 1];
416 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
417 				& ~(uintptr_t) (arg_align - 1));
418 	  cpyfn (arg, data);
419 	  fn (arg);
420 	}
421       else
422 	fn (data);
423       /* Access to "children" is normally done inside a task_lock
424 	 mutex region, but the only way this particular task.children
425 	 can be set is if this thread's task work function (fn)
426 	 creates children.  So since the setter is *this* thread, we
427 	 need no barriers here when testing for non-NULL.  We can have
428 	 task.children set by the current thread then changed by a
429 	 child thread, but seeing a stale non-NULL value is not a
430 	 problem.  Once past the task_lock acquisition, this thread
431 	 will see the real value of task.children.  */
432       if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
433 	{
434 	  gomp_mutex_lock (&team->task_lock);
435 	  gomp_clear_parent (&task.children_queue);
436 	  gomp_mutex_unlock (&team->task_lock);
437 	}
438       gomp_end_task ();
439     }
440   else
441     {
442       struct gomp_task *task;
443       struct gomp_task *parent = thr->task;
444       struct gomp_taskgroup *taskgroup = parent->taskgroup;
445       char *arg;
446       bool do_wake;
447       size_t depend_size = 0;
448 
449       if (flags & GOMP_TASK_FLAG_DEPEND)
450 	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
451 		       * sizeof (struct gomp_task_depend_entry));
452       task = gomp_malloc (sizeof (*task) + depend_size
453 			  + arg_size + arg_align - 1);
454       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
455 		      & ~(uintptr_t) (arg_align - 1));
456       gomp_init_task (task, parent, gomp_icv (false));
457       task->priority = priority;
458       task->kind = GOMP_TASK_UNDEFERRED;
459       task->in_tied_task = parent->in_tied_task;
460       task->taskgroup = taskgroup;
461       thr->task = task;
462       if (cpyfn)
463 	{
464 	  cpyfn (arg, data);
465 	  task->copy_ctors_done = true;
466 	}
467       else
468 	memcpy (arg, data, arg_size);
469       thr->task = parent;
470       task->kind = GOMP_TASK_WAITING;
471       task->fn = fn;
472       task->fn_data = arg;
473       task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
474       gomp_mutex_lock (&team->task_lock);
475       /* If parallel or taskgroup has been cancelled, don't start new
476 	 tasks.  */
477       if (__builtin_expect (gomp_cancel_var, 0)
478 	  && !task->copy_ctors_done)
479 	{
480 	  if (gomp_team_barrier_cancelled (&team->barrier))
481 	    {
482 	    do_cancel:
483 	      gomp_mutex_unlock (&team->task_lock);
484 	      gomp_finish_task (task);
485 	      free (task);
486 	      return;
487 	    }
488 	  if (taskgroup)
489 	    {
490 	      if (taskgroup->cancelled)
491 		goto do_cancel;
492 	      if (taskgroup->workshare
493 		  && taskgroup->prev
494 		  && taskgroup->prev->cancelled)
495 		goto do_cancel;
496 	    }
497 	}
498       if (taskgroup)
499 	taskgroup->num_children++;
500       if (depend_size)
501 	{
502 	  gomp_task_handle_depend (task, parent, depend);
503 	  if (task->num_dependees)
504 	    {
505 	      /* Tasks that depend on other tasks are not put into the
506 		 various waiting queues, so we are done for now.  Said
507 		 tasks are instead put into the queues via
508 		 gomp_task_run_post_handle_dependers() after their
509 		 dependencies have been satisfied.  After which, they
510 		 can be picked up by the various scheduling
511 		 points.  */
512 	      gomp_mutex_unlock (&team->task_lock);
513 	      return;
514 	    }
515 	}
516 
517       priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
518 			     task, priority,
519 			     PRIORITY_INSERT_BEGIN,
520 			     /*adjust_parent_depends_on=*/false,
521 			     task->parent_depends_on);
522       if (taskgroup)
523 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
524 			       task, priority,
525 			       PRIORITY_INSERT_BEGIN,
526 			       /*adjust_parent_depends_on=*/false,
527 			       task->parent_depends_on);
528 
529       priority_queue_insert (PQ_TEAM, &team->task_queue,
530 			     task, priority,
531 			     PRIORITY_INSERT_END,
532 			     /*adjust_parent_depends_on=*/false,
533 			     task->parent_depends_on);
534 
535       ++team->task_count;
536       ++team->task_queued_count;
537       gomp_team_barrier_set_task_pending (&team->barrier);
538       do_wake = team->task_running_count + !parent->in_tied_task
539 		< team->nthreads;
540       gomp_mutex_unlock (&team->task_lock);
541       if (do_wake)
542 	gomp_team_barrier_wake (&team->barrier, 1);
543     }
544 }
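/* A minimal sketch of how the entry point above might be reached, assuming
   a hypothetical outlined task function and data record (illustrative only,
   not taken from actual compiler output): for
     #pragma omp task firstprivate(x)
   a compiler could emit roughly

     struct omp_data_s { int x; } omp_data = { x };
     GOMP_task (task_body_fn, &omp_data, NULL, sizeof (omp_data),
		__alignof__ (omp_data), true, 0, NULL, 0);

   where task_body_fn runs the task body from a copy of omp_data.  */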
545 
546 ialias (GOMP_taskgroup_start)
547 ialias (GOMP_taskgroup_end)
548 ialias (GOMP_taskgroup_reduction_register)
549 
550 #define TYPE long
551 #define UTYPE unsigned long
552 #define TYPE_is_long 1
553 #include "taskloop.c"
554 #undef TYPE
555 #undef UTYPE
556 #undef TYPE_is_long
557 
558 #define TYPE unsigned long long
559 #define UTYPE TYPE
560 #define GOMP_taskloop GOMP_taskloop_ull
561 #include "taskloop.c"
562 #undef TYPE
563 #undef UTYPE
564 #undef GOMP_taskloop
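/* The two inclusions of taskloop.c above instantiate it twice via the
   TYPE/UTYPE macros: once for signed long iteration variables, providing
   GOMP_taskloop, and once for unsigned long long ones, providing
   GOMP_taskloop_ull.  */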
565 
566 static void inline
567 priority_queue_move_task_first (enum priority_queue_type type,
568 				struct priority_queue *head,
569 				struct gomp_task *task)
570 {
571 #if _LIBGOMP_CHECKING_
572   if (!priority_queue_task_in_queue_p (type, head, task))
573     gomp_fatal ("Attempt to move first missing task %p", task);
574 #endif
575   struct priority_list *list;
576   if (priority_queue_multi_p (head))
577     {
578       list = priority_queue_lookup_priority (head, task->priority);
579 #if _LIBGOMP_CHECKING_
580       if (!list)
581 	gomp_fatal ("Unable to find priority %d", task->priority);
582 #endif
583     }
584   else
585     list = &head->l;
586   priority_list_remove (list, task_to_priority_node (type, task), 0);
587   priority_list_insert (type, list, task, task->priority,
588 			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
589 			task->parent_depends_on);
590 }
591 
592 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
593    with team->task_lock held, or is executed in the thread that called
594    gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
595    run before it acquires team->task_lock.  */
596 
597 static void
598 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
599 {
600   struct gomp_task *parent = task->parent;
601   if (parent)
602     priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
603 				    task);
604 
605   struct gomp_taskgroup *taskgroup = task->taskgroup;
606   if (taskgroup)
607     priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
608 				    task);
609 
610   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
611 			 PRIORITY_INSERT_BEGIN, false,
612 			 task->parent_depends_on);
613   task->kind = GOMP_TASK_WAITING;
614   if (parent && parent->taskwait)
615     {
616       if (parent->taskwait->in_taskwait)
617 	{
618 	  /* One more task has had its dependencies met.
619 	     Inform any waiters.  */
620 	  parent->taskwait->in_taskwait = false;
621 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
622 	}
623       else if (parent->taskwait->in_depend_wait)
624 	{
625 	  /* One more task has had its dependencies met.
626 	     Inform any waiters.  */
627 	  parent->taskwait->in_depend_wait = false;
628 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
629 	}
630     }
631   if (taskgroup && taskgroup->in_taskgroup_wait)
632     {
633       /* One more task has had its dependencies met.
634 	 Inform any waiters.  */
635       taskgroup->in_taskgroup_wait = false;
636       gomp_sem_post (&taskgroup->taskgroup_sem);
637     }
638 
639   ++team->task_queued_count;
640   gomp_team_barrier_set_task_pending (&team->barrier);
641   /* I'm afraid this can't be done after releasing team->task_lock,
642      as gomp_target_task_completion is run from an unrelated thread and
643      therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
644      the team could be gone already.  */
645   if (team->nthreads > team->task_running_count)
646     gomp_team_barrier_wake (&team->barrier, 1);
647 }
648 
649 /* Signal that a target task TTASK has completed the asynchronously
650    running phase and should be requeued as a task to handle the
651    variable unmapping.  */
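/* Summarizing the handshake visible here and at the callers of
   gomp_target_task_fn: the thread that ran gomp_target_task_fn later
   re-acquires team->task_lock and either marks the ttask
   GOMP_TARGET_TASK_RUNNING or, if this callback has already recorded
   GOMP_TARGET_TASK_FINISHED, performs the requeuing itself; this callback
   in turn only requeues the task when it finds a state other than
   GOMP_TARGET_TASK_READY_TO_RUN.  */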
652 
653 void
654 GOMP_PLUGIN_target_task_completion (void *data)
655 {
656   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
657   struct gomp_task *task = ttask->task;
658   struct gomp_team *team = ttask->team;
659 
660   gomp_mutex_lock (&team->task_lock);
661   if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
662     {
663       ttask->state = GOMP_TARGET_TASK_FINISHED;
664       gomp_mutex_unlock (&team->task_lock);
665       return;
666     }
667   ttask->state = GOMP_TARGET_TASK_FINISHED;
668   gomp_target_task_completion (team, task);
669   gomp_mutex_unlock (&team->task_lock);
670 }
671 
672 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
673 
674 /* Called for nowait target tasks.  */
675 
676 bool
677 gomp_create_target_task (struct gomp_device_descr *devicep,
678 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
679 			 size_t *sizes, unsigned short *kinds,
680 			 unsigned int flags, void **depend, void **args,
681 			 enum gomp_target_task_state state)
682 {
683   struct gomp_thread *thr = gomp_thread ();
684   struct gomp_team *team = thr->ts.team;
685 
686   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
687   if (__builtin_expect (gomp_cancel_var, 0) && team)
688     {
689       if (gomp_team_barrier_cancelled (&team->barrier))
690 	return true;
691       if (thr->task->taskgroup)
692 	{
693 	  if (thr->task->taskgroup->cancelled)
694 	    return true;
695 	  if (thr->task->taskgroup->workshare
696 	      && thr->task->taskgroup->prev
697 	      && thr->task->taskgroup->prev->cancelled)
698 	    return true;
699 	}
700     }
701 
702   struct gomp_target_task *ttask;
703   struct gomp_task *task;
704   struct gomp_task *parent = thr->task;
705   struct gomp_taskgroup *taskgroup = parent->taskgroup;
706   bool do_wake;
707   size_t depend_size = 0;
708   uintptr_t depend_cnt = 0;
709   size_t tgt_align = 0, tgt_size = 0;
710 
711   if (depend != NULL)
712     {
713       depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
714       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
715     }
716   if (fn)
717     {
718       /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
719 	 firstprivate on the target task.  */
720       size_t i;
721       for (i = 0; i < mapnum; i++)
722 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
723 	  {
724 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
725 	    if (tgt_align < align)
726 	      tgt_align = align;
727 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
728 	    tgt_size += sizes[i];
729 	  }
730       if (tgt_align)
731 	tgt_size += tgt_align - 1;
732       else
733 	tgt_size = 0;
734     }
735 
736   task = gomp_malloc (sizeof (*task) + depend_size
737 		      + sizeof (*ttask)
738 		      + mapnum * (sizeof (void *) + sizeof (size_t)
739 				  + sizeof (unsigned short))
740 		      + tgt_size);
741   gomp_init_task (task, parent, gomp_icv (false));
742   task->priority = 0;
743   task->kind = GOMP_TASK_WAITING;
744   task->in_tied_task = parent->in_tied_task;
745   task->taskgroup = taskgroup;
746   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
747   ttask->devicep = devicep;
748   ttask->fn = fn;
749   ttask->mapnum = mapnum;
750   ttask->args = args;
751   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
752   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
753   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
754   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
755   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
756   if (tgt_align)
757     {
758       char *tgt = (char *) &ttask->kinds[mapnum];
759       size_t i;
760       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
761       if (al)
762 	tgt += tgt_align - al;
763       tgt_size = 0;
764       for (i = 0; i < mapnum; i++)
765 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
766 	  {
767 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
768 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
769 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
770 	    ttask->hostaddrs[i] = tgt + tgt_size;
771 	    tgt_size = tgt_size + sizes[i];
772 	  }
773     }
774   ttask->flags = flags;
775   ttask->state = state;
776   ttask->task = task;
777   ttask->team = team;
778   task->fn = NULL;
779   task->fn_data = ttask;
780   task->final_task = 0;
781   gomp_mutex_lock (&team->task_lock);
782   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
783   if (__builtin_expect (gomp_cancel_var, 0))
784     {
785       if (gomp_team_barrier_cancelled (&team->barrier))
786 	{
787 	do_cancel:
788 	  gomp_mutex_unlock (&team->task_lock);
789 	  gomp_finish_task (task);
790 	  free (task);
791 	  return true;
792 	}
793       if (taskgroup)
794 	{
795 	  if (taskgroup->cancelled)
796 	    goto do_cancel;
797 	  if (taskgroup->workshare
798 	      && taskgroup->prev
799 	      && taskgroup->prev->cancelled)
800 	    goto do_cancel;
801 	}
802     }
803   if (depend_size)
804     {
805       gomp_task_handle_depend (task, parent, depend);
806       if (task->num_dependees)
807 	{
808 	  if (taskgroup)
809 	    taskgroup->num_children++;
810 	  gomp_mutex_unlock (&team->task_lock);
811 	  return true;
812 	}
813     }
814   if (state == GOMP_TARGET_TASK_DATA)
815     {
816       gomp_task_run_post_handle_depend_hash (task);
817       gomp_mutex_unlock (&team->task_lock);
818       gomp_finish_task (task);
819       free (task);
820       return false;
821     }
822   if (taskgroup)
823     taskgroup->num_children++;
824   /* For async offloading, if we don't need to wait for dependencies,
825      run the gomp_target_task_fn right away, essentially schedule the
826      mapping part of the task in the current thread.  */
827   if (devicep != NULL
828       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
829     {
830       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
831 			     PRIORITY_INSERT_END,
832 			     /*adjust_parent_depends_on=*/false,
833 			     task->parent_depends_on);
834       if (taskgroup)
835 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
836 			       task, 0, PRIORITY_INSERT_END,
837 			       /*adjust_parent_depends_on=*/false,
838 			       task->parent_depends_on);
839       task->pnode[PQ_TEAM].next = NULL;
840       task->pnode[PQ_TEAM].prev = NULL;
841       task->kind = GOMP_TASK_TIED;
842       ++team->task_count;
843       gomp_mutex_unlock (&team->task_lock);
844 
845       thr->task = task;
846       gomp_target_task_fn (task->fn_data);
847       thr->task = parent;
848 
849       gomp_mutex_lock (&team->task_lock);
850       task->kind = GOMP_TASK_ASYNC_RUNNING;
851       /* If GOMP_PLUGIN_target_task_completion has run already
852 	 in between gomp_target_task_fn and the mutex lock,
853 	 perform the requeuing here.  */
854       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
855 	gomp_target_task_completion (team, task);
856       else
857 	ttask->state = GOMP_TARGET_TASK_RUNNING;
858       gomp_mutex_unlock (&team->task_lock);
859       return true;
860     }
861   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
862 			 PRIORITY_INSERT_BEGIN,
863 			 /*adjust_parent_depends_on=*/false,
864 			 task->parent_depends_on);
865   if (taskgroup)
866     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
867 			   PRIORITY_INSERT_BEGIN,
868 			   /*adjust_parent_depends_on=*/false,
869 			   task->parent_depends_on);
870   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
871 			 PRIORITY_INSERT_END,
872 			 /*adjust_parent_depends_on=*/false,
873 			 task->parent_depends_on);
874   ++team->task_count;
875   ++team->task_queued_count;
876   gomp_team_barrier_set_task_pending (&team->barrier);
877   do_wake = team->task_running_count + !parent->in_tied_task
878 	    < team->nthreads;
879   gomp_mutex_unlock (&team->task_lock);
880   if (do_wake)
881     gomp_team_barrier_wake (&team->barrier, 1);
882   return true;
883 }
884 
885 /* Given a parent_depends_on task in LIST, move it to the front of its
886    priority so it is run as soon as possible.
887 
888    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
889 
890    We rearrange the queue such that all parent_depends_on tasks are
891    first, and last_parent_depends_on points to the last such task we
892    rearranged.  For example, given the following tasks in a queue
893    where PD[123] are the parent_depends_on tasks:
894 
895 	task->children
896 	|
897 	V
898 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
899 
900 	We rearrange such that:
901 
902 	task->children
903 	|	       +--- last_parent_depends_on
904 	|	       |
905 	V	       V
906 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
907 
908 static void inline
909 priority_list_upgrade_task (struct priority_list *list,
910 			    struct priority_node *node)
911 {
912   struct priority_node *last_parent_depends_on
913     = list->last_parent_depends_on;
914   if (last_parent_depends_on)
915     {
916       node->prev->next = node->next;
917       node->next->prev = node->prev;
918       node->prev = last_parent_depends_on;
919       node->next = last_parent_depends_on->next;
920       node->prev->next = node;
921       node->next->prev = node;
922     }
923   else if (node != list->tasks)
924     {
925       node->prev->next = node->next;
926       node->next->prev = node->prev;
927       node->prev = list->tasks->prev;
928       node->next = list->tasks;
929       list->tasks = node;
930       node->prev->next = node;
931       node->next->prev = node;
932     }
933   list->last_parent_depends_on = node;
934 }
935 
936 /* Given a parent_depends_on TASK in its parent's children_queue, move
937    it to the front of its priority so it is run as soon as possible.
938 
939    PARENT is passed as an optimization.
940 
941    (This function could be defined in priority_queue.c, but we want it
942    inlined, and putting it in priority_queue.h is not an option, given
943    that gomp_task has not been properly defined at that point).  */
944 
945 static void inline
946 priority_queue_upgrade_task (struct gomp_task *task,
947 			     struct gomp_task *parent)
948 {
949   struct priority_queue *head = &parent->children_queue;
950   struct priority_node *node = &task->pnode[PQ_CHILDREN];
951 #if _LIBGOMP_CHECKING_
952   if (!task->parent_depends_on)
953     gomp_fatal ("priority_queue_upgrade_task: task must be a "
954 		"parent_depends_on task");
955   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
956     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
957 #endif
958   if (priority_queue_multi_p (head))
959     {
960       struct priority_list *list
961 	= priority_queue_lookup_priority (head, task->priority);
962       priority_list_upgrade_task (list, node);
963     }
964   else
965     priority_list_upgrade_task (&head->l, node);
966 }
967 
968 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
969    the way in LIST so that other tasks can be considered for
970    execution.  LIST contains tasks of type TYPE.
971 
972    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
973    if applicable.  */
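/* For example (the lists are circular, so "end" means just before
   LIST->tasks): with tasks  T -> W1 -> W2  where T is about to become TIED
   and W1/W2 are WAITING, either LIST->tasks is advanced past T's node or
   the node is relinked at the end, so the list effectively reads
   W1 -> W2 -> T and the waiting tasks are considered first.  */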
974 
975 static void inline
976 priority_list_downgrade_task (enum priority_queue_type type,
977 			      struct priority_list *list,
978 			      struct gomp_task *child_task)
979 {
980   struct priority_node *node = task_to_priority_node (type, child_task);
981   if (list->tasks == node)
982     list->tasks = node->next;
983   else if (node->next != list->tasks)
984     {
985       /* The task in NODE is about to become TIED and TIED tasks
986 	 cannot come before WAITING tasks.  If we're about to
987 	 leave the queue in such an indeterminate state, rewire
988 	 things appropriately.  However, a TIED task at the end is
989 	 perfectly fine.  */
990       struct gomp_task *next_task = priority_node_to_task (type, node->next);
991       if (next_task->kind == GOMP_TASK_WAITING)
992 	{
993 	  /* Remove from list.  */
994 	  node->prev->next = node->next;
995 	  node->next->prev = node->prev;
996 	  /* Rewire at the end.  */
997 	  node->next = list->tasks;
998 	  node->prev = list->tasks->prev;
999 	  list->tasks->prev->next = node;
1000 	  list->tasks->prev = node;
1001 	}
1002     }
1003 
1004   /* If the current task is the last_parent_depends_on for its
1005      priority, adjust last_parent_depends_on appropriately.  */
1006   if (__builtin_expect (child_task->parent_depends_on, 0)
1007       && list->last_parent_depends_on == node)
1008     {
1009       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1010       if (node->prev != node
1011 	  && prev_child->kind == GOMP_TASK_WAITING
1012 	  && prev_child->parent_depends_on)
1013 	list->last_parent_depends_on = node->prev;
1014       else
1015 	{
1016 	  /* There are no more parent_depends_on entries waiting
1017 	     to run, clear the list.  */
1018 	  list->last_parent_depends_on = NULL;
1019 	}
1020     }
1021 }
1022 
1023 /* Given a TASK in HEAD that is about to be executed, move it out of
1024    the way so that other tasks can be considered for execution.  HEAD
1025    contains tasks of type TYPE.
1026 
1027    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1028    if applicable.
1029 
1030    (This function could be defined in priority_queue.c, but we want it
1031    inlined, and putting it in priority_queue.h is not an option, given
1032    that gomp_task has not been properly defined at that point).  */
1033 
1034 static void inline
1035 priority_queue_downgrade_task (enum priority_queue_type type,
1036 			       struct priority_queue *head,
1037 			       struct gomp_task *task)
1038 {
1039 #if _LIBGOMP_CHECKING_
1040   if (!priority_queue_task_in_queue_p (type, head, task))
1041     gomp_fatal ("Attempt to downgrade missing task %p", task);
1042 #endif
1043   if (priority_queue_multi_p (head))
1044     {
1045       struct priority_list *list
1046 	= priority_queue_lookup_priority (head, task->priority);
1047       priority_list_downgrade_task (type, list, task);
1048     }
1049   else
1050     priority_list_downgrade_task (type, &head->l, task);
1051 }
1052 
1053 /* Set up CHILD_TASK to execute.  This is done by setting the task to
1054    TIED, and updating all relevant queues so that CHILD_TASK is no
1055    longer chosen for scheduling.  Also, remove CHILD_TASK from the
1056    overall team task queue entirely.
1057 
1058    Return TRUE if task or its containing taskgroup has been
1059    cancelled.  */
1060 
1061 static inline bool
1062 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1063 		   struct gomp_team *team)
1064 {
1065 #if _LIBGOMP_CHECKING_
1066   if (child_task->parent)
1067     priority_queue_verify (PQ_CHILDREN,
1068 			   &child_task->parent->children_queue, true);
1069   if (child_task->taskgroup)
1070     priority_queue_verify (PQ_TASKGROUP,
1071 			   &child_task->taskgroup->taskgroup_queue, false);
1072   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1073 #endif
1074 
1075   /* Task is about to go tied, move it out of the way.  */
1076   if (parent)
1077     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1078 				   child_task);
1079 
1080   /* Task is about to go tied, move it out of the way.  */
1081   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1082   if (taskgroup)
1083     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1084 				   child_task);
1085 
1086   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1087 			 MEMMODEL_RELAXED);
1088   child_task->pnode[PQ_TEAM].next = NULL;
1089   child_task->pnode[PQ_TEAM].prev = NULL;
1090   child_task->kind = GOMP_TASK_TIED;
1091 
1092   if (--team->task_queued_count == 0)
1093     gomp_team_barrier_clear_task_pending (&team->barrier);
1094   if (__builtin_expect (gomp_cancel_var, 0)
1095       && !child_task->copy_ctors_done)
1096     {
1097       if (gomp_team_barrier_cancelled (&team->barrier))
1098 	return true;
1099       if (taskgroup)
1100 	{
1101 	  if (taskgroup->cancelled)
1102 	    return true;
1103 	  if (taskgroup->workshare
1104 	      && taskgroup->prev
1105 	      && taskgroup->prev->cancelled)
1106 	    return true;
1107 	}
1108     }
1109   return false;
1110 }
1111 
1112 static void
1113 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1114 {
1115   struct gomp_task *parent = child_task->parent;
1116   size_t i;
1117 
1118   for (i = 0; i < child_task->depend_count; i++)
1119     if (!child_task->depend[i].redundant)
1120       {
1121 	if (child_task->depend[i].next)
1122 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1123 	if (child_task->depend[i].prev)
1124 	  child_task->depend[i].prev->next = child_task->depend[i].next;
1125 	else
1126 	  {
1127 	    hash_entry_type *slot
1128 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1129 				NO_INSERT);
1130 	    if (*slot != &child_task->depend[i])
1131 	      abort ();
1132 	    if (child_task->depend[i].next)
1133 	      *slot = child_task->depend[i].next;
1134 	    else
1135 	      htab_clear_slot (parent->depend_hash, slot);
1136 	  }
1137       }
1138 }
1139 
1140 /* After a CHILD_TASK has been run, adjust the dependency queue for
1141    each task that depends on CHILD_TASK, to record the fact that there
1142    is one less dependency to worry about.  If a task that depended on
1143    CHILD_TASK now has no dependencies, place it in the various queues
1144    so it gets scheduled to run.
1145 
1146    TEAM is the team to which CHILD_TASK belongs.  */
1147 
1148 static size_t
1149 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1150 				     struct gomp_team *team)
1151 {
1152   struct gomp_task *parent = child_task->parent;
1153   size_t i, count = child_task->dependers->n_elem, ret = 0;
1154   for (i = 0; i < count; i++)
1155     {
1156       struct gomp_task *task = child_task->dependers->elem[i];
1157 
1158       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1159 	 TASK's remaining dependencies.  Once TASK has no other
1160 	 dependencies, put it into the various queues so it will get
1161 	 scheduled for execution.  */
1162       if (--task->num_dependees != 0)
1163 	continue;
1164 
1165       struct gomp_taskgroup *taskgroup = task->taskgroup;
1166       if (parent)
1167 	{
1168 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1169 				 task, task->priority,
1170 				 PRIORITY_INSERT_BEGIN,
1171 				 /*adjust_parent_depends_on=*/true,
1172 				 task->parent_depends_on);
1173 	  if (parent->taskwait)
1174 	    {
1175 	      if (parent->taskwait->in_taskwait)
1176 		{
1177 		  /* One more task has had its dependencies met.
1178 		     Inform any waiters.  */
1179 		  parent->taskwait->in_taskwait = false;
1180 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1181 		}
1182 	      else if (parent->taskwait->in_depend_wait)
1183 		{
1184 		  /* One more task has had its dependencies met.
1185 		     Inform any waiters.  */
1186 		  parent->taskwait->in_depend_wait = false;
1187 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1188 		}
1189 	    }
1190 	}
1191       if (taskgroup)
1192 	{
1193 	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1194 				 task, task->priority,
1195 				 PRIORITY_INSERT_BEGIN,
1196 				 /*adjust_parent_depends_on=*/false,
1197 				 task->parent_depends_on);
1198 	  if (taskgroup->in_taskgroup_wait)
1199 	    {
1200 	      /* One more task has had its dependencies met.
1201 		 Inform any waiters.  */
1202 	      taskgroup->in_taskgroup_wait = false;
1203 	      gomp_sem_post (&taskgroup->taskgroup_sem);
1204 	    }
1205 	}
1206       priority_queue_insert (PQ_TEAM, &team->task_queue,
1207 			     task, task->priority,
1208 			     PRIORITY_INSERT_END,
1209 			     /*adjust_parent_depends_on=*/false,
1210 			     task->parent_depends_on);
1211       ++team->task_count;
1212       ++team->task_queued_count;
1213       ++ret;
1214     }
1215   free (child_task->dependers);
1216   child_task->dependers = NULL;
1217   if (ret > 1)
1218     gomp_team_barrier_set_task_pending (&team->barrier);
1219   return ret;
1220 }
1221 
1222 static inline size_t
1223 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1224 				  struct gomp_team *team)
1225 {
1226   if (child_task->depend_count == 0)
1227     return 0;
1228 
1229   /* If parent is gone already, the hash table has been freed and nothing
1230      will use it anymore, so there is no need to remove anything from it.  */
1231   if (child_task->parent != NULL)
1232     gomp_task_run_post_handle_depend_hash (child_task);
1233 
1234   if (child_task->dependers == NULL)
1235     return 0;
1236 
1237   return gomp_task_run_post_handle_dependers (child_task, team);
1238 }
1239 
1240 /* Remove CHILD_TASK from its parent.  */
1241 
1242 static inline void
1243 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1244 {
1245   struct gomp_task *parent = child_task->parent;
1246   if (parent == NULL)
1247     return;
1248 
1249   /* If this was the last task the parent was depending on,
1250      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1251      clean up and return.  */
1252   if (__builtin_expect (child_task->parent_depends_on, 0)
1253       && --parent->taskwait->n_depend == 0
1254       && parent->taskwait->in_depend_wait)
1255     {
1256       parent->taskwait->in_depend_wait = false;
1257       gomp_sem_post (&parent->taskwait->taskwait_sem);
1258     }
1259 
1260   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1261 			     child_task, MEMMODEL_RELEASE)
1262       && parent->taskwait && parent->taskwait->in_taskwait)
1263     {
1264       parent->taskwait->in_taskwait = false;
1265       gomp_sem_post (&parent->taskwait->taskwait_sem);
1266     }
1267   child_task->pnode[PQ_CHILDREN].next = NULL;
1268   child_task->pnode[PQ_CHILDREN].prev = NULL;
1269 }
1270 
1271 /* Remove CHILD_TASK from its taskgroup.  */
1272 
1273 static inline void
1274 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1275 {
1276   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1277   if (taskgroup == NULL)
1278     return;
1279   bool empty = priority_queue_remove (PQ_TASKGROUP,
1280 				      &taskgroup->taskgroup_queue,
1281 				      child_task, MEMMODEL_RELAXED);
1282   child_task->pnode[PQ_TASKGROUP].next = NULL;
1283   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1284   if (taskgroup->num_children > 1)
1285     --taskgroup->num_children;
1286   else
1287     {
1288       /* We access taskgroup->num_children in GOMP_taskgroup_end
1289 	 outside of the task lock mutex region, so
1290 	 need a release barrier here to ensure memory
1291 	 written by child_task->fn above is flushed
1292 	 before the NULL is written.  */
1293       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1294     }
1295   if (empty && taskgroup->in_taskgroup_wait)
1296     {
1297       taskgroup->in_taskgroup_wait = false;
1298       gomp_sem_post (&taskgroup->taskgroup_sem);
1299     }
1300 }
1301 
1302 void
1303 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1304 {
1305   struct gomp_thread *thr = gomp_thread ();
1306   struct gomp_team *team = thr->ts.team;
1307   struct gomp_task *task = thr->task;
1308   struct gomp_task *child_task = NULL;
1309   struct gomp_task *to_free = NULL;
1310   int do_wake = 0;
1311 
1312   gomp_mutex_lock (&team->task_lock);
1313   if (gomp_barrier_last_thread (state))
1314     {
1315       if (team->task_count == 0)
1316 	{
1317 	  gomp_team_barrier_done (&team->barrier, state);
1318 	  gomp_mutex_unlock (&team->task_lock);
1319 	  gomp_team_barrier_wake (&team->barrier, 0);
1320 	  return;
1321 	}
1322       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1323     }
1324 
1325   while (1)
1326     {
1327       bool cancelled = false;
1328       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1329 	{
1330 	  bool ignored;
1331 	  child_task
1332 	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1333 					PQ_IGNORED, NULL,
1334 					&ignored);
1335 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1336 					 team);
1337 	  if (__builtin_expect (cancelled, 0))
1338 	    {
1339 	      if (to_free)
1340 		{
1341 		  gomp_finish_task (to_free);
1342 		  free (to_free);
1343 		  to_free = NULL;
1344 		}
1345 	      goto finish_cancelled;
1346 	    }
1347 	  team->task_running_count++;
1348 	  child_task->in_tied_task = true;
1349 	}
1350       gomp_mutex_unlock (&team->task_lock);
1351       if (do_wake)
1352 	{
1353 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1354 	  do_wake = 0;
1355 	}
1356       if (to_free)
1357 	{
1358 	  gomp_finish_task (to_free);
1359 	  free (to_free);
1360 	  to_free = NULL;
1361 	}
1362       if (child_task)
1363 	{
1364 	  thr->task = child_task;
1365 	  if (__builtin_expect (child_task->fn == NULL, 0))
1366 	    {
1367 	      if (gomp_target_task_fn (child_task->fn_data))
1368 		{
1369 		  thr->task = task;
1370 		  gomp_mutex_lock (&team->task_lock);
1371 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1372 		  team->task_running_count--;
1373 		  struct gomp_target_task *ttask
1374 		    = (struct gomp_target_task *) child_task->fn_data;
1375 		  /* If GOMP_PLUGIN_target_task_completion has run already
1376 		     in between gomp_target_task_fn and the mutex lock,
1377 		     perform the requeuing here.  */
1378 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1379 		    gomp_target_task_completion (team, child_task);
1380 		  else
1381 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1382 		  child_task = NULL;
1383 		  continue;
1384 		}
1385 	    }
1386 	  else
1387 	    child_task->fn (child_task->fn_data);
1388 	  thr->task = task;
1389 	}
1390       else
1391 	return;
1392       gomp_mutex_lock (&team->task_lock);
1393       if (child_task)
1394 	{
1395 	 finish_cancelled:;
1396 	  size_t new_tasks
1397 	    = gomp_task_run_post_handle_depend (child_task, team);
1398 	  gomp_task_run_post_remove_parent (child_task);
1399 	  gomp_clear_parent (&child_task->children_queue);
1400 	  gomp_task_run_post_remove_taskgroup (child_task);
1401 	  to_free = child_task;
1402 	  child_task = NULL;
1403 	  if (!cancelled)
1404 	    team->task_running_count--;
1405 	  if (new_tasks > 1)
1406 	    {
1407 	      do_wake = team->nthreads - team->task_running_count;
1408 	      if (do_wake > new_tasks)
1409 		do_wake = new_tasks;
1410 	    }
1411 	  if (--team->task_count == 0
1412 	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1413 	    {
1414 	      gomp_team_barrier_done (&team->barrier, state);
1415 	      gomp_mutex_unlock (&team->task_lock);
1416 	      gomp_team_barrier_wake (&team->barrier, 0);
1417 	      gomp_mutex_lock (&team->task_lock);
1418 	    }
1419 	}
1420     }
1421 }
1422 
1423 /* Called when encountering a taskwait directive.
1424 
1425    Wait for all children of the current task.  */
1426 
1427 void
1428 GOMP_taskwait (void)
1429 {
1430   struct gomp_thread *thr = gomp_thread ();
1431   struct gomp_team *team = thr->ts.team;
1432   struct gomp_task *task = thr->task;
1433   struct gomp_task *child_task = NULL;
1434   struct gomp_task *to_free = NULL;
1435   struct gomp_taskwait taskwait;
1436   int do_wake = 0;
1437 
1438   /* The acquire barrier on load of task->children here synchronizes
1439      with the write of a NULL in gomp_task_run_post_remove_parent.  It is
1440      not necessary that we synchronize with other non-NULL writes at
1441      this point, but we must ensure that all writes to memory by a
1442      child thread task work function are seen before we exit from
1443      GOMP_taskwait.  */
1444   if (task == NULL
1445       || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1446     return;
1447 
1448   memset (&taskwait, 0, sizeof (taskwait));
1449   bool child_q = false;
1450   gomp_mutex_lock (&team->task_lock);
1451   while (1)
1452     {
1453       bool cancelled = false;
1454       if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1455 	{
1456 	  bool destroy_taskwait = task->taskwait != NULL;
1457 	  task->taskwait = NULL;
1458 	  gomp_mutex_unlock (&team->task_lock);
1459 	  if (to_free)
1460 	    {
1461 	      gomp_finish_task (to_free);
1462 	      free (to_free);
1463 	    }
1464 	  if (destroy_taskwait)
1465 	    gomp_sem_destroy (&taskwait.taskwait_sem);
1466 	  return;
1467 	}
1468       struct gomp_task *next_task
1469 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1470 				    PQ_TEAM, &team->task_queue, &child_q);
1471       if (next_task->kind == GOMP_TASK_WAITING)
1472 	{
1473 	  child_task = next_task;
1474 	  cancelled
1475 	    = gomp_task_run_pre (child_task, task, team);
1476 	  if (__builtin_expect (cancelled, 0))
1477 	    {
1478 	      if (to_free)
1479 		{
1480 		  gomp_finish_task (to_free);
1481 		  free (to_free);
1482 		  to_free = NULL;
1483 		}
1484 	      goto finish_cancelled;
1485 	    }
1486 	}
1487       else
1488 	{
1489 	/* All tasks we are waiting for are either running in other
1490 	   threads, or they are tasks that have not had their
1491 	   dependencies met (so they're not even in the queue).  Wait
1492 	   for them.  */
1493 	  if (task->taskwait == NULL)
1494 	    {
1495 	      taskwait.in_depend_wait = false;
1496 	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1497 	      task->taskwait = &taskwait;
1498 	    }
1499 	  taskwait.in_taskwait = true;
1500 	}
1501       gomp_mutex_unlock (&team->task_lock);
1502       if (do_wake)
1503 	{
1504 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1505 	  do_wake = 0;
1506 	}
1507       if (to_free)
1508 	{
1509 	  gomp_finish_task (to_free);
1510 	  free (to_free);
1511 	  to_free = NULL;
1512 	}
1513       if (child_task)
1514 	{
1515 	  thr->task = child_task;
1516 	  if (__builtin_expect (child_task->fn == NULL, 0))
1517 	    {
1518 	      if (gomp_target_task_fn (child_task->fn_data))
1519 		{
1520 		  thr->task = task;
1521 		  gomp_mutex_lock (&team->task_lock);
1522 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1523 		  struct gomp_target_task *ttask
1524 		    = (struct gomp_target_task *) child_task->fn_data;
1525 		  /* If GOMP_PLUGIN_target_task_completion has run already
1526 		     in between gomp_target_task_fn and the mutex lock,
1527 		     perform the requeuing here.  */
1528 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1529 		    gomp_target_task_completion (team, child_task);
1530 		  else
1531 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1532 		  child_task = NULL;
1533 		  continue;
1534 		}
1535 	    }
1536 	  else
1537 	    child_task->fn (child_task->fn_data);
1538 	  thr->task = task;
1539 	}
1540       else
1541 	gomp_sem_wait (&taskwait.taskwait_sem);
1542       gomp_mutex_lock (&team->task_lock);
1543       if (child_task)
1544 	{
1545 	 finish_cancelled:;
1546 	  size_t new_tasks
1547 	    = gomp_task_run_post_handle_depend (child_task, team);
1548 
1549 	  if (child_q)
1550 	    {
1551 	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1552 				     child_task, MEMMODEL_RELAXED);
1553 	      child_task->pnode[PQ_CHILDREN].next = NULL;
1554 	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1555 	    }
1556 
1557 	  gomp_clear_parent (&child_task->children_queue);
1558 
1559 	  gomp_task_run_post_remove_taskgroup (child_task);
1560 
1561 	  to_free = child_task;
1562 	  child_task = NULL;
1563 	  team->task_count--;
1564 	  if (new_tasks > 1)
1565 	    {
1566 	      do_wake = team->nthreads - team->task_running_count
1567 			- !task->in_tied_task;
1568 	      if (do_wake > new_tasks)
1569 		do_wake = new_tasks;
1570 	    }
1571 	}
1572     }
1573 }
1574 
1575 /* Called when encountering a taskwait directive with depend clause(s).
1576    Wait as if it were a mergeable included task construct with an empty body.  */
1577 
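/* Illustrative only (the exact expansion is the compiler's business):
   a directive such as

       #pragma omp taskwait depend(in: x)

   is expected to be lowered into a call to GOMP_taskwait_depend with
   a DEPEND array describing the listed dependences.  */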
1578 void
1579 GOMP_taskwait_depend (void **depend)
1580 {
1581   struct gomp_thread *thr = gomp_thread ();
1582   struct gomp_team *team = thr->ts.team;
1583 
1584   /* If parallel or taskgroup has been cancelled, return early.  */
1585   if (__builtin_expect (gomp_cancel_var, 0) && team)
1586     {
1587       if (gomp_team_barrier_cancelled (&team->barrier))
1588 	return;
1589       if (thr->task->taskgroup)
1590 	{
1591 	  if (thr->task->taskgroup->cancelled)
1592 	    return;
1593 	  if (thr->task->taskgroup->workshare
1594 	      && thr->task->taskgroup->prev
1595 	      && thr->task->taskgroup->prev->cancelled)
1596 	    return;
1597 	}
1598     }
1599 
1600   if (thr->task && thr->task->depend_hash)
1601     gomp_task_maybe_wait_for_dependencies (depend);
1602 }
1603 
1604 /* An undeferred task is about to run.  Wait for all tasks that this
1605    undeferred task depends on.
1606 
1607    This is done by first putting all known ready dependencies
1608    (dependencies that have their own dependencies met) at the top of
1609    the scheduling queues.  Then we iterate through these imminently
1610    ready tasks (and possibly other high priority tasks), and run them.
1611    If we run out of ready dependencies to execute, we either wait for
1612    the remaining dependencies to finish, or wait for them to get
1613    scheduled so we can run them.
1614 
1615    DEPEND is as in GOMP_task.  */
1616 
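/* Illustrative only: for an undeferred task such as

       #pragma omp task if (0) depend(inout: x)
       x++;

   the encountering thread is expected to end up here (via GOMP_task)
   and must not start the task body until every previously created
   sibling task registered against &x has completed.  */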
1617 void
1618 gomp_task_maybe_wait_for_dependencies (void **depend)
1619 {
1620   struct gomp_thread *thr = gomp_thread ();
1621   struct gomp_task *task = thr->task;
1622   struct gomp_team *team = thr->ts.team;
1623   struct gomp_task_depend_entry elem, *ent = NULL;
1624   struct gomp_taskwait taskwait;
1625   size_t orig_ndepend = (uintptr_t) depend[0];
1626   size_t nout = (uintptr_t) depend[1];
1627   size_t ndepend = orig_ndepend;
1628   size_t normal = ndepend;
1629   size_t n = 2;
1630   size_t i;
1631   size_t num_awaited = 0;
1632   struct gomp_task *child_task = NULL;
1633   struct gomp_task *to_free = NULL;
1634   int do_wake = 0;
1635 
1636   if (ndepend == 0)
1637     {
1638       ndepend = nout;
1639       nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1640       normal = nout + (uintptr_t) depend[4];
1641       n = 5;
1642     }
1643   gomp_mutex_lock (&team->task_lock);
1644   for (i = 0; i < ndepend; i++)
1645     {
1646       elem.addr = depend[i + n];
1647       elem.is_in = i >= nout;
1648       if (__builtin_expect (i >= normal, 0))
1649 	{
1650 	  void **d = (void **) elem.addr;
1651 	  switch ((uintptr_t) d[1])
1652 	    {
1653 	    case GOMP_DEPEND_IN:
1654 	      break;
1655 	    case GOMP_DEPEND_OUT:
1656 	    case GOMP_DEPEND_INOUT:
1657 	    case GOMP_DEPEND_MUTEXINOUTSET:
1658 	      elem.is_in = 0;
1659 	      break;
1660 	    default:
1661 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
1662 			  (int) (uintptr_t) d[1]);
1663 	    }
1664 	  elem.addr = d[0];
1665 	}
1666       ent = htab_find (task->depend_hash, &elem);
1667       for (; ent; ent = ent->next)
1668 	if (elem.is_in && ent->is_in)
1669 	  continue;
1670 	else
1671 	  {
1672 	    struct gomp_task *tsk = ent->task;
1673 	    if (!tsk->parent_depends_on)
1674 	      {
1675 		tsk->parent_depends_on = true;
1676 		++num_awaited;
1677 		/* If dependency TSK itself has no dependencies and is
1678 		   ready to run, move it up front so that we run it as
1679 		   soon as possible.  */
1680 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1681 		  priority_queue_upgrade_task (tsk, task);
1682 	      }
1683 	  }
1684     }
1685   if (num_awaited == 0)
1686     {
1687       gomp_mutex_unlock (&team->task_lock);
1688       return;
1689     }
1690 
1691   memset (&taskwait, 0, sizeof (taskwait));
1692   taskwait.n_depend = num_awaited;
1693   gomp_sem_init (&taskwait.taskwait_sem, 0);
1694   task->taskwait = &taskwait;
1695 
1696   while (1)
1697     {
1698       bool cancelled = false;
1699       if (taskwait.n_depend == 0)
1700 	{
1701 	  task->taskwait = NULL;
1702 	  gomp_mutex_unlock (&team->task_lock);
1703 	  if (to_free)
1704 	    {
1705 	      gomp_finish_task (to_free);
1706 	      free (to_free);
1707 	    }
1708 	  gomp_sem_destroy (&taskwait.taskwait_sem);
1709 	  return;
1710 	}
1711 
1712       /* Theoretically when we have multiple priorities, we should
1713 	 choose between the highest priority item in
1714 	 task->children_queue and team->task_queue here, so we should
1715 	 use priority_queue_next_task().  However, since we are
1716 	 running an undeferred task, perhaps that makes all tasks it
1717 	 depends on undeferred, thus a priority of INF?  This would
1718 	 make it unnecessary to take anything but the dependencies
1719 	 into account here.
1720 
1721 	 On the other hand, if we want to use priority_queue_next_task(),
1722 	 care should be taken to only use priority_queue_remove()
1723 	 below if the task was actually removed from the children
1724 	 queue.  */
1725       bool ignored;
1726       struct gomp_task *next_task
1727 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1728 				    PQ_IGNORED, NULL, &ignored);
1729 
1730       if (next_task->kind == GOMP_TASK_WAITING)
1731 	{
1732 	  child_task = next_task;
1733 	  cancelled
1734 	    = gomp_task_run_pre (child_task, task, team);
1735 	  if (__builtin_expect (cancelled, 0))
1736 	    {
1737 	      if (to_free)
1738 		{
1739 		  gomp_finish_task (to_free);
1740 		  free (to_free);
1741 		  to_free = NULL;
1742 		}
1743 	      goto finish_cancelled;
1744 	    }
1745 	}
1746       else
1747 	/* All tasks we are waiting for are either running in other
1748 	   threads, or they are tasks that have not had their
1749 	   dependencies met (so they're not even in the queue).  Wait
1750 	   for them.  */
1751 	taskwait.in_depend_wait = true;
1752       gomp_mutex_unlock (&team->task_lock);
1753       if (do_wake)
1754 	{
1755 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1756 	  do_wake = 0;
1757 	}
1758       if (to_free)
1759 	{
1760 	  gomp_finish_task (to_free);
1761 	  free (to_free);
1762 	  to_free = NULL;
1763 	}
1764       if (child_task)
1765 	{
1766 	  thr->task = child_task;
1767 	  if (__builtin_expect (child_task->fn == NULL, 0))
1768 	    {
1769 	      if (gomp_target_task_fn (child_task->fn_data))
1770 		{
1771 		  thr->task = task;
1772 		  gomp_mutex_lock (&team->task_lock);
1773 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1774 		  struct gomp_target_task *ttask
1775 		    = (struct gomp_target_task *) child_task->fn_data;
1776 		  /* If GOMP_PLUGIN_target_task_completion has run already
1777 		     in between gomp_target_task_fn and the mutex lock,
1778 		     perform the requeuing here.  */
1779 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1780 		    gomp_target_task_completion (team, child_task);
1781 		  else
1782 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1783 		  child_task = NULL;
1784 		  continue;
1785 		}
1786 	    }
1787 	  else
1788 	    child_task->fn (child_task->fn_data);
1789 	  thr->task = task;
1790 	}
1791       else
1792 	gomp_sem_wait (&taskwait.taskwait_sem);
1793       gomp_mutex_lock (&team->task_lock);
1794       if (child_task)
1795 	{
1796 	 finish_cancelled:;
1797 	  size_t new_tasks
1798 	    = gomp_task_run_post_handle_depend (child_task, team);
1799 	  if (child_task->parent_depends_on)
1800 	    --taskwait.n_depend;
1801 
1802 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1803 				 child_task, MEMMODEL_RELAXED);
1804 	  child_task->pnode[PQ_CHILDREN].next = NULL;
1805 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1806 
1807 	  gomp_clear_parent (&child_task->children_queue);
1808 	  gomp_task_run_post_remove_taskgroup (child_task);
1809 	  to_free = child_task;
1810 	  child_task = NULL;
1811 	  team->task_count--;
1812 	  if (new_tasks > 1)
1813 	    {
1814 	      do_wake = team->nthreads - team->task_running_count
1815 			- !task->in_tied_task;
1816 	      if (do_wake > new_tasks)
1817 		do_wake = new_tasks;
1818 	    }
1819 	}
1820     }
1821 }
1822 
1823 /* Called when encountering a taskyield directive.  */
1824 
1825 void
1826 GOMP_taskyield (void)
1827 {
1828   /* Nothing at the moment.  */
1829 }
1830 
1831 static inline struct gomp_taskgroup *
1832 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1833 {
1834   struct gomp_taskgroup *taskgroup
1835     = gomp_malloc (sizeof (struct gomp_taskgroup));
1836   taskgroup->prev = prev;
1837   priority_queue_init (&taskgroup->taskgroup_queue);
1838   taskgroup->reductions = prev ? prev->reductions : NULL;
1839   taskgroup->in_taskgroup_wait = false;
1840   taskgroup->cancelled = false;
1841   taskgroup->workshare = false;
1842   taskgroup->num_children = 0;
1843   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1844   return taskgroup;
1845 }
1846 
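/* Called at the start of a taskgroup region.  As an illustration (the
   precise expansion is up to the compiler), a construct like

       #pragma omp taskgroup
       { ... }

   is expected to be bracketed by a GOMP_taskgroup_start ()
   / GOMP_taskgroup_end () pair around its body.  */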
1847 void
1848 GOMP_taskgroup_start (void)
1849 {
1850   struct gomp_thread *thr = gomp_thread ();
1851   struct gomp_team *team = thr->ts.team;
1852   struct gomp_task *task = thr->task;
1853 
1854   /* If team is NULL, all tasks are executed as
1855      GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
1856      taskgroup and their descendant tasks will be finished
1857      by the time GOMP_taskgroup_end is called.  */
1858   if (team == NULL)
1859     return;
1860   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1861 }
1862 
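/* Called at the end of a taskgroup region.  Waits until all tasks of
   the taskgroup have completed, running queued tasks on the current
   thread while waiting, then frees the taskgroup.  */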
1863 void
1864 GOMP_taskgroup_end (void)
1865 {
1866   struct gomp_thread *thr = gomp_thread ();
1867   struct gomp_team *team = thr->ts.team;
1868   struct gomp_task *task = thr->task;
1869   struct gomp_taskgroup *taskgroup;
1870   struct gomp_task *child_task = NULL;
1871   struct gomp_task *to_free = NULL;
1872   int do_wake = 0;
1873 
1874   if (team == NULL)
1875     return;
1876   taskgroup = task->taskgroup;
1877   if (__builtin_expect (taskgroup == NULL, 0)
1878       && thr->ts.level == 0)
1879     {
1880       /* This can happen if GOMP_taskgroup_start is called when
1881 	 thr->ts.team == NULL, but inside the taskgroup there
1882 	 is a #pragma omp target nowait that creates an implicit
1883 	 team with a single thread.  In this case, we want to wait
1884 	 for all outstanding tasks in this team.  */
1885       gomp_team_barrier_wait (&team->barrier);
1886       return;
1887     }
1888 
1889   /* The acquire barrier on load of taskgroup->num_children here
1890      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1891      It is not necessary that we synchronize with other non-0 writes at
1892      this point, but we must ensure that all writes to memory by a
1893      child thread task work function are seen before we exit from
1894      GOMP_taskgroup_end.  */
1895   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1896     goto finish;
1897 
1898   bool unused;
1899   gomp_mutex_lock (&team->task_lock);
1900   while (1)
1901     {
1902       bool cancelled = false;
1903       if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1904 				  MEMMODEL_RELAXED))
1905 	{
1906 	  if (taskgroup->num_children)
1907 	    {
1908 	      if (priority_queue_empty_p (&task->children_queue,
1909 					  MEMMODEL_RELAXED))
1910 		goto do_wait;
1911 	      child_task
1912 		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1913 					    PQ_TEAM, &team->task_queue,
1914 					    &unused);
1915 	    }
1916 	  else
1917 	    {
1918 	      gomp_mutex_unlock (&team->task_lock);
1919 	      if (to_free)
1920 		{
1921 		  gomp_finish_task (to_free);
1922 		  free (to_free);
1923 		}
1924 	      goto finish;
1925 	    }
1926 	}
1927       else
1928 	child_task
1929 	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1930 				      PQ_TEAM, &team->task_queue, &unused);
1931       if (child_task->kind == GOMP_TASK_WAITING)
1932 	{
1933 	  cancelled
1934 	    = gomp_task_run_pre (child_task, child_task->parent, team);
1935 	  if (__builtin_expect (cancelled, 0))
1936 	    {
1937 	      if (to_free)
1938 		{
1939 		  gomp_finish_task (to_free);
1940 		  free (to_free);
1941 		  to_free = NULL;
1942 		}
1943 	      goto finish_cancelled;
1944 	    }
1945 	}
1946       else
1947 	{
1948 	  child_task = NULL;
1949 	 do_wait:
1950 	/* All tasks we are waiting for are either running in other
1951 	   threads, or they are tasks that have not had their
1952 	   dependencies met (so they're not even in the queue).  Wait
1953 	   for them.  */
1954 	  taskgroup->in_taskgroup_wait = true;
1955 	}
1956       gomp_mutex_unlock (&team->task_lock);
1957       if (do_wake)
1958 	{
1959 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1960 	  do_wake = 0;
1961 	}
1962       if (to_free)
1963 	{
1964 	  gomp_finish_task (to_free);
1965 	  free (to_free);
1966 	  to_free = NULL;
1967 	}
1968       if (child_task)
1969 	{
1970 	  thr->task = child_task;
1971 	  if (__builtin_expect (child_task->fn == NULL, 0))
1972 	    {
1973 	      if (gomp_target_task_fn (child_task->fn_data))
1974 		{
1975 		  thr->task = task;
1976 		  gomp_mutex_lock (&team->task_lock);
1977 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1978 		  struct gomp_target_task *ttask
1979 		    = (struct gomp_target_task *) child_task->fn_data;
1980 		  /* If GOMP_PLUGIN_target_task_completion has run already
1981 		     in between gomp_target_task_fn and the mutex lock,
1982 		     perform the requeuing here.  */
1983 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1984 		    gomp_target_task_completion (team, child_task);
1985 		  else
1986 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1987 		  child_task = NULL;
1988 		  continue;
1989 		}
1990 	    }
1991 	  else
1992 	    child_task->fn (child_task->fn_data);
1993 	  thr->task = task;
1994 	}
1995       else
1996 	gomp_sem_wait (&taskgroup->taskgroup_sem);
1997       gomp_mutex_lock (&team->task_lock);
1998       if (child_task)
1999 	{
2000 	 finish_cancelled:;
2001 	  size_t new_tasks
2002 	    = gomp_task_run_post_handle_depend (child_task, team);
2003 	  gomp_task_run_post_remove_parent (child_task);
2004 	  gomp_clear_parent (&child_task->children_queue);
2005 	  gomp_task_run_post_remove_taskgroup (child_task);
2006 	  to_free = child_task;
2007 	  child_task = NULL;
2008 	  team->task_count--;
2009 	  if (new_tasks > 1)
2010 	    {
2011 	      do_wake = team->nthreads - team->task_running_count
2012 			- !task->in_tied_task;
2013 	      if (do_wake > new_tasks)
2014 		do_wake = new_tasks;
2015 	    }
2016 	}
2017     }
2018 
2019  finish:
2020   task->taskgroup = taskgroup->prev;
2021   gomp_sem_destroy (&taskgroup->taskgroup_sem);
2022   free (taskgroup);
2023 }
2024 
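/* Register the chain of task reduction records DATA (see the format
   description before GOMP_taskgroup_reduction_register below).  OLD is
   the previously registered chain, if any, and ORIG, when non-NULL,
   points at records whose per-thread memory has already been allocated
   by another thread (worksharing task reductions).  For each record,
   NTHREADS copies of d[1] bytes are allocated (or reused from ORIG)
   and a hash table mapping original list item addresses to their
   records is built.  */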
2025 static inline __attribute__((always_inline)) void
2026 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2027 			 unsigned nthreads)
2028 {
2029   size_t total_cnt = 0;
2030   uintptr_t *d = data;
2031   struct htab *old_htab = NULL, *new_htab;
2032   do
2033     {
2034       if (__builtin_expect (orig != NULL, 0))
2035 	{
2036 	  /* For worksharing task reductions, memory has been allocated
2037 	     already by some other thread that encountered the construct
2038 	     earlier.  */
2039 	  d[2] = orig[2];
2040 	  d[6] = orig[6];
2041 	  orig = (uintptr_t *) orig[4];
2042 	}
2043       else
2044 	{
2045 	  size_t sz = d[1] * nthreads;
2046 	  /* Should use omp_alloc if d[3] is not -1.  */
2047 	  void *ptr = gomp_aligned_alloc (d[2], sz);
2048 	  memset (ptr, '\0', sz);
2049 	  d[2] = (uintptr_t) ptr;
2050 	  d[6] = d[2] + sz;
2051 	}
2052       d[5] = 0;
2053       total_cnt += d[0];
2054       if (d[4] == 0)
2055 	{
2056 	  d[4] = (uintptr_t) old;
2057 	  break;
2058 	}
2059       else
2060 	d = (uintptr_t *) d[4];
2061     }
2062   while (1);
2063   if (old && old[5])
2064     {
2065       old_htab = (struct htab *) old[5];
2066       total_cnt += htab_elements (old_htab);
2067     }
2068   new_htab = htab_create (total_cnt);
2069   if (old_htab)
2070     {
2071       /* Copy old hash table, like in htab_expand.  */
2072       hash_entry_type *p, *olimit;
2073       new_htab->n_elements = htab_elements (old_htab);
2074       olimit = old_htab->entries + old_htab->size;
2075       p = old_htab->entries;
2076       do
2077 	{
2078 	  hash_entry_type x = *p;
2079 	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2080 	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2081 	  p++;
2082 	}
2083       while (p < olimit);
2084     }
2085   d = data;
2086   do
2087     {
2088       size_t j;
2089       for (j = 0; j < d[0]; ++j)
2090 	{
2091 	  uintptr_t *p = d + 7 + j * 3;
2092 	  p[2] = (uintptr_t) d;
2093 	  /* Ugly hack: hash_entry_type is defined for the task dependencies,
2094 	     which hash on their first element, a pointer.  Here we likewise
2095 	     need to hash on the first sizeof (uintptr_t) bytes, which contain
2096 	     a pointer.  Hide the cast from the compiler.  */
2097 	  hash_entry_type n;
2098 	  __asm ("" : "=g" (n) : "0" (p));
2099 	  *htab_find_slot (&new_htab, n, INSERT) = n;
2100 	}
2101       if (d[4] == (uintptr_t) old)
2102 	break;
2103       else
2104 	d = (uintptr_t *) d[4];
2105     }
2106   while (1);
2107   d[5] = (uintptr_t) new_htab;
2108 }
2109 
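/* Create an artificial team consisting of just the current thread, so
   that orphaned constructs that need a team and an implicit task
   (e.g. taskgroup reductions, worksharing task reductions) have
   something to attach to.  */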
2110 static void
2111 gomp_create_artificial_team (void)
2112 {
2113   struct gomp_thread *thr = gomp_thread ();
2114   struct gomp_task_icv *icv;
2115   struct gomp_team *team = gomp_new_team (1);
2116   struct gomp_task *task = thr->task;
2117   icv = task ? &task->icv : &gomp_global_icv;
2118   team->prev_ts = thr->ts;
2119   thr->ts.team = team;
2120   thr->ts.team_id = 0;
2121   thr->ts.work_share = &team->work_shares[0];
2122   thr->ts.last_work_share = NULL;
2123 #ifdef HAVE_SYNC_BUILTINS
2124   thr->ts.single_count = 0;
2125 #endif
2126   thr->ts.static_trip = 0;
2127   thr->task = &team->implicit_task[0];
2128   gomp_init_task (thr->task, NULL, icv);
2129   if (task)
2130     {
2131       thr->task = task;
2132       gomp_end_task ();
2133       free (task);
2134       thr->task = &team->implicit_task[0];
2135     }
2136 #ifdef LIBGOMP_USE_PTHREADS
2137   else
2138     pthread_setspecific (gomp_thread_destructor, thr);
2139 #endif
2140 }
2141 
2142 /* The format of data is:
2143    data[0]	cnt
2144    data[1]	size
2145    data[2]	alignment (on output array pointer)
2146    data[3]	allocator (-1 if malloc allocator)
2147    data[4]	next pointer
2148    data[5]	used internally (htab pointer)
2149    data[6]	used internally (end of array)
2150    cnt times
2151    ent[0]	address
2152    ent[1]	offset
2153    ent[2]	used internally (pointer to data[0])
2154    The entries are sorted by increasing offset, so that a binary
2155    search can be performed.  Normally, data[8] is 0, exception is
2156    for worksharing construct task reductions in cancellable parallel,
2157    where at offset 0 there should be space for a pointer and an integer
2158    which are used internally.  */
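/* As a worked illustration of the layout above: after registration,
   the private copy of the list item described by the entry
   ent = d + 7 + 3 * j, for the thread with team id ID, lives at

       (void *) (d[2] + ID * d[1] + ent[1])

   i.e. the base of the allocated block, plus ID chunks of d[1] bytes,
   plus the item's offset within one chunk; this is exactly the
   computation GOMP_task_reduction_remap performs below.  */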
2159 
2160 void
2161 GOMP_taskgroup_reduction_register (uintptr_t *data)
2162 {
2163   struct gomp_thread *thr = gomp_thread ();
2164   struct gomp_team *team = thr->ts.team;
2165   struct gomp_task *task;
2166   unsigned nthreads;
2167   if (__builtin_expect (team == NULL, 0))
2168     {
2169       /* The task reduction code needs a team and task, so for
2170 	 orphaned taskgroups just create the implicit team.  */
2171       gomp_create_artificial_team ();
2172       ialias_call (GOMP_taskgroup_start) ();
2173       team = thr->ts.team;
2174     }
2175   nthreads = team->nthreads;
2176   task = thr->task;
2177   gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2178   task->taskgroup->reductions = data;
2179 }
2180 
2181 void
2182 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2183 {
2184   uintptr_t *d = data;
2185   htab_free ((struct htab *) data[5]);
2186   do
2187     {
2188       gomp_aligned_free ((void *) d[2]);
2189       d = (uintptr_t *) d[4];
2190     }
2191   while (d && !d[5]);
2192 }
2193 ialias (GOMP_taskgroup_reduction_unregister)
2194 
2195 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2196    original list item or address of previously remapped original list
2197    item to address of the private copy, store that to ptrs[i].
2198    For i < cntorig, additionally set ptrs[cnt+i] to the address of
2199    the original list item.  */
2200 
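/* Illustrative only: for a task participating in a reduction, e.g.

       #pragma omp task in_reduction(+: x)
       x += f ();

   the compiler is expected to emit a GOMP_task_reduction_remap call
   with ptrs[0] = &x in the task body and to operate on the private
   copy whose address is stored back into ptrs[0].  */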
2201 void
2202 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2203 {
2204   struct gomp_thread *thr = gomp_thread ();
2205   struct gomp_task *task = thr->task;
2206   unsigned id = thr->ts.team_id;
2207   uintptr_t *data = task->taskgroup->reductions;
2208   uintptr_t *d;
2209   struct htab *reduction_htab = (struct htab *) data[5];
2210   size_t i;
2211   for (i = 0; i < cnt; ++i)
2212     {
2213       hash_entry_type ent, n;
2214       __asm ("" : "=g" (ent) : "0" (ptrs + i));
2215       n = htab_find (reduction_htab, ent);
2216       if (n)
2217 	{
2218 	  uintptr_t *p;
2219 	  __asm ("" : "=g" (p) : "0" (n));
2220 	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2221 	     p[1] is the offset within the allocated chunk for each
2222 	     thread, p[2] is the array registered with
2223 	     GOMP_taskgroup_reduction_register, d[2] is the base of the
2224 	     allocated memory and d[1] is the size of the allocated chunk
2225 	     for one thread.  */
2226 	  d = (uintptr_t *) p[2];
2227 	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2228 	  if (__builtin_expect (i < cntorig, 0))
2229 	    ptrs[cnt + i] = (void *) p[0];
2230 	  continue;
2231 	}
2232       d = data;
2233       while (d != NULL)
2234 	{
2235 	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2236 	    break;
2237 	  d = (uintptr_t *) d[4];
2238 	}
2239       if (d == NULL)
2240 	gomp_fatal ("couldn't find matching task_reduction or reduction with "
2241 		    "task modifier for %p", ptrs[i]);
2242       uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2243       ptrs[i] = (void *) (d[2] + id * d[1] + off);
2244       if (__builtin_expect (i < cntorig, 0))
2245 	{
2246 	  size_t lo = 0, hi = d[0] - 1;
2247 	  while (lo <= hi)
2248 	    {
2249 	      size_t m = (lo + hi) / 2;
2250 	      if (d[7 + 3 * m + 1] < off)
2251 		lo = m + 1;
2252 	      else if (d[7 + 3 * m + 1] == off)
2253 		{
2254 		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
2255 		  break;
2256 		}
2257 	      else
2258 		hi = m - 1;
2259 	    }
2260 	  if (lo > hi)
2261 	    gomp_fatal ("couldn't find matching task_reduction or reduction "
2262 			"with task modifier for %p", ptrs[i]);
2263 	}
2264     }
2265 }
2266 
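/* Register the reduction chain DATA for a parallel construct with
   NTHREADS threads and return a freshly created taskgroup that owns
   it.  */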
2267 struct gomp_taskgroup *
2268 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2269 {
2270   struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2271   gomp_reduction_register (data, NULL, NULL, nthreads);
2272   taskgroup->reductions = data;
2273   return taskgroup;
2274 }
2275 
2276 void
2277 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2278 {
2279   struct gomp_thread *thr = gomp_thread ();
2280   struct gomp_team *team = thr->ts.team;
2281   struct gomp_task *task = thr->task;
2282   unsigned nthreads = team->nthreads;
2283   gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2284   task->taskgroup->reductions = data;
2285 }
2286 
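/* Start the implicit taskgroup used for task reductions on a
   worksharing construct, creating an artificial team first if the
   construct is orphaned.  The taskgroup is marked as a workshare one,
   which e.g. makes the cancellation checks above consult the
   enclosing taskgroup.  */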
2287 void
2288 gomp_workshare_taskgroup_start (void)
2289 {
2290   struct gomp_thread *thr = gomp_thread ();
2291   struct gomp_team *team = thr->ts.team;
2292   struct gomp_task *task;
2293 
2294   if (team == NULL)
2295     {
2296       gomp_create_artificial_team ();
2297       team = thr->ts.team;
2298     }
2299   task = thr->task;
2300   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2301   task->taskgroup->workshare = true;
2302 }
2303 
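/* Tear down the task reductions of a worksharing construct: end the
   implicit taskgroup, have the thread with team id 0 free the
   per-thread reduction memory while the other threads free only their
   hash tables, and, unless the construct was CANCELLED, wait on the
   team barrier.  */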
2304 void
2305 GOMP_workshare_task_reduction_unregister (bool cancelled)
2306 {
2307   struct gomp_thread *thr = gomp_thread ();
2308   struct gomp_task *task = thr->task;
2309   struct gomp_team *team = thr->ts.team;
2310   uintptr_t *data = task->taskgroup->reductions;
2311   ialias_call (GOMP_taskgroup_end) ();
2312   if (thr->ts.team_id == 0)
2313     ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2314   else
2315     htab_free ((struct htab *) data[5]);
2316 
2317   if (!cancelled)
2318     gomp_team_barrier_wait (&team->barrier);
2319 }
2320 
2321 int
2322 omp_in_final (void)
2323 {
2324   struct gomp_thread *thr = gomp_thread ();
2325   return thr->task && thr->task->final_task;
2326 }
2327 
2328 ialias (omp_in_final)
2329