/* Copyright (C) 2015-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.  */
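
/* Illustrative sketch (an assumption about the includer, not part of
   this file): the two variants are produced by defining TYPE, UTYPE
   and friends before inclusion, along these lines:

     #define TYPE long
     #define UTYPE unsigned long
     #define TYPE_is_long
     #include "taskloop.c"

     #define TYPE unsigned long long
     #define UTYPE TYPE
     #define GOMP_taskloop GOMP_taskloop_ull
     #include "taskloop.c"  */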

/* Called when encountering a taskloop construct.  The iteration space
   runs from START to END in strides of STEP; FLAGS is a mask of
   GOMP_TASK_FLAG_* bits carrying the if, untied, final, nogroup,
   reduction and grainsize/num_tasks clauses, with NUM_TASKS holding
   the grainsize or num_tasks value when one of those clauses is
   present.  If GOMP_TASK_FLAG_IF is clear, the tasks must not be
   deferred.  */
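
/* A rough sketch (an assumption for illustration, not code emitted by
   this file) of how a compiler might lower

     #pragma omp taskloop num_tasks(8)
     for (long i = 0; i < n; i += 2)
       body (i);

   DATA begins with the two TYPE bounds, which the runtime fills in
   per task before invoking FN:

     struct data { long start, end; };
     void fn (void *p)
     {
       struct data *d = p;
       for (long i = d->start; i < d->end; i += 2)
	 body (i);
     }
     ...
     GOMP_taskloop (fn, &d, NULL, sizeof d, _Alignof (struct data),
		    GOMP_TASK_FLAG_IF | GOMP_TASK_FLAG_UP, 8, 0,
		    0, n, 2);  */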

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }
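
  /* Layout note: with the reduction clause, DATA starts with the two
     TYPE iteration bounds followed by a pointer to the reduction
     descriptor array, and slot 2 of that array is what callers inspect
     to tell whether GOMP_taskgroup_reduction_register ran.  */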

#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif
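
  /* Worked example of the count: for start 0, end 10, step 3 the long
     variant computes s = 2 and n = (10 - 0 + 2) / 3 = 4, matching the
     iterations 0, 3, 6, 9; for start 10, end 0, step -3 the unsigned
     variant computes n = (10 - 0 + 3 - 1) / 3 = 4 as well.  */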

  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
	{
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
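
  /* Worked example of the split: n = 10, num_tasks = 4, step = 1 gives
     div = 2, mod = 2, so task_step starts out as 3 and nfirst = 1;
     tasks 0 and 1 cover 3 iterations each, then task_step drops back
     to 2 once i == nfirst below, and tasks 2 and 3 cover 2 each,
     3 + 3 + 2 + 2 = 10.  */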

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

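  /* Run the tasks undeferred, in this thread, if the if clause turned
     deferral off, there is no team, the current task is final, or
     queueing NUM_TASKS more tasks would push the team past its
     throttling limit of 64 queued tasks per thread.  */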
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
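
	  /* Copy-construct all NUM_TASKS argument blocks before running
	     any of them, mirroring the deferred path where every task is
	     created, and its firstprivate copies taken, before any task
	     body gets a chance to modify the shared originals.  */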
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step -= step;
	      fn (arg);
	      arg += arg_size;
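	      /* If the task spawned children of its own, detach them
		 from the stack-allocated parent before it goes out of
		 scope.  */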
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step -= step;
	    fn (data);
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step -= step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
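	  /* GOMP_TASK_FLAG_FINAL is bit 1 of FLAGS (gomp-constants.h),
	     so the shift below yields 0 or 1.  */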
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
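      /* Publish the tasks under the team lock: re-check cancellation
	 first, then queue each task and account for it before any
	 worker thread can pick it up.  */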
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
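      /* Wake at most NUM_TASKS idle threads; !parent->in_tied_task
	 accounts for the current thread when it is not already counted
	 in task_running_count.  */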
      gomp_team_barrier_set_task_pending (&team->barrier);
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}