/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.  */

/* Called when encountering a taskloop directive.  FN is the task body,
   DATA its argument block, whose first two TYPE-sized slots receive the
   bounds of each generated task's iteration sub-range.  START, END and
   STEP describe the whole iteration space; NUM_TASKS carries either the
   num_tasks or the grainsize clause value, depending on FLAGS.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    return;

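  /* Compute the iteration count N as a ceiling division of the iteration
     space by STEP.  The signed (long) variant can test the sign of STEP
     directly; the unsigned variant cannot, so the compiler communicates
     the loop direction through the GOMP_TASK_FLAG_UP bit instead.  */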
#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	return;
      s--;
    }
  else
    {
      if (start <= end)
	return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	return;
      n = (start - end - step - 1) / -step;
    }
#endif

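  /* TASK_STEP is the stride covered by one generated task.  When N does
     not divide evenly among the tasks, tasks 0 .. NFIRST each cover one
     extra iteration: TASK_STEP is enlarged by STEP up front and shrunk
     back once task NFIRST has been handed its range below.  */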
  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      unsigned long grainsize = num_tasks;
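      /* NUM_TASKS carried the grainsize clause value; derive the task
	 count from it.  In the unsigned long long variant the quotient
	 may not fit in unsigned long, in which case it is clamped to
	 ~0UL and excluded from the grainsize-multiple path below.  */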
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
	{
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

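  /* Unless the NOGROUP flag was given, the construct runs inside an
     implicit taskgroup, started here; reduction bookkeeping is registered
     with it when requested.  With NOGROUP, only a pending cancellation
     needs to be honored.  */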
  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
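	  /* The argument block begins with the two iteration-bound slots,
	     followed by a pointer to the reduction data prepared by the
	     compiler.  */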
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

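  /* Run the tasks immediately and undeferred when the IF clause is
     false, when there is no team to defer to, when the generating task
     is final, or when the team already has far more queued tasks than
     threads (a throttling heuristic).  */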
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
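	  /* Copy-construct every task's argument block first and only
	     then run the task bodies: each firstprivate copy must be
	     taken at the point the construct is encountered, before any
	     task body gets a chance to modify the original variables.  */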
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step -= step;
	      fn (arg);
	      arg += arg_size;
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
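	/* No copy constructor: DATA itself can be reused for every task,
	   rewriting just the two bound slots before each undeferred
	   invocation.  */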
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step -= step;
	    fn (data);
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

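      /* Deferred path: allocate each task together with its argument
	 block, fill in the per-task bounds, and then queue all of them
	 under a single acquisition of the team's task lock.  */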
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step -= step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
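      /* Link each task on up to three priority queues: its parent's
	 children queue, the enclosing taskgroup's queue, and the
	 team-wide queue that idle threads poll for work.  */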
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
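      /* Wake only as many idle threads as there are new tasks, and no
	 more than appear to be available.  */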
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
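  /* Unless NOGROUP was given, wait here until all generated tasks have
     completed, as the taskloop construct requires.  */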
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}