/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.
   NOTE(review): TYPE, UTYPE and TYPE_is_long are not defined in this
   file; presumably the including file provides them -- confirm.  */

/* Called when encountering a taskloop construct.  The iteration space
   from START to END with stride STEP is divided into chunks and FN is
   invoked once per chunk.  The bounds of each chunk are stored in the
   first two TYPE slots of the argument block handed to FN: slot 0
   receives the chunk start and slot 1 the chunk end.

   DATA is the caller's argument block.  When a private copy is made it
   occupies ARG_SIZE bytes aligned to ARG_ALIGN and is filled by CPYFN if
   that is non-NULL, by memcpy otherwise.  The alignment masks below
   assume ARG_ALIGN is a power of two -- TODO confirm with callers.

   FLAGS bits tested here:
     GOMP_TASK_FLAG_GRAINSIZE  NUM_TASKS carries a grain size instead of
                               a task count.
     GOMP_TASK_FLAG_UP         (variant without TYPE_is_long only)
                               selects the upward iteration-count formula.
     GOMP_TASK_FLAG_NOGROUP    do not wrap the tasks in
                               GOMP_taskgroup_start / GOMP_taskgroup_end.
     GOMP_TASK_FLAG_IF         when clear, every chunk is run immediately
                               by the calling thread.
     GOMP_TASK_FLAG_FINAL      created tasks get final_task set.

   Without GOMP_TASK_FLAG_GRAINSIZE, a NUM_TASKS of zero is replaced by
   team->nthreads (or 1 when there is no team).  PRIORITY is clamped to
   gomp_max_task_priority_var.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
               long arg_size, long arg_align, unsigned flags,
               unsigned long num_tasks, int priority,
               TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    return;

  /* Compute N, the number of loop iterations, as the distance between
     START and END divided by STEP and rounded up.  An empty range
     returns immediately.  */
#ifdef TYPE_is_long
  /* Signed variant: the sign of STEP gives the direction.  S is STEP
     moved one unit towards zero, so adding it before dividing rounds
     the quotient up.  */
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
        return;
      s--;
    }
  else
    {
      if (start <= end)
        return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Variant without TYPE_is_long: the direction comes from
     GOMP_TASK_FLAG_UP rather than from the sign of STEP.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
        return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
        return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* TASK_STEP is the distance START advances per task.  NFIRST is the
     task index after which TASK_STEP is reduced by one STEP; it is only
     changed from its initial value N when the split is uneven, so that
     the leading tasks each take one extra iteration.  */
  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* NUM_TASKS arrived holding the grain size; derive the real task
         count from it.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* The quotient is computed in UTYPE; if it does not survive the
         conversion to unsigned long, saturate to ~0UL.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
        num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
        {
          /* A single task covers the whole range.  */
          num_tasks = 1;
          task_step = end - start;
        }
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
               && num_tasks != ~0UL
#endif
              )
        {
          /* Each task takes GRAINSIZE iterations; the N - MUL left over
             are handed out one each to the leading tasks.  */
          UTYPE mul = num_tasks * grainsize;
          task_step = (TYPE) grainsize * step;
          if (mul != n)
            {
              task_step += step;
              nfirst = n - mul - 1;
            }
        }
      else
        {
          /* Spread N iterations evenly: DIV per task, with the first MOD
             tasks taking one more.  */
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }
  else
    {
      /* NUM_TASKS is a task count; zero selects a default.  */
      if (num_tasks == 0)
        num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
        /* No more tasks than iterations; TASK_STEP stays at STEP.  */
        num_tasks = n;
      else
        {
          /* Spread N iterations evenly: DIV per task, with the first MOD
             tasks taking one more.  */
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* No new taskgroup is opened; give up if the current task's
         enclosing taskgroup is already cancelled.  */
      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
        return;
    }
  else
    ialias_call (GOMP_taskgroup_start) ();

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run the chunks right here on the calling thread when the IF flag is
     clear, there is no team, the current task is final, or adding
     NUM_TASKS to team->task_count would exceed 64 * team->nthreads.
     Otherwise allocate the tasks and queue them on the team.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
          /* With a copy function, all task structures and argument
             blocks live in variable-length arrays on this stack frame.
             Every argument block is constructed by CPYFN in a first
             pass; FN is run on each in a second pass.  */
          struct gomp_task task[num_tasks];
          struct gomp_task *parent = thr->task;
          /* Round ARG_SIZE up to a multiple of ARG_ALIGN so consecutive
             blocks in BUF stay aligned.  */
          arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
          char buf[num_tasks * arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          char *orig_arg = arg;
          for (i = 0; i < num_tasks; i++)
            {
              gomp_init_task (&task[i], parent, gomp_icv (false));
              task[i].priority = priority;
              task[i].kind = GOMP_TASK_UNDEFERRED;
              task[i].final_task = (thr->task && thr->task->final_task)
                                   || (flags & GOMP_TASK_FLAG_FINAL);
              if (thr->task)
                {
                  task[i].in_tied_task = thr->task->in_tied_task;
                  task[i].taskgroup = thr->task->taskgroup;
                }
              /* CPYFN is called with the new task installed as the
                 thread's current task.  */
              thr->task = &task[i];
              cpyfn (arg, data);
              arg += arg_size;
            }
          arg = orig_arg;
          for (i = 0; i < num_tasks; i++)
            {
              thr->task = &task[i];
              /* Store this chunk's [start, end) bounds, then advance.  */
              ((TYPE *)arg)[0] = start;
              start += task_step;
              ((TYPE *)arg)[1] = start;
              /* Tasks after index NFIRST get one iteration fewer.  */
              if (i == nfirst)
                task_step -= step;
              fn (arg);
              arg += arg_size;
              /* If FN left entries in this task's children queue, clear
                 their parent link under the team task lock before the
                 stack-allocated task goes away.  */
              if (!priority_queue_empty_p (&task[i].children_queue,
                                           MEMMODEL_RELAXED))
                {
                  gomp_mutex_lock (&team->task_lock);
                  gomp_clear_parent (&task[i].children_queue);
                  gomp_mutex_unlock (&team->task_lock);
                }
              gomp_end_task ();
            }
        }
      else
        /* Without a copy function, DATA itself is reused for every
           chunk: its first two TYPE slots are overwritten each time.  */
        for (i = 0; i < num_tasks; i++)
          {
            struct gomp_task task;

            gomp_init_task (&task, thr->task, gomp_icv (false));
            task.priority = priority;
            task.kind = GOMP_TASK_UNDEFERRED;
            task.final_task = (thr->task && thr->task->final_task)
                              || (flags & GOMP_TASK_FLAG_FINAL);
            if (thr->task)
              {
                task.in_tied_task = thr->task->in_tied_task;
                task.taskgroup = thr->task->taskgroup;
              }
            thr->task = &task;
            /* Store this chunk's [start, end) bounds, then advance.  */
            ((TYPE *)data)[0] = start;
            start += task_step;
            ((TYPE *)data)[1] = start;
            /* Tasks after index NFIRST get one iteration fewer.  */
            if (i == nfirst)
              task_step -= step;
            fn (data);
            /* Same children-queue cleanup as in the CPYFN branch.  */
            if (!priority_queue_empty_p (&task.children_queue,
                                         MEMMODEL_RELAXED))
              {
                gomp_mutex_lock (&team->task_lock);
                gomp_clear_parent (&task.children_queue);
                gomp_mutex_unlock (&team->task_lock);
              }
            gomp_end_task ();
          }
    }
  else
    {
      /* Deferred path: build all tasks first, then publish them to the
         queues in one critical section.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
        {
          /* One allocation holds the task structure followed by its
             argument block, with slack so the block can be aligned.  */
          struct gomp_task *task
            = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
          tasks[i] = task;
          arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
                          & ~(uintptr_t) (arg_align - 1));
          gomp_init_task (task, parent, gomp_icv (false));
          task->priority = priority;
          /* Marked undeferred while its argument block is being set up;
             switched to GOMP_TASK_WAITING below.  */
          task->kind = GOMP_TASK_UNDEFERRED;
          task->in_tied_task = parent->in_tied_task;
          task->taskgroup = taskgroup;
          /* The new task is the thread's current task for the duration
             of the copy; PARENT is restored afterwards.  */
          thr->task = task;
          if (cpyfn)
            {
              cpyfn (arg, data);
              task->copy_ctors_done = true;
            }
          else
            memcpy (arg, data, arg_size);
          /* Store this chunk's [start, end) bounds, then advance.  */
          ((TYPE *)arg)[0] = start;
          start += task_step;
          ((TYPE *)arg)[1] = start;
          /* Tasks after index NFIRST get one iteration fewer.  */
          if (i == nfirst)
            task_step -= step;
          thr->task = parent;
          task->kind = GOMP_TASK_WAITING;
          task->fn = fn;
          task->fn_data = arg;
          /* NOTE(review): the shift presumably reduces
             GOMP_TASK_FLAG_FINAL to 0 or 1, which requires the flag to
             be bit 1 -- confirm against its definition.  */
          task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
        }
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  This bail-out is only taken when CPYFN is NULL.
         NOTE(review): presumably because arguments built by CPYFN
         cannot simply be freed -- confirm.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
                             || (taskgroup && taskgroup->cancelled))
                            && cpyfn == NULL, 0))
        {
          gomp_mutex_unlock (&team->task_lock);
          for (i = 0; i < num_tasks; i++)
            {
              gomp_finish_task (tasks[i]);
              free (tasks[i]);
            }
          /* Balance the GOMP_taskgroup_start issued above.  */
          if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
            ialias_call (GOMP_taskgroup_end) ();
          return;
        }
      if (taskgroup)
        taskgroup->num_children += num_tasks;
      /* Insert every task into the parent's children queue, the
         taskgroup queue (when there is a taskgroup) and the team queue,
         and bump the team counters accordingly.  */
      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task = tasks[i];
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup)
            priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   task, priority, PRIORITY_INSERT_BEGIN,
                                   /*last_parent_depends_on=*/false,
                                   task->parent_depends_on);
          priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
                                 PRIORITY_INSERT_END,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          ++team->task_count;
          ++team->task_queued_count;
        }
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* DO_WAKE is team->nthreads minus the running-task count, minus
         one more when the parent is not in a tied task; it is capped at
         NUM_TASKS and is zero when that difference is not positive.  */
      if (team->task_running_count + !parent->in_tied_task
          < team->nthreads)
        {
          do_wake = team->nthreads - team->task_running_count
                    - !parent->in_tied_task;
          if ((unsigned long) do_wake > num_tasks)
            do_wake = num_tasks;
        }
      else
        do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      /* The wake-up is issued after the task lock has been released.  */
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Close the taskgroup opened above unless NOGROUP was requested.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}