/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>

typedef unsigned long long gomp_ull;

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */

int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that a previous call already handed this
     thread the final chunk; nothing more to do.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Nonzero (i.e. "no more iterations") only when the range is
	 already empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, t, s0, e0, s, e;

      /* With unspecified chunk size each thread gets exactly one chunk,
	 so any positive static_trip means we already took ours.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  Nonzero ws->mode
	 selects the second form, in which incr_ull evidently holds a
	 wrapped (two's-complement negative) increment: note the
	 division by -ws->incr_ull.  All arithmetic here relies on
	 well-defined unsigned wraparound.  (The __builtin_expect hint
	 wraps ws->mode itself; the == 0 comparison is outside it.)  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      t = n % nthreads;
      /* The first T threads receive Q+1 iterations, the remainder
	 receive Q; T doubles as the offset accumulated by the larger
	 chunks ahead of thread I.  */
      if (i < t)
	{
	  t = 0;
	  q++;
	}
      s0 = q * i + t;
      e0 = s0 + q;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* -1 marks that this thread's chunk reaches the very end of the
	 iteration space.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has already taken.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      /* If this chunk ends exactly at N we just handed out the last
	 iteration; make the next call return -1.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}


/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with
   ws->lock held.  */

bool
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, chunk, left;

  start = ws->next_ull;
  if (start == ws->end_ull)
    return false;

  chunk = ws->chunk_size_ull;
  left = ws->end_ull - start;
  /* ws->mode & 2 selects the inverted clamp direction — used with the
     wrapped negative increments seen above (cf. -ws->incr_ull), where
     "left" and "chunk" are also wrapped values.  */
  if (__builtin_expect (ws->mode & 2, 0))
    {
      if (chunk < left)
	chunk = left;
    }
  else
    {
      if (chunk > left)
	chunk = left;
    }
  end = start + chunk;

  /* Caller holds ws->lock, so a plain store suffices.  */
  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}


#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.
 */

bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* Fast path: when ws->mode bit 0 is set, a bare fetch-and-add is
     used with no retry loop — presumably the work-share setup proved
     the add cannot overflow past the usable range (TODO: confirm
     against the mode computation at work-share init).  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      /* Bit 2 of ws->mode picks the comparison direction.  */
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Inverted comparisons: the wrapped-increment case, where
	     values run "downward" through the unsigned space.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: clamp the chunk to what remains and publish it with a
     compare-and-swap, retrying on contention.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Same inverted-clamp trick as gomp_iter_ull_dynamic_next_locked.  */
      if (__builtin_expect (ws->mode & 2, 0))
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Another thread advanced next_ull; retry from the value it
	 installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull n, q;
  gomp_ull start, end;

  if (ws->next_ull == ws->end_ull)
    return false;

  start = ws->next_ull;
  /* N = iterations still remaining.  Nonzero ws->mode selects the
     wrapped-negative-increment form (note the division by
     -ws->incr_ull).  */
  if (__builtin_expect (ws->mode, 0) == 0)
    n = (ws->end_ull - start) / ws->incr_ull;
  else
    n = (start - ws->end_ull) / -ws->incr_ull;
  /* Guided schedule: take 1/nthreads of the remaining work, rounded
     up...  */
  q = (n + nthreads - 1) / nthreads;

  /* ... but never less than the requested chunk size.  */
  if (q < ws->chunk_size_ull)
    q = ws->chunk_size_ull;
  /* If fewer than Q iterations remain, just take everything left.  */
  if (q <= n)
    end = start + q * ws->incr_ull;
  else
    end = ws->end_ull;

  /* Caller holds the work-share lock, so a plain store suffices.  */
  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Same remaining-count and chunk computation as the locked
	 variant, recomputed each attempt from the current START.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      /* Publish the new boundary; on contention, retry with the value
	 the winning thread installed.  */
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */