1 /* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU OpenMP Library (libgomp). 5 6 Libgomp is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 more details. 15 16 Under Section 7 of GPL version 3, you are granted additional 17 permissions described in the GCC Runtime Library Exception, version 18 3.1, as published by the Free Software Foundation. 19 20 You should have received a copy of the GNU General Public License and 21 a copy of the GCC Runtime Library Exception along with this program; 22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 <http://www.gnu.org/licenses/>. */ 24 25 /* This file contains routines for managing work-share iteration, both 26 for loops and sections. */ 27 28 #include "libgomp.h" 29 #include <stdlib.h> 30 31 32 /* This function implements the STATIC scheduling method. The caller should 33 iterate *pstart <= x < *pend. Return zero if there are more iterations 34 to perform; nonzero if not. Return less than 0 if this thread had 35 received the absolutely last iteration. */ 36 37 int 38 gomp_iter_static_next (long *pstart, long *pend) 39 { 40 struct gomp_thread *thr = gomp_thread (); 41 struct gomp_team *team = thr->ts.team; 42 struct gomp_work_share *ws = thr->ts.work_share; 43 unsigned long nthreads = team ? team->nthreads : 1; 44 45 if (thr->ts.static_trip == -1) 46 return -1; 47 48 /* Quick test for degenerate teams and orphaned constructs. */ 49 if (nthreads == 1) 50 { 51 *pstart = ws->next; 52 *pend = ws->end; 53 thr->ts.static_trip = -1; 54 return ws->next == ws->end; 55 } 56 57 /* We interpret chunk_size zero as "unspecified", which means that we 58 should break up the iterations such that each thread makes only one 59 trip through the outer loop. */ 60 if (ws->chunk_size == 0) 61 { 62 unsigned long n, q, i, t; 63 unsigned long s0, e0; 64 long s, e; 65 66 if (thr->ts.static_trip > 0) 67 return 1; 68 69 /* Compute the total number of iterations. */ 70 s = ws->incr + (ws->incr > 0 ? -1 : 1); 71 n = (ws->end - ws->next + s) / ws->incr; 72 i = thr->ts.team_id; 73 74 /* Compute the "zero-based" start and end points. That is, as 75 if the loop began at zero and incremented by one. */ 76 q = n / nthreads; 77 t = n % nthreads; 78 if (i < t) 79 { 80 t = 0; 81 q++; 82 } 83 s0 = q * i + t; 84 e0 = s0 + q; 85 86 /* Notice when no iterations allocated for this thread. */ 87 if (s0 >= e0) 88 { 89 thr->ts.static_trip = 1; 90 return 1; 91 } 92 93 /* Transform these to the actual start and end numbers. */ 94 s = (long)s0 * ws->incr + ws->next; 95 e = (long)e0 * ws->incr + ws->next; 96 97 *pstart = s; 98 *pend = e; 99 thr->ts.static_trip = (e0 == n ? -1 : 1); 100 return 0; 101 } 102 else 103 { 104 unsigned long n, s0, e0, i, c; 105 long s, e; 106 107 /* Otherwise, each thread gets exactly chunk_size iterations 108 (if available) each time through the loop. */ 109 110 s = ws->incr + (ws->incr > 0 ? -1 : 1); 111 n = (ws->end - ws->next + s) / ws->incr; 112 i = thr->ts.team_id; 113 c = ws->chunk_size; 114 115 /* Initial guess is a C sized chunk positioned nthreads iterations 116 in, offset by our thread number. */ 117 s0 = (thr->ts.static_trip * nthreads + i) * c; 118 e0 = s0 + c; 119 120 /* Detect overflow. */ 121 if (s0 >= n) 122 return 1; 123 if (e0 > n) 124 e0 = n; 125 126 /* Transform these to the actual start and end numbers. */ 127 s = (long)s0 * ws->incr + ws->next; 128 e = (long)e0 * ws->incr + ws->next; 129 130 *pstart = s; 131 *pend = e; 132 133 if (e0 == n) 134 thr->ts.static_trip = -1; 135 else 136 thr->ts.static_trip++; 137 return 0; 138 } 139 } 140 141 142 /* This function implements the DYNAMIC scheduling method. Arguments are 143 as for gomp_iter_static_next. This function must be called with ws->lock 144 held. */ 145 146 bool 147 gomp_iter_dynamic_next_locked (long *pstart, long *pend) 148 { 149 struct gomp_thread *thr = gomp_thread (); 150 struct gomp_work_share *ws = thr->ts.work_share; 151 long start, end, chunk, left; 152 153 start = ws->next; 154 if (start == ws->end) 155 return false; 156 157 chunk = ws->chunk_size; 158 left = ws->end - start; 159 if (ws->incr < 0) 160 { 161 if (chunk < left) 162 chunk = left; 163 } 164 else 165 { 166 if (chunk > left) 167 chunk = left; 168 } 169 end = start + chunk; 170 171 ws->next = end; 172 *pstart = start; 173 *pend = end; 174 return true; 175 } 176 177 178 #ifdef HAVE_SYNC_BUILTINS 179 /* Similar, but doesn't require the lock held, and uses compare-and-swap 180 instead. Note that the only memory value that changes is ws->next. */ 181 182 bool 183 gomp_iter_dynamic_next (long *pstart, long *pend) 184 { 185 struct gomp_thread *thr = gomp_thread (); 186 struct gomp_work_share *ws = thr->ts.work_share; 187 long start, end, nend, chunk, incr; 188 189 end = ws->end; 190 incr = ws->incr; 191 chunk = ws->chunk_size; 192 193 if (__builtin_expect (ws->mode, 1)) 194 { 195 long tmp = __sync_fetch_and_add (&ws->next, chunk); 196 if (incr > 0) 197 { 198 if (tmp >= end) 199 return false; 200 nend = tmp + chunk; 201 if (nend > end) 202 nend = end; 203 *pstart = tmp; 204 *pend = nend; 205 return true; 206 } 207 else 208 { 209 if (tmp <= end) 210 return false; 211 nend = tmp + chunk; 212 if (nend < end) 213 nend = end; 214 *pstart = tmp; 215 *pend = nend; 216 return true; 217 } 218 } 219 220 start = ws->next; 221 while (1) 222 { 223 long left = end - start; 224 long tmp; 225 226 if (start == end) 227 return false; 228 229 if (incr < 0) 230 { 231 if (chunk < left) 232 chunk = left; 233 } 234 else 235 { 236 if (chunk > left) 237 chunk = left; 238 } 239 nend = start + chunk; 240 241 tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 242 if (__builtin_expect (tmp == start, 1)) 243 break; 244 245 start = tmp; 246 } 247 248 *pstart = start; 249 *pend = nend; 250 return true; 251 } 252 #endif /* HAVE_SYNC_BUILTINS */ 253 254 255 /* This function implements the GUIDED scheduling method. Arguments are 256 as for gomp_iter_static_next. This function must be called with the 257 work share lock held. */ 258 259 bool 260 gomp_iter_guided_next_locked (long *pstart, long *pend) 261 { 262 struct gomp_thread *thr = gomp_thread (); 263 struct gomp_work_share *ws = thr->ts.work_share; 264 struct gomp_team *team = thr->ts.team; 265 unsigned long nthreads = team ? team->nthreads : 1; 266 unsigned long n, q; 267 long start, end; 268 269 if (ws->next == ws->end) 270 return false; 271 272 start = ws->next; 273 n = (ws->end - start) / ws->incr; 274 q = (n + nthreads - 1) / nthreads; 275 276 if (q < ws->chunk_size) 277 q = ws->chunk_size; 278 if (q <= n) 279 end = start + q * ws->incr; 280 else 281 end = ws->end; 282 283 ws->next = end; 284 *pstart = start; 285 *pend = end; 286 return true; 287 } 288 289 #ifdef HAVE_SYNC_BUILTINS 290 /* Similar, but doesn't require the lock held, and uses compare-and-swap 291 instead. Note that the only memory value that changes is ws->next. */ 292 293 bool 294 gomp_iter_guided_next (long *pstart, long *pend) 295 { 296 struct gomp_thread *thr = gomp_thread (); 297 struct gomp_work_share *ws = thr->ts.work_share; 298 struct gomp_team *team = thr->ts.team; 299 unsigned long nthreads = team ? team->nthreads : 1; 300 long start, end, nend, incr; 301 unsigned long chunk_size; 302 303 start = ws->next; 304 end = ws->end; 305 incr = ws->incr; 306 chunk_size = ws->chunk_size; 307 308 while (1) 309 { 310 unsigned long n, q; 311 long tmp; 312 313 if (start == end) 314 return false; 315 316 n = (end - start) / incr; 317 q = (n + nthreads - 1) / nthreads; 318 319 if (q < chunk_size) 320 q = chunk_size; 321 if (__builtin_expect (q <= n, 1)) 322 nend = start + q * incr; 323 else 324 nend = end; 325 326 tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 327 if (__builtin_expect (tmp == start, 1)) 328 break; 329 330 start = tmp; 331 } 332 333 *pstart = start; 334 *pend = nend; 335 return true; 336 } 337 #endif /* HAVE_SYNC_BUILTINS */ 338