/* Copyright (C) 2005 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or
   (at your option) any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
   more details.

   You should have received a copy of the GNU Lesser General Public License
   along with libgomp; see the file COPYING.LIB.  If not, write to the
   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* As a special exception, if you link this library with other files, some
   of which are compiled with GCC, to produce an executable, this library
   does not by itself cause the resulting executable to be covered by the
   GNU General Public License.  This exception does not however invalidate
   any other reasons why the executable file might be covered by the GNU
   General Public License.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>


/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */

int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
	e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
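
/* A worked example of the unspecified-chunk case above (illustrative
   numbers only): for a loop covering 0 <= x < 10 with incr 1 and
   nthreads 4, n is 10 and q rounds up to 3, so thread 0 is handed
   [0,3), thread 1 [3,6), thread 2 [6,9) and thread 3 [9,10).  Only
   thread 3 receives the final iteration, so only its static_trip is
   set to -1; the others are set to 1 and get nothing on the next
   call.  */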

/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */

bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;

  start = ws->next;
  if (start == ws->end)
    return false;

  chunk = ws->chunk_size * ws->incr;
  left = ws->end - start;
  if (ws->incr < 0)
    {
      if (chunk < left)
	chunk = left;
    }
  else
    {
      if (chunk > left)
	chunk = left;
    }
  end = start + chunk;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}


#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size * incr;

  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
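
/* An illustration of the lock-free variant above (hypothetical values):
   with next 0, end 5, incr 1 and chunk_size 2, successful
   compare-and-swaps advance ws->next through 2, 4 and 5, handing out
   [0,2), [2,4) and the short tail [4,5).  If another thread wins the
   race, the CAS returns that thread's updated ws->next in TMP and the
   chunk computation retries from the new start.  */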

/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  long start, end;

  if (ws->next == ws->end)
    return false;

  n = (ws->end - ws->next) / ws->incr;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size)
    q = ws->chunk_size;
  if (q > n)
    q = n;

  start = ws->next;
  end = start + q * ws->incr;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (q > n)
	q = n;

      nend = start + q * incr;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
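
/* A usage sketch (hypothetical caller; `body' and `incr' here are
   stand-ins, not libgomp interfaces): the loop routines built on these
   iterators fetch and run chunks along these lines:

     long s, e, x;
     while (gomp_iter_dynamic_next (&s, &e))
       for (x = s; x != e; x += incr)
	 body (x);

   Each call either claims the half-open range [s, e) for the calling
   thread or returns false once ws->next has reached ws->end.  */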