1 /*
2 * kmp_sched.cpp -- static scheduling -- iteration initialization
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 /* Static scheduling initialization.
14
15 NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16 it may change values between parallel regions. __kmp_max_nth
17 is the largest value __kmp_nth may take, 1 is the smallest. */
18
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Out-of-line definitions of the per-type printf format specifiers consumed
// by the KD_TRACE debug output below (the traits_t specializations themselves
// are declared elsewhere — presumably kmp.h; only needed in debug builds).
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40
#if KMP_STATS_ENABLED
// Record the trip count of the loop that just finished into 'stat' and pop
// the partitioned timer.  Expands in a scope where *pupper, *plower and incr
// are live; everything is widened to kmp_int64 before the arithmetic so the
// count does not overflow for narrow iteration types.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
check_loc(ident_t * & loc)65 static inline void check_loc(ident_t *&loc) {
66 if (loc == NULL)
67 loc = &loc_stub; // may need to report location info to ittnotify
68 }
69
// Compute the iteration subspace assigned to the calling thread for a
// statically scheduled worksharing construct (loop, sections or distribute).
//
// On entry *plower/*pupper describe the whole iteration space and incr is the
// loop increment (never 0 when consistency checks pass).  On return
// *plower/*pupper bound this thread's chunk, *pstride is the distance to the
// thread's next chunk (meaningful for chunked schedules), and *plastiter is
// set iff this thread executes the sequentially last iteration.  In OMPT
// builds 'codeptr' is the return address reported via ompt_callback_work.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot flag: warn at most once per process about untagged idents.
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident flags set by the compiler.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // No workshare flag present: compiled by an outdated compiler.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Distribute partitions among the primary threads of the league, so
    // schedule relative to the parent team.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  // Single-thread team: same outcome as the serialized case above.
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    // Fewer iterations than threads: at most one iteration per thread.
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced split: first 'extras' threads get one extra iteration.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy split: every thread gets ceil(trip_count/nth) iterations;
        // the last thread's range may overshoot and is clipped below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          // Saturate on wraparound before clipping to the real upper bound.
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      // Fewer chunks than threads: only the first 'nchunks' threads work.
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      // Round-robin assignment of chunks; stride advances past one round.
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
441
// Two-level static partitioning for a combined "distribute parallel for":
// first split the iteration space across the teams of the league (the team's
// portion is returned via *pupperDist), then split the team's portion across
// the threads of this team (*plower/*pupper/*pstride), honoring 'schedule'
// and 'chunk' for the inner loop.  *plastiter is set iff this thread runs the
// sequentially last iteration of the whole space.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced split across teams: first 'extras' teams take one extra.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy split: ceil(trip_count/nteams) per team, clipped below.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value; // saturate on wraparound
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value; // saturate on wraparound
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      // NOTE: 'incr > 1' is equivalent to 'incr > 0' here since incr==1 was
      // already handled above.
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear *plastiter here; it may already be set from the
        // team-level split and must stay set only for the last thread of
        // the last team.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value; // saturate on wraparound
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value; // saturate on wraparound
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
683
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  // Snapshot the incoming global bounds; *p_lb/*p_ub are overwritten below.
  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  // Round-robin chunks over the teams: this team starts at chunk 'team_id'
  // and advances by nteams chunks each round.
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
795
796 //------------------------------------------------------------------------------
797 extern "C" {
798 /*!
799 @ingroup WORK_SHARING
800 @param loc Source code location
801 @param gtid Global thread id of this thread
802 @param schedtype Scheduling type
803 @param plastiter Pointer to the "last iteration" flag
804 @param plower Pointer to the lower bound
805 @param pupper Pointer to the upper bound
806 @param pstride Pointer to the stride
807 @param incr Loop increment
808 @param chunk The chunk size
809
810 Each of the four functions here are identical apart from the argument types.
811
812 The functions compute the upper and lower bounds and stride to be used for the
813 set of iterations to be executed by the current thread from the statically
814 scheduled loop that is described by the initial values of the bounds, stride,
815 increment and chunk size.
816
817 @{
818 */
__kmpc_for_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int32 * plower,kmp_int32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)819 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
820 kmp_int32 *plastiter, kmp_int32 *plower,
821 kmp_int32 *pupper, kmp_int32 *pstride,
822 kmp_int32 incr, kmp_int32 chunk) {
823 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
824 pupper, pstride, incr, chunk
825 #if OMPT_SUPPORT && OMPT_OPTIONAL
826 ,
827 OMPT_GET_RETURN_ADDRESS(0)
828 #endif
829 );
830 }
831
832 /*!
833 See @ref __kmpc_for_static_init_4
834 */
__kmpc_for_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint32 * plower,kmp_uint32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)835 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
836 kmp_int32 schedtype, kmp_int32 *plastiter,
837 kmp_uint32 *plower, kmp_uint32 *pupper,
838 kmp_int32 *pstride, kmp_int32 incr,
839 kmp_int32 chunk) {
840 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
841 pupper, pstride, incr, chunk
842 #if OMPT_SUPPORT && OMPT_OPTIONAL
843 ,
844 OMPT_GET_RETURN_ADDRESS(0)
845 #endif
846 );
847 }
848
849 /*!
850 See @ref __kmpc_for_static_init_4
851 */
__kmpc_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)852 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
853 kmp_int32 *plastiter, kmp_int64 *plower,
854 kmp_int64 *pupper, kmp_int64 *pstride,
855 kmp_int64 incr, kmp_int64 chunk) {
856 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
857 pupper, pstride, incr, chunk
858 #if OMPT_SUPPORT && OMPT_OPTIONAL
859 ,
860 OMPT_GET_RETURN_ADDRESS(0)
861 #endif
862 );
863 }
864
865 /*!
866 See @ref __kmpc_for_static_init_4
867 */
__kmpc_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)868 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
869 kmp_int32 schedtype, kmp_int32 *plastiter,
870 kmp_uint64 *plower, kmp_uint64 *pupper,
871 kmp_int64 *pstride, kmp_int64 incr,
872 kmp_int64 chunk) {
873 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
874 pupper, pstride, incr, chunk
875 #if OMPT_SUPPORT && OMPT_OPTIONAL
876 ,
877 OMPT_GET_RETURN_ADDRESS(0)
878 #endif
879 );
880 }
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
__kmpc_dist_for_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_int32 * plower,kmp_int32 * pupper,kmp_int32 * pupperD,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)907 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
908 kmp_int32 schedule, kmp_int32 *plastiter,
909 kmp_int32 *plower, kmp_int32 *pupper,
910 kmp_int32 *pupperD, kmp_int32 *pstride,
911 kmp_int32 incr, kmp_int32 chunk) {
912 __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
913 pupper, pupperD, pstride, incr, chunk);
914 }

/*!
See @ref __kmpc_dist_for_static_init_4
*/
__kmpc_dist_for_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_uint32 * plower,kmp_uint32 * pupper,kmp_uint32 * pupperD,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)919 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
920 kmp_int32 schedule, kmp_int32 *plastiter,
921 kmp_uint32 *plower, kmp_uint32 *pupper,
922 kmp_uint32 *pupperD, kmp_int32 *pstride,
923 kmp_int32 incr, kmp_int32 chunk) {
924 __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
925 pupper, pupperD, pstride, incr, chunk);
926 }

/*!
See @ref __kmpc_dist_for_static_init_4
*/
__kmpc_dist_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)931 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
932 kmp_int32 schedule, kmp_int32 *plastiter,
933 kmp_int64 *plower, kmp_int64 *pupper,
934 kmp_int64 *pupperD, kmp_int64 *pstride,
935 kmp_int64 incr, kmp_int64 chunk) {
936 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
937 pupper, pupperD, pstride, incr, chunk);
938 }

/*!
See @ref __kmpc_dist_for_static_init_4
*/
__kmpc_dist_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_uint64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)943 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
944 kmp_int32 schedule, kmp_int32 *plastiter,
945 kmp_uint64 *plower, kmp_uint64 *pupper,
946 kmp_uint64 *pupperD, kmp_int64 *pstride,
947 kmp_int64 incr, kmp_int64 chunk) {
948 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
949 pupper, pupperD, pstride, incr, chunk);
950 }
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//   Transfer call to template< type T >
//   __kmp_team_static_init( ident_t *loc, int gtid,
//       int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite distribute
parallel loop construct. These functions are all identical apart from the types
of the arguments.
*/

__kmpc_team_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int32 * p_lb,kmp_int32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)981 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
982 kmp_int32 *p_lb, kmp_int32 *p_ub,
983 kmp_int32 *p_st, kmp_int32 incr,
984 kmp_int32 chunk) {
985 KMP_DEBUG_ASSERT(__kmp_init_serial);
986 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
987 chunk);
988 }

/*!
See @ref __kmpc_team_static_init_4
*/
__kmpc_team_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint32 * p_lb,kmp_uint32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)993 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
994 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
995 kmp_int32 *p_st, kmp_int32 incr,
996 kmp_int32 chunk) {
997 KMP_DEBUG_ASSERT(__kmp_init_serial);
998 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
999 chunk);
1000 }

/*!
See @ref __kmpc_team_static_init_4
*/
__kmpc_team_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int64 * p_lb,kmp_int64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)1005 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1006 kmp_int64 *p_lb, kmp_int64 *p_ub,
1007 kmp_int64 *p_st, kmp_int64 incr,
1008 kmp_int64 chunk) {
1009 KMP_DEBUG_ASSERT(__kmp_init_serial);
1010 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1011 chunk);
1012 }

/*!
See @ref __kmpc_team_static_init_4
*/
__kmpc_team_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint64 * p_lb,kmp_uint64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)1017 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1018 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1019 kmp_int64 *p_st, kmp_int64 incr,
1020 kmp_int64 chunk) {
1021 KMP_DEBUG_ASSERT(__kmp_init_serial);
1022 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1023 chunk);
1024 }
/*!
@}
*/
1028
1029 } // extern "C"
1030