1 /*
2 * kmp_sched.cpp -- static scheduling -- iteration initialization
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 /* Static scheduling initialization.
14
15 NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16 it may change values between parallel regions. __kmp_max_nth
17 is the largest value __kmp_nth may take, 1 is the smallest. */
18
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// printf-style format specifiers per loop-variable type, consumed by the
// KD_TRACE dumps below (traits_t<> itself is declared in a header included
// above).
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40
#if KMP_STATS_ENABLED
// Compute the trip count of the loop described by the enclosing routine's
// *pupper, *plower and incr (all captured from the enclosing scope), record
// it under 'stat', and pop the partitioned timer pushed on entry to the
// routine. Expands to nothing when stats are disabled.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63
/* Compute the chunk of iterations that thread 'global_tid' executes for a
   statically scheduled worksharing construct (loop, sections, or distribute
   via the schedule-type conversion below).

   On entry *plower/*pupper describe the whole iteration space; on return they
   are narrowed to this thread's portion. *pstride is set to the distance to
   the thread's next chunk (meaningful for chunked schedules), and *plastiter
   is set nonzero iff this thread executes the last iteration. 'incr' is the
   loop increment (zero is rejected under consistency checking), 'chunk' the
   schedule's chunk size, and 'codeptr' (OMPT builds only) the caller's return
   address passed through to tool callbacks. */
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot flag so OmptOutdatedWorkshare is warned about at most once per
  // process (atomically flipped 0 -> 1 below).
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags set by the compiler.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // No workshare flag: code was compiled by an outdated compiler.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      // Zero-trip: report a workshare of 0 iterations to the tool.
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Distribute partitions among team masters, so use the parent team and
    // this thread's position within it.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Team of one thread: same outcome as the serialized case above.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: threads 0..trip_count-1 get exactly
      // one iteration each, the rest get an empty range (lower > upper).
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: each thread gets trip_count/nth iterations, the first
        // 'extras' threads get one more.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: ceil(trip_count/nth)-sized chunks; the tail thread(s) may
        // get a short or empty range, clamped against old_upper below.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value; // arithmetic wrapped: clamp
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value; // arithmetic wrapped: clamp
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    // Unchunked static: one chunk per thread, stride spans everything.
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth; // distance between this thread's chunks
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): this rounds 'span' up to a multiple of 'chunk' only when
    // chunk is a power of two -- confirm callers guarantee that.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
418
/* Initialize bounds for a combined "distribute parallel for" with static
   schedules at both levels. First partitions the global iteration space
   across teams (writing the team's upper bound to *pupperDist), then
   partitions the team's portion across the team's threads (writing this
   thread's bounds to *plower/*pupper and its stride to *pstride).
   *plastiter ends up nonzero iff this thread executes the last iteration of
   the whole distribute loop. Must be called from within a teams construct
   (asserted below). */
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: trip_count/nteams per team, first 'extras' teams get +1.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: ceil(trip_count/nteams)-sized chunks, clamped to the global
      // upper bound; trailing teams may end up with an empty range.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value; // arithmetic wrapped: clamp
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value; // arithmetic wrapped: clamp
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // (incr==1/-1 handled above, so this is the general incr>0 case)
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear the last-iteration flag here; the distribute level
        // above already decided whether this team holds it.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value; // arithmetic wrapped: clamp
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value; // arithmetic wrapped: clamp
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
// Jump target for teams that received an empty range from the greedy
// distribute partitioning above.
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
659
/* Compute the FIRST chunk of a dist_schedule(static, chunk) distribute loop
   for the calling thread's team, and the stride (*p_st) by which the team
   advances to its subsequent chunks. *p_last is set for the team that will
   execute the last iteration of the loop. Must be called from within a
   teams construct (asserted below). */
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  // Teams take round-robin chunks: team 'team_id' starts at its chunk and
  // advances by nteams chunks each time.
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb) // overflow?
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
770
771 //------------------------------------------------------------------------------
772 extern "C" {
773 /*!
774 @ingroup WORK_SHARING
775 @param loc Source code location
776 @param gtid Global thread id of this thread
777 @param schedtype Scheduling type
778 @param plastiter Pointer to the "last iteration" flag
779 @param plower Pointer to the lower bound
780 @param pupper Pointer to the upper bound
781 @param pstride Pointer to the stride
782 @param incr Loop increment
783 @param chunk The chunk size
784
Each of the four functions here is identical apart from the argument types.
786
787 The functions compute the upper and lower bounds and stride to be used for the
788 set of iterations to be executed by the current thread from the statically
789 scheduled loop that is described by the initial values of the bounds, stride,
790 increment and chunk size.
791
792 @{
793 */
__kmpc_for_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int32 * plower,kmp_int32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)794 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
795 kmp_int32 *plastiter, kmp_int32 *plower,
796 kmp_int32 *pupper, kmp_int32 *pstride,
797 kmp_int32 incr, kmp_int32 chunk) {
798 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
799 pupper, pstride, incr, chunk
800 #if OMPT_SUPPORT && OMPT_OPTIONAL
801 ,
802 OMPT_GET_RETURN_ADDRESS(0)
803 #endif
804 );
805 }
806
807 /*!
808 See @ref __kmpc_for_static_init_4
809 */
__kmpc_for_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint32 * plower,kmp_uint32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)810 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
811 kmp_int32 schedtype, kmp_int32 *plastiter,
812 kmp_uint32 *plower, kmp_uint32 *pupper,
813 kmp_int32 *pstride, kmp_int32 incr,
814 kmp_int32 chunk) {
815 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
816 pupper, pstride, incr, chunk
817 #if OMPT_SUPPORT && OMPT_OPTIONAL
818 ,
819 OMPT_GET_RETURN_ADDRESS(0)
820 #endif
821 );
822 }
823
824 /*!
825 See @ref __kmpc_for_static_init_4
826 */
__kmpc_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)827 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
828 kmp_int32 *plastiter, kmp_int64 *plower,
829 kmp_int64 *pupper, kmp_int64 *pstride,
830 kmp_int64 incr, kmp_int64 chunk) {
831 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
832 pupper, pstride, incr, chunk
833 #if OMPT_SUPPORT && OMPT_OPTIONAL
834 ,
835 OMPT_GET_RETURN_ADDRESS(0)
836 #endif
837 );
838 }
839
840 /*!
841 See @ref __kmpc_for_static_init_4
842 */
__kmpc_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)843 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
844 kmp_int32 schedtype, kmp_int32 *plastiter,
845 kmp_uint64 *plower, kmp_uint64 *pupper,
846 kmp_int64 *pstride, kmp_int64 incr,
847 kmp_int64 chunk) {
848 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
849 pupper, pstride, incr, chunk
850 #if OMPT_SUPPORT && OMPT_OPTIONAL
851 ,
852 OMPT_GET_RETURN_ADDRESS(0)
853 #endif
854 );
855 }
856 /*!
857 @}
858 */
859
860 /*!
861 @ingroup WORK_SHARING
862 @param loc Source code location
863 @param gtid Global thread id of this thread
864 @param schedule Scheduling type for the parallel loop
865 @param plastiter Pointer to the "last iteration" flag
866 @param plower Pointer to the lower bound
867 @param pupper Pointer to the upper bound of loop chunk
868 @param pupperD Pointer to the upper bound of dist_chunk
869 @param pstride Pointer to the stride for parallel loop
870 @param incr Loop increment
871 @param chunk The chunk size for the parallel loop
872
Each of the four functions here is identical apart from the argument types.
874
875 The functions compute the upper and lower bounds and strides to be used for the
876 set of iterations to be executed by the current thread from the statically
877 scheduled loop that is described by the initial values of the bounds, strides,
878 increment and chunks for parallel loop and distribute constructs.
879
880 @{
881 */
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  // Forward to the templated implementation, instantiated for signed 32-bit
  // loop variables.
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
890
891 /*!
892 See @ref __kmpc_dist_for_static_init_4
893 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  // Forward to the templated implementation, instantiated for unsigned 32-bit
  // loop variables.
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
902
903 /*!
904 See @ref __kmpc_dist_for_static_init_4
905 */
__kmpc_dist_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)906 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
907 kmp_int32 schedule, kmp_int32 *plastiter,
908 kmp_int64 *plower, kmp_int64 *pupper,
909 kmp_int64 *pupperD, kmp_int64 *pstride,
910 kmp_int64 incr, kmp_int64 chunk) {
911 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
912 pupper, pupperD, pstride, incr, chunk);
913 }
914
915 /*!
916 See @ref __kmpc_dist_for_static_init_4
917 */
__kmpc_dist_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_uint64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)918 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
919 kmp_int32 schedule, kmp_int32 *plastiter,
920 kmp_uint64 *plower, kmp_uint64 *pupper,
921 kmp_uint64 *pupperD, kmp_int64 *pstride,
922 kmp_int64 incr, kmp_int64 chunk) {
923 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
924 pupper, pupperD, pstride, incr, chunk);
925 }
926 /*!
927 @}
928 */
929
930 //------------------------------------------------------------------------------
931 // Auxiliary routines for Distribute Parallel Loop construct implementation
932 // Transfer call to template< type T >
933 // __kmp_team_static_init( ident_t *loc, int gtid,
934 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
935
936 /*!
937 @ingroup WORK_SHARING
938 @{
939 @param loc Source location
940 @param gtid Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Pointer to the stride
945 @param incr Loop increment
946 @param chunk The chunk size to block with
947
948 The functions compute the upper and lower bounds and stride to be used for the
949 set of iterations to be executed by the current team from the statically
950 scheduled loop that is described by the initial values of the bounds, stride,
951 increment and chunk for the distribute construct as part of composite distribute
952 parallel loop construct. These functions are all identical apart from the types
953 of the arguments.
954 */
955
__kmpc_team_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int32 * p_lb,kmp_int32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)956 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
957 kmp_int32 *p_lb, kmp_int32 *p_ub,
958 kmp_int32 *p_st, kmp_int32 incr,
959 kmp_int32 chunk) {
960 KMP_DEBUG_ASSERT(__kmp_init_serial);
961 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
962 chunk);
963 }
964
965 /*!
966 See @ref __kmpc_team_static_init_4
967 */
__kmpc_team_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint32 * p_lb,kmp_uint32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)968 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
969 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
970 kmp_int32 *p_st, kmp_int32 incr,
971 kmp_int32 chunk) {
972 KMP_DEBUG_ASSERT(__kmp_init_serial);
973 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
974 chunk);
975 }
976
977 /*!
978 See @ref __kmpc_team_static_init_4
979 */
__kmpc_team_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int64 * p_lb,kmp_int64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)980 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
981 kmp_int64 *p_lb, kmp_int64 *p_ub,
982 kmp_int64 *p_st, kmp_int64 incr,
983 kmp_int64 chunk) {
984 KMP_DEBUG_ASSERT(__kmp_init_serial);
985 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
986 chunk);
987 }
988
989 /*!
990 See @ref __kmpc_team_static_init_4
991 */
__kmpc_team_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint64 * p_lb,kmp_uint64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)992 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
993 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
994 kmp_int64 *p_st, kmp_int64 incr,
995 kmp_int64 chunk) {
996 KMP_DEBUG_ASSERT(__kmp_init_serial);
997 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
998 chunk);
999 }
1000 /*!
1001 @}
1002 */
1003
1004 } // extern "C"
1005