/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

  NOTE: team->t.t_nproc is constant inside any dispatch loop; however, it
        may change between parallel regions.  __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// debug print format specifiers for traits_t (d, u, lld, llu, ld)
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, so they cannot
  // appear here: this code path is executed only for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
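  // Worked example (illustrative): lower = 0, upper = 9, incr = 3 covers the
  // iterations 0, 3, 6, 9, so trip_count = (9 - 0) / 3 + 1 = 4. The cast to
  // the unsigned type UT matters because upper - lower may not fit in the
  // signed type (e.g. lower = INT_MIN, upper = INT_MAX for T = int).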

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
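        // Balanced: every thread gets either ceil(trip_count/nth) or
        // floor(trip_count/nth) iterations. Illustrative example:
        // trip_count = 10, nth = 4 gives small_chunk = 2, extras = 2, so
        // threads 0-1 execute 3 iterations each and threads 2-3 execute 2
        // each (3 + 3 + 2 + 2 = 10).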
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

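        // Greedy: every thread is assigned ceil(trip_count/nth) iterations
        // and the trailing threads are clipped below. Illustrative example:
        // trip_count = 10, nth = 4, incr = 1 gives big_chunk_inc_count = 3;
        // thread 3 is assigned 9..11, which is clipped to 9 and flagged as
        // owning the last iteration.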
        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
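    // Chunks of size `chunk` are assigned round-robin by thread id.
    // Illustrative example: trip_count = 10, chunk = 2, nth = 3 gives
    // nchunks = 5: thread 0 takes chunks 0 and 3, thread 1 takes 1 and 4,
    // thread 2 takes 2. The last chunk is number nchunks - 1 = 4, owned by
    // thread 4 % 3 == 1, which is exactly the *plastiter test below.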
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // per-thread span, rounded up so that nth spans cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // new chunk = span rounded up to the nearest multiple of the original
    // chunk; the bit trick assumes chunk is a power of two (e.g. a simd width)
    chunk = (span + chunk - 1) & ~(chunk - 1);
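    // Illustrative example: with chunk = 8 (a power of two) and span = 10,
    // chunk becomes (10 + 7) & ~7 == 16, i.e. span rounded up to the next
    // multiple of 8.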

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of some teams get a single iteration; all
    // other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks.
  // The last-iteration flag is set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
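  // Illustrative example: lower = 0, upper = 9, incr = 1, chunk = 3,
  // nteams = 2 gives span = 3 and *p_st = 6; team 0 starts with [0, 2] and
  // team 1 with [3, 5]. The last chunk is number (10 - 1) / 3 == 3, owned
  // by team 3 % 2 == 1.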
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedtype  Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
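
/* Illustrative sketch (not part of the runtime): for a loop such as

     #pragma omp for schedule(static)
     for (int i = 0; i <= 99; ++i)
       body(i);

   a compiler typically emits code along these lines in each thread, where
   loc, gtid, and body are placeholders:

     kmp_int32 lower = 0, upper = 99, stride = 1, lastiter = 0;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &lastiter, &lower,
                              &upper, &stride, 1, 1);
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i);
     __kmpc_for_static_fini(&loc, gtid);

   With 4 threads, thread 0 would receive iterations 0-24, thread 1 25-49,
   and so on; lastiter becomes nonzero only in the thread that executes
   iteration 99. */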

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedule  Scheduling type for the parallel loop
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride for parallel loop
@param    incr      Loop increment
@param    chunk     The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}
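
/* Illustrative sketch (placeholder names; not part of the runtime): for a
   composite construct such as

     #pragma omp distribute parallel for dist_schedule(static) schedule(static)
     for (int i = 0; i <= 99; ++i)
       body(i);

   each thread might execute roughly:

     kmp_int32 lo = 0, up = 99, upD = 99, st = 1, last = 0;
     __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lo,
                                   &up, &upD, &st, 1, 1);
     for (kmp_int32 i = lo; i <= up; ++i)
       body(i);

   where [lo, up] bounds this thread's share of the parallel loop and upD is
   the upper bound of the whole chunk assigned to this thread's team. */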

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb  pointer to Lower bound
@param p_ub  pointer to Upper bound
@param p_st  Step (or increment if you prefer)
@param incr  Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of their arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
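
/* Illustrative sketch (placeholder names; not part of the runtime): with
   dist_schedule(static, 3) on a 0..99 loop, the runtime returns the team's
   first chunk in [*p_lb, *p_ub] and the stride *p_st that advances to the
   team's next chunk, so a team might step through its chunks roughly as:

     kmp_int32 lb = 0, ub = 99, st = 0, last = 0;
     __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 3);
     for (; lb <= 99; lb += st, ub += st)
       run_parallel_loop(lb, ub <= 99 ? ub : 99); // hypothetical helper

   With nteams = 2, team 0 gets st = 6 and chunks [0,2], [6,8], ...; team 1
   gets [3,5], [9,11], ..., with the final chunk clipped to 99. */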

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"