/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
        it may change values between parallel regions.  __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu, ld )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    return;
  }
#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper that are not
  // schedules for "distribute", the only useful ones are dynamic, and those
  // can never be seen here, since this code path is executed only for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }
175 
176   /* determine if "for" loop is an active worksharing construct */
177   if (team->t.t_serialized) {
178     /* serialized parallel, each thread executes whole iteration space */
179     if (plastiter != NULL)
180       *plastiter = TRUE;
181     /* leave pupper and plower set to entire iteration space */
182     *pstride =
183         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
184 
185 #ifdef KMP_DEBUG
186     {
187       char *buff;
188       // create format specifiers before the debug output
189       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
190                               "lower=%%%s upper=%%%s stride = %%%s\n",
191                               traits_t<T>::spec, traits_t<T>::spec,
192                               traits_t<ST>::spec);
193       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
194       __kmp_str_free(&buff);
195     }
196 #endif
197     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
198 
199 #if OMPT_SUPPORT && OMPT_OPTIONAL
200     if (ompt_enabled.ompt_callback_work) {
201       ompt_callbacks.ompt_callback(ompt_callback_work)(
202           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
203           &(task_info->task_data), *pstride, codeptr);
204     }
205 #endif
206     return;
207   }
208   nth = team->t.t_nproc;
209   if (nth == 1) {
210     if (plastiter != NULL)
211       *plastiter = TRUE;
212     *pstride =
213         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
214 #ifdef KMP_DEBUG
215     {
216       char *buff;
217       // create format specifiers before the debug output
218       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
219                               "lower=%%%s upper=%%%s stride = %%%s\n",
220                               traits_t<T>::spec, traits_t<T>::spec,
221                               traits_t<ST>::spec);
222       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
223       __kmp_str_free(&buff);
224     }
225 #endif
226     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
227 
228 #if OMPT_SUPPORT && OMPT_OPTIONAL
229     if (ompt_enabled.ompt_callback_work) {
230       ompt_callbacks.ompt_callback(ompt_callback_work)(
231           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
232           &(task_info->task_data), *pstride, codeptr);
233     }
234 #endif
235     return;
236   }
237 
238   /* compute trip count */
239   if (incr == 1) {
240     trip_count = *pupper - *plower + 1;
241   } else if (incr == -1) {
242     trip_count = *plower - *pupper + 1;
243   } else if (incr > 0) {
244     // upper-lower can exceed the limit of signed type
245     trip_count = (UT)(*pupper - *plower) / incr + 1;
246   } else {
247     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
248   }
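  // Illustrative example (comment added for exposition, not from the original
  // source): for a loop with *plower = 0, *pupper = 9 and incr = 2, the
  // branch above yields trip_count = (UT)(9 - 0) / 2 + 1 = 5, i.e. the
  // iterations 0, 2, 4, 6, 8. The unsigned cast matters because
  // upper - lower may overflow the signed type.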

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
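      // Illustrative example (comment added for exposition): with
      // trip_count = 3, nth = 4, *plower = 0 and incr = 1, threads 0..2 each
      // receive the single iteration equal to their tid, thread 3 gets an
      // empty range (*plower = *pupper + 1), and thread 2 sees the
      // last-iteration flag.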
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
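        // Illustrative example (comment added for exposition): with
        // trip_count = 10, nth = 4, *plower = 0 and incr = 1: small_chunk = 2
        // and extras = 2, so threads 0 and 1 get the ranges [0,2] and [3,5]
        // (3 iterations each) while threads 2 and 3 get [6,7] and [8,9]
        // (2 iterations each).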
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
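    // Illustrative example (comment added for exposition): with chunk = 2,
    // incr = 1, nth = 4, tid = 1 and an original *plower of 0: span = 2,
    // *pstride = 8, *plower = 2, *pupper = 3, so thread 1 owns [2,3],
    // [10,11], ... as the compiler-generated loop advances by *pstride.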
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
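    // Illustrative arithmetic (comment added for exposition): with
    // trip_count = 23, nth = 4 and chunk = 4: span = (23 + 3) / 4 = 6 and
    // chunk = (6 + 3) & ~3 = 8, i.e. span rounded up to a multiple of the
    // requested chunk (the masking trick assumes a power-of-two chunk).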

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

#if KMP_STATS_ENABLED
  {
    kmp_int64 t;
    kmp_int64 u = (kmp_int64)(*pupper);
    kmp_int64 l = (kmp_int64)(*plower);
    kmp_int64 i = (kmp_int64)incr;
    /* compute trip count */
    if (i == 1) {
      t = u - l + 1;
    } else if (i == -1) {
      t = l - u + 1;
    } else if (i > 0) {
      t = (u - l) / i + 1;
    } else {
      t = (l - u) / (-i) + 1;
    }
    KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
    KMP_POP_PARTITIONED_TIMER();
  }
#endif
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // Only the masters of some teams get a single iteration; all other
    // threads get nothing.
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
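    // Illustrative example (comment added for exposition): with nteams = 4
    // and trip_count = 2, only the masters (tid == 0) of teams 0 and 1
    // receive one iteration each; everyone else gets an empty range, and the
    // master of team 1 sees the last-iteration flag.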
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks.
  // The last-iteration flag is set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
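
// Illustrative example (comment added for exposition): for a loop with
// lower = 0, upper = 15, incr = 1, chunk = 4 and nteams = 2, the routine
// above computes span = 4 and *p_st = 8, so team 0 starts with [0,3] and
// team 1 with [4,7]; subsequent chunks are reached by adding *p_st, and
// *p_last is set for team 1 because the final chunk [12,15] maps to
// ((16 - 1) / 4) % 2 == 1.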

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedtype Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                              kmp_int32 schedtype, kmp_int32 *plastiter,
                              kmp_int32 *plower, kmp_int32 *pupper,
                              kmp_int32 *pstride, kmp_int32 incr,
                              kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                       );
}
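
/* Illustrative lowering sketch (added for exposition; not part of the
   runtime itself, and the exact schedule constant and trailing arguments
   depend on the schedule clause and the compiler). A compiler may translate

       #pragma omp for schedule(static)
       for (int i = 0; i < n; ++i)
         body(i);

   into approximately

       kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
       __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                                &upper, &stride, 1, 1);
       for (kmp_int32 i = lower; i <= upper; ++i)
         body(i);
       __kmpc_for_static_fini(&loc, gtid);
*/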

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                        );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                              kmp_int32 schedtype, kmp_int32 *plastiter,
                              kmp_int64 *plower, kmp_int64 *pupper,
                              kmp_int64 *pstride, kmp_int64 incr,
                              kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                       );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                        );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedule  Scheduling type for the parallel loop
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride for parallel loop
@param    incr      Loop increment
@param    chunk     The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for the parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
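
/* Illustrative example (added for exposition, assuming the balanced static
   scheme): for 16 iterations with incr = 1, nteams = 2 and 2 threads per
   team, team 0 receives the distribute chunk [0,7] (*pupperD = 7) and its
   threads split it into [0,3] and [4,7]; team 1 receives [8,15] and splits
   it into [8,11] and [12,15]. */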

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb  pointer to Lower bound
@param p_ub  pointer to Upper bound
@param p_st  Step (or increment if you prefer)
@param incr  Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                kmp_int32 *p_last, kmp_uint32 *p_lb,
                                kmp_uint32 *p_ub, kmp_int32 *p_st,
                                kmp_int32 incr, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                kmp_int32 *p_last, kmp_uint64 *p_lb,
                                kmp_uint64 *p_ub, kmp_int64 *p_st,
                                kmp_int64 incr, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"