1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8//                     The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
// Inline function definitions. This file should be included into kmp_itt.h
// file for production builds (to let the compiler inline functions) or into
// kmp_itt.c for debug builds (to reduce the number of files to recompile and
// save build time).
19
20#include "kmp.h"
21#include "kmp_str.h"
22
#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// Usage protocol: KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock and the
// matching KMP_ITT_DEBUG_PRINT(...) releases it after printing, so the two
// macros must always be used as a pair (lock first, then print).
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
// Debug tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
38
39// Ensure that the functions are static if they're supposed to be being inlined.
40// Otherwise they cannot be used in more than one file, since there will be
41// multiple definitions.
42#if KMP_DEBUG
43#define LINKAGE
44#else
45#define LINKAGE static inline
46#endif
47
48// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
49// this API to support user-defined synchronization primitives, but does not use
50// ZCA; it would be safe to turn this off until wider support becomes available.
51#if USE_ITT_ZCA
52#ifdef __INTEL_COMPILER
53#if __INTEL_COMPILER >= 1200
54#undef __itt_sync_acquired
55#undef __itt_sync_releasing
56#define __itt_sync_acquired(addr)                                              \
57  __notify_zc_intrinsic((char *)"sync_acquired", addr)
58#define __itt_sync_releasing(addr)                                             \
59  __notify_intrinsic((char *)"sync_releasing", addr)
60#endif
61#endif
62#endif
63
// Guards one-time creation of the ITT metadata domain and string handles
// performed lazily by the __kmp_itt_metadata_* functions below.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
66
67/* Parallel region reporting.
68 * __kmp_itt_region_forking should be called by master thread of a team.
69   Exact moment of call does not matter, but it should be completed before any
70   thread of this team calls __kmp_itt_region_starting.
71 * __kmp_itt_region_starting should be called by each thread of a team just
72   before entering parallel region body.
73 * __kmp_itt_region_finished should be called by each thread of a team right
74   after returning from parallel region body.
75 * __kmp_itt_region_joined should be called by master thread of a team, after
76   all threads called __kmp_itt_region_finished.
77
78 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
79 execute some more user code -- such a thread can execute tasks.
80
81 Note: The overhead of logging region_starting and region_finished in each
82 thread is too large, so these calls are not used. */
83
84LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
85#if USE_ITT_NOTIFY
86  kmp_team_t *team = __kmp_team_from_gtid(gtid);
87  if (team->t.t_active_level > 1) {
88    // The frame notifications are only supported for the outermost teams.
89    return;
90  }
91  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
92  if (loc) {
93    // Use the reserved_2 field to store the index to the region domain.
94    // Assume that reserved_2 contains zero initially.  Since zero is special
95    // value here, store the index into domain array increased by 1.
96    if (loc->reserved_2 == 0) {
97      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
98        int frm =
99            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
100        if (frm >= KMP_MAX_FRAME_DOMAINS) {
101          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
102          return; // loc->reserved_2 is still 0
103        }
104        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
105        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
106        //    for same loc
107        //} // AC: this block is to replace next unsynchronized line
108
109        // We need to save indexes for both region and barrier frames. We'll use
110        // loc->reserved_2 field but put region index to the low two bytes and
111        // barrier indexes to the high two bytes. It is OK because
112        // KMP_MAX_FRAME_DOMAINS = 512.
113        loc->reserved_2 |= (frm + 1); // save "new" value
114
115        // Transform compiler-generated region location into the format
116        // that the tools more or less standardized on:
117        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
118        char *buff = NULL;
119        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
120        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
121                                team_size, str_loc.file, str_loc.line,
122                                str_loc.col);
123
124        __itt_suppress_push(__itt_suppress_memory_errors);
125        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
126        __itt_suppress_pop();
127
128        __kmp_str_free(&buff);
129        if (barriers) {
130          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
131            int frm = KMP_TEST_THEN_INC32(
132                &__kmp_barrier_domain_count); // get "old" value
133            if (frm >= KMP_MAX_FRAME_DOMAINS) {
134              KMP_TEST_THEN_DEC32(
135                  &__kmp_barrier_domain_count); // revert the count
136              return; // loc->reserved_2 is still 0
137            }
138            char *buff = NULL;
139            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
140                                    str_loc.file, str_loc.col);
141            __itt_suppress_push(__itt_suppress_memory_errors);
142            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
143            __itt_suppress_pop();
144            __kmp_str_free(&buff);
145            // Save the barrier frame index to the high two bytes.
146            loc->reserved_2 |= (frm + 1) << 16;
147          }
148        }
149        __kmp_str_loc_free(&str_loc);
150        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
151      }
152    } else { // Region domain exists for this location
153      // Check if team size was changed. Then create new region domain for this
154      // location
155      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
156      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
157          (__kmp_itt_region_team_size[frm] != team_size)) {
158        char *buff = NULL;
159        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
160        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
161                                team_size, str_loc.file, str_loc.line,
162                                str_loc.col);
163
164        __itt_suppress_push(__itt_suppress_memory_errors);
165        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
166        __itt_suppress_pop();
167
168        __kmp_str_free(&buff);
169        __kmp_str_loc_free(&str_loc);
170        __kmp_itt_region_team_size[frm] = team_size;
171        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
172      } else { // Team size was not changed. Use existing domain.
173        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
174      }
175    }
176    KMP_ITT_DEBUG_LOCK();
177    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
178                        loc->reserved_2, loc);
179  }
180#endif
181} // __kmp_itt_region_forking
182
183// -----------------------------------------------------------------------------
// Submit a completed frame with explicit begin/end timestamps to ITT.
// gtid      -- global thread id of the reporting thread.
// begin/end -- frame boundary timestamps.
// imbalance -- barrier reporting only: nonzero reports into the per-location
//              "barrier-imbalance" domain instead of the plain barrier one.
// loc       -- source location; its reserved_2 field caches domain indices
//              (region index in the low two bytes, barrier index in the high
//              two bytes, each stored incremented by one; zero = unset).
// team_size -- team size embedded into newly created region domain names.
// region    -- 0: barrier reporting; 1: parallel region; 2: serialized
//              parallel region.
// NOTE(review): the region path dereferences loc without a NULL check while
// the barrier path guards with "if (loc)" -- callers of the region path are
// presumably guaranteed to pass a valid location; confirm.
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    // A serialized region (region == 2) adds one implicit nesting level.
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. Its index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        // Remember the team size so a later fork with a different size can
        // recreate the domain name (see the else-branch below).
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      // The barrier domain index lives in the high two bytes of reserved_2.
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // loc->reserved_2 is still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          //   "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit
320
321// -----------------------------------------------------------------------------
322LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
323                                          kmp_uint64 end, kmp_uint64 imbalance,
324                                          kmp_uint64 reduction) {
325#if USE_ITT_NOTIFY
326  if (metadata_domain == NULL) {
327    __kmp_acquire_bootstrap_lock(&metadata_lock);
328    if (metadata_domain == NULL) {
329      __itt_suppress_push(__itt_suppress_memory_errors);
330      metadata_domain = __itt_domain_create("OMP Metadata");
331      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
332      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
333      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
334      __itt_suppress_pop();
335    }
336    __kmp_release_bootstrap_lock(&metadata_lock);
337  }
338
339  kmp_uint64 imbalance_data[4];
340  imbalance_data[0] = begin;
341  imbalance_data[1] = end;
342  imbalance_data[2] = imbalance;
343  imbalance_data[3] = reduction;
344
345  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
346                     __itt_metadata_u64, 4, imbalance_data);
347#endif
348} // __kmp_itt_metadata_imbalance
349
350// -----------------------------------------------------------------------------
351LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
352                                     kmp_uint64 iterations, kmp_uint64 chunk) {
353#if USE_ITT_NOTIFY
354  if (metadata_domain == NULL) {
355    __kmp_acquire_bootstrap_lock(&metadata_lock);
356    if (metadata_domain == NULL) {
357      __itt_suppress_push(__itt_suppress_memory_errors);
358      metadata_domain = __itt_domain_create("OMP Metadata");
359      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
360      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
361      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
362      __itt_suppress_pop();
363    }
364    __kmp_release_bootstrap_lock(&metadata_lock);
365  }
366
367  // Parse line and column from psource string: ";file;func;line;col;;"
368  char *s_line;
369  char *s_col;
370  KMP_DEBUG_ASSERT(loc->psource);
371#ifdef __cplusplus
372  s_line = strchr(CCAST(char *, loc->psource), ';');
373#else
374  s_line = strchr(loc->psource, ';');
375#endif
376  KMP_DEBUG_ASSERT(s_line);
377  s_line = strchr(s_line + 1, ';'); // 2-nd semicolon
378  KMP_DEBUG_ASSERT(s_line);
379  s_line = strchr(s_line + 1, ';'); // 3-rd semicolon
380  KMP_DEBUG_ASSERT(s_line);
381  s_col = strchr(s_line + 1, ';'); // 4-th semicolon
382  KMP_DEBUG_ASSERT(s_col);
383
384  kmp_uint64 loop_data[5];
385  loop_data[0] = atoi(s_line + 1); // read line
386  loop_data[1] = atoi(s_col + 1); // read column
387  loop_data[2] = sched_type;
388  loop_data[3] = iterations;
389  loop_data[4] = chunk;
390
391  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
392                     __itt_metadata_u64, 5, loop_data);
393#endif
394} // __kmp_itt_metadata_loop
395
396// -----------------------------------------------------------------------------
397LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
398#if USE_ITT_NOTIFY
399  if (metadata_domain == NULL) {
400    __kmp_acquire_bootstrap_lock(&metadata_lock);
401    if (metadata_domain == NULL) {
402      __itt_suppress_push(__itt_suppress_memory_errors);
403      metadata_domain = __itt_domain_create("OMP Metadata");
404      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
405      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
406      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
407      __itt_suppress_pop();
408    }
409    __kmp_release_bootstrap_lock(&metadata_lock);
410  }
411
412  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
413  kmp_uint64 single_data[2];
414  single_data[0] = str_loc.line;
415  single_data[1] = str_loc.col;
416
417  __kmp_str_loc_free(&str_loc);
418
419  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
420                     __itt_metadata_u64, 2, single_data);
421#endif
422} // __kmp_itt_metadata_single
423
424// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-start notifications are not issued
// because their per-thread overhead is too large (see the note above
// __kmp_itt_region_forking). Kept to preserve the reporting interface.
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting
429
430// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-finish notifications are not issued
// because their per-thread overhead is too large (see the note above
// __kmp_itt_region_forking). Kept to preserve the reporting interface.
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished
435
436// ----------------------------------------------------------------------------
437LINKAGE void __kmp_itt_region_joined(int gtid) {
438#if USE_ITT_NOTIFY
439  kmp_team_t *team = __kmp_team_from_gtid(gtid);
440  if (team->t.t_active_level > 1) {
441    // The frame notifications are only supported for the outermost teams.
442    return;
443  }
444  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
445  if (loc && loc->reserved_2) {
446    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
447    if (frm < KMP_MAX_FRAME_DOMAINS) {
448      KMP_ITT_DEBUG_LOCK();
449      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
450      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
451                          loc->reserved_2, loc);
452    }
453  }
454#endif
455} // __kmp_itt_region_joined
456
457/* Barriers reporting.
458
459   A barrier consists of two phases:
460   1. Gather -- master waits for arriving of all the worker threads; each
461      worker thread registers arrival and goes further.
462   2. Release -- each worker threads waits until master lets it go; master lets
463      worker threads go.
464
465   Function should be called by each thread:
466   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
467   * __kmp_itt_barrier_middle()   -- between gather and release phases.
468   * __kmp_itt_barrier_finished() -- after release phase.
469
470   Note: Call __kmp_itt_barrier_object() before call to
471   __kmp_itt_barrier_starting() and save result in local variable.
472   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
473   would return itt sync object for the next barrier!
474
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have a barrier object or barrier data structure. A barrier is just
   a counter in team and thread structures. We could use an address of the
   team structure as a barrier sync object, but ITT wants different objects
   for different barriers (even within the same team). So let us use the team
   address as the sync object for the first barrier, then increase it by one
   for the next barrier, and so on (but wrap it so as not to use addresses
   outside of the team structure). */
483
// Compute a stable sync-object address for the current (or, with delta = -1,
// the previous) barrier of the given type, and optionally register a
// descriptive name for it with ITT. Returns NULL when the team pointer is
// unavailable (e.g. when called from __kmp_fork_barrier).
// gtid     -- global thread id of the calling thread.
// bt       -- barrier type (bs_* enumerator); encoded into the address so
//             different barrier types get distinct sync objects.
// set_name -- nonzero to also call __itt_sync_create with a type-specific
//             name derived from compiler-provided location flags.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // Stronger condition: make sure we have room for at least two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        kmp_uintptr_t(team) +
        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        // impl is only set when loc != NULL, so loc may be dereferenced here.
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we can read thr->th.th_ident, because it
        // contains location of last passed construct (while join barrier is not
        // such one). Use th_ident of master thread instead -- __kmp_join_call()
        // called by the master thread saves location.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        //    yet, so we read the location from team. This is the same location.
        //    And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
584
585// -----------------------------------------------------------------------------
// Barrier reporting: called by each thread before arriving at the gather
// phase. Workers first issue sync_releasing (their arrival is about to
// become visible to the master); then every thread, master included, issues
// sync_prepare (it starts waiting at `object').
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
598
599// -----------------------------------------------------------------------------
// Barrier reporting: called by each thread between the gather and release
// phases. Only the master has anything to report here: it has observed all
// workers arrive (sync_acquired) and is about to let them go
// (sync_releasing). Workers report nothing at this point.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid))
    return;
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
613
614// -----------------------------------------------------------------------------
// Barrier reporting: called by each thread after the release phase. Workers
// report that they got through (sync_acquired); the master already reported
// its acquisition in __kmp_itt_barrier_middle and stays silent here.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished
625
626/* Taskwait reporting.
627   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
628   does not have taskwait structure, so we need to construct something. */
629
630void *__kmp_itt_taskwait_object(int gtid) {
631  void *object = NULL;
632#if USE_ITT_NOTIFY
633  if (__itt_sync_create_ptr) {
634    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
635    kmp_taskdata_t *taskdata = thread->th.th_current_task;
636    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
637                                      taskdata->td_taskwait_counter %
638                                          sizeof(kmp_taskdata_t));
639  }
640#endif
641  return object;
642} // __kmp_itt_taskwait_object
643
// Taskwait begins: name the sync object after the location recorded by the
// taskwait construct, then report that the thread starts waiting at it.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *td = this_thr->th.th_current_task;
  ident_t const *loc = td->td_taskwait_ident;
  char const *src = (loc == NULL) ? NULL : loc->psource;
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
659
// Taskwait is over: report that the thread got through (sync_acquired) and
// destroy the per-taskwait sync object so its address can be reused later.
// gtid is unused here; kept for interface symmetry with the other hooks.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
670
671/* Task reporting.
672   Only those tasks are reported which are executed by a thread spinning at
673   barrier (or taskwait). Synch object passed to the function must be barrier of
674   taskwait the threads waiting at. */
675
// A spinning thread is about to execute a task: cancel the pending "prepare"
// wait on the barrier/taskwait object it was waiting at.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (object == NULL)
    return; // nothing to report -- thread was not waiting at a sync object
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_cancel(object);
  KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
#endif
} // __kmp_itt_task_starting
687
688// -----------------------------------------------------------------------------
// A task executed while spinning at a barrier/taskwait has finished; the
// thread resumes waiting, so re-issue the "prepare" notification.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
698
699/* Lock reporting.
700 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
701   operation (set/unset). It is not a real event shown to the user but just
702   setting a name for synchronization object. `lock' is an address of sync
703   object, the same address should be used in all subsequent calls.
704 * __kmp_itt_lock_acquiring() should be called before setting the lock.
705 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
707 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
708   for the lock.
709 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
710   operation. After __kmp_itt_lock_destroyed() all the references to the same
711   address will be considered as another sync object, not related with the
712   original one.  */
713
714#if KMP_USE_DYNAMIC_LOCK
715// Takes location information directly
716__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
717                                       const ident_t *loc) {
718#if USE_ITT_NOTIFY
719  if (__itt_sync_create_ptr) {
720    char const *src = (loc == NULL ? NULL : loc->psource);
721    KMP_ITT_DEBUG_LOCK();
722    __itt_sync_create(lock, type, src, 0);
723    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
724                        src);
725  }
726#endif
727}
728#else // KMP_USE_DYNAMIC_LOCK
729// Internal guts -- common code for locks and critical sections, do not call
730// directly.
731__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
732#if USE_ITT_NOTIFY
733  if (__itt_sync_create_ptr) {
734    ident_t const *loc = NULL;
735    if (__kmp_get_user_lock_location_ != NULL)
736      loc = __kmp_get_user_lock_location_((lock));
737    char const *src = (loc == NULL ? NULL : loc->psource);
738    KMP_ITT_DEBUG_LOCK();
739    __itt_sync_create(lock, type, src, 0);
740    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
741                        src);
742  }
743#endif
744} // ___kmp_itt_lock_init
745#endif // KMP_USE_DYNAMIC_LOCK
746
// Internal guts -- common code for locks and critical sections, do not call
// directly. Tells ITT the sync object at `lock` is destroyed; later reuse of
// the same address is treated as a brand-new object.
// NOTE: `type` is accepted for symmetry with ___kmp_itt_lock_init but is not
// used in this function.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini
756
757// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
// Name `lock` as an "OMP Lock" sync object; location supplied by the caller.
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
// Name `lock` as an "OMP Lock" sync object; location is looked up from the
// lock inside ___kmp_itt_lock_init.
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif
767
768void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
769#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
770  // postpone lock object access
771  if (__itt_sync_prepare_ptr) {
772    if (KMP_EXTRACT_D_TAG(lock) == 0) {
773      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
774      __itt_sync_prepare(ilk->lock);
775    } else {
776      __itt_sync_prepare(lock);
777    }
778  }
779#else
780  __itt_sync_prepare(lock);
781#endif
782} // __kmp_itt_lock_acquiring
783
784void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
785#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
786  // postpone lock object access
787  if (__itt_sync_acquired_ptr) {
788    if (KMP_EXTRACT_D_TAG(lock) == 0) {
789      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
790      __itt_sync_acquired(ilk->lock);
791    } else {
792      __itt_sync_acquired(lock);
793    }
794  }
795#else
796  __itt_sync_acquired(lock);
797#endif
798} // __kmp_itt_lock_acquired
799
800void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
801#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
802  if (__itt_sync_releasing_ptr) {
803    if (KMP_EXTRACT_D_TAG(lock) == 0) {
804      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
805      __itt_sync_releasing(ilk->lock);
806    } else {
807      __itt_sync_releasing(lock);
808    }
809  }
810#else
811  __itt_sync_releasing(lock);
812#endif
813} // __kmp_itt_lock_releasing
814
815void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
816#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
817  if (__itt_sync_cancel_ptr) {
818    if (KMP_EXTRACT_D_TAG(lock) == 0) {
819      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
820      __itt_sync_cancel(ilk->lock);
821    } else {
822      __itt_sync_cancel(lock);
823    }
824  }
825#else
826  __itt_sync_cancel(lock);
827#endif
828} // __kmp_itt_lock_cancelled
829
// End of lifetime for the "OMP Lock" sync object at `lock`; must follow the
// last lock operation on this address.
void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed
833
834/* Critical reporting.
835   Critical sections are treated exactly as locks (but have different object
836   type). */
#if KMP_USE_DYNAMIC_LOCK
// Name `lock` as an "OMP Critical" sync object; location from the caller.
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
// Name `lock` as an "OMP Critical" sync object; location is looked up from
// the lock inside ___kmp_itt_lock_init.
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif
846
// Report that the thread is about to wait on the critical-section object.
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring
850
// Report that the thread has entered the critical section.
void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired
854
// Report that the thread is about to leave the critical section.
void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing
858
// End of lifetime for the "OMP Critical" sync object at `lock`.
void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed
862
863/* Single reporting. */
864
// Report the start of a single region: create (or re-create) a named ITT mark
// for this thread, remember its id in th_itt_mark_single, and fire the mark.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    // src may be NULL (no location information). Passing NULL to "%s" is
    // undefined behavior in printf-style formatting, so substitute "".
    __kmp_str_buf_print(&name, "OMP Single-%s", src == NULL ? "" : src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
886
// Report the end of a single region: switch off the mark that
// __kmp_itt_single_start stored in th_itt_mark_single for this thread.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
895
896/* Ordered reporting.
897 * __kmp_itt_ordered_init is called by each thread *before* first using sync
898   object. ITT team would like it to be called once, but it requires extra
899   synchronization.
900 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
901   section (before synchronization).
902 * __kmp_itt_ordered_start is called just before entering user code (after
903   synchronization).
904 * __kmp_itt_ordered_end is called after returning from user code.
905
906 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
907 Events are not generated in case of serialized team. */
908
// Name the ordered-section sync object (th_dispatch_sh_current) for ITT.
// Called by each thread before its first use of the object.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = th->th.th_ident;
  __itt_sync_create(th->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered",
                    loc == NULL ? NULL : loc->psource, 0);
#endif
} // __kmp_itt_ordered_init
920
// Thread is about to enter an ordered section (before synchronization).
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  // No events are generated for serialized teams.
  if (__kmp_team_from_gtid(gtid)->t.t_serialized)
    return;
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
932
// Thread enters the ordered user code (after synchronization succeeded).
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  // No events are generated for serialized teams.
  if (__kmp_team_from_gtid(gtid)->t.t_serialized)
    return;
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
944
// Thread returns from the ordered user code.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr)
    return;
  // No events are generated for serialized teams.
  if (__kmp_team_from_gtid(gtid)->t.t_serialized)
    return;
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
956
957/* Threads reporting. */
958
// Mark the calling thread to be ignored by ITT.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
962
// Give the current thread a human-readable ITT name derived from its gtid
// ("OMP Master Thread #N" for the master, "OMP Worker Thread #N" otherwise).
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf,
                        KMP_MASTER_GTID(gtid) ? "OMP Master Thread #%d"
                                              : "OMP Worker Thread #%d",
                        gtid);
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(buf.str, buf.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", buf.str);
    __kmp_str_buf_free(&buf);
  }
#endif
} // __kmp_itt_thread_name
980
981/* System object reporting.
982   ITT catches operations with system sync objects (like Windows* OS on IA-32
983   architecture API critical sections and events). We only need to specify
984   name ("OMP Scheduler") for the object to let ITT know it is an object used
985   by OpenMP RTL for internal purposes. */
986
// Label a system sync object (e.g. an OS event the RTL created) as belonging
// to the OpenMP scheduler, with `name` as extra identification for ITT.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
995
996/* Stack stitching api.
   Master calls "create" and puts the stitching id into the team structure.
998   Workers read the stitching id and call "enter" / "leave" api.
999   Master calls "destroy" at the end of the parallel region. */
1000
// Create a stack-stitching id for the master to store in the team structure.
// Returns NULL when ITT is compiled out or the entry point is not loaded.
__itt_caller __kmp_itt_stack_caller_create() {
#if USE_ITT_NOTIFY
  if (!__itt_stack_caller_create_ptr)
    return NULL;
  KMP_ITT_DEBUG_LOCK();
  __itt_caller id = __itt_stack_caller_create();
  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
  return id;
#endif
  // Reached only when USE_ITT_NOTIFY is 0 (the branch above compiles out).
  return NULL;
}
1012
1013void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1014#if USE_ITT_NOTIFY
1015  if (__itt_stack_caller_destroy_ptr) {
1016    KMP_ITT_DEBUG_LOCK();
1017    __itt_stack_caller_destroy(id);
1018    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1019  }
1020#endif
1021}
1022
1023void __kmp_itt_stack_callee_enter(__itt_caller id) {
1024#if USE_ITT_NOTIFY
1025  if (__itt_stack_callee_enter_ptr) {
1026    KMP_ITT_DEBUG_LOCK();
1027    __itt_stack_callee_enter(id);
1028    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1029  }
1030#endif
1031}
1032
1033void __kmp_itt_stack_callee_leave(__itt_caller id) {
1034#if USE_ITT_NOTIFY
1035  if (__itt_stack_callee_leave_ptr) {
1036    KMP_ITT_DEBUG_LOCK();
1037    __itt_stack_callee_leave(id);
1038    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1039  }
1040#endif
1041}
1042
1043#endif /* USE_ITT_BUILD */
1044