1 #if USE_ITT_BUILD
2 /*
3  * kmp_itt.h -- ITT Notify interface.
4  */
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9 // See https://llvm.org/LICENSE.txt for license information.
10 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef KMP_ITT_H
15 #define KMP_ITT_H
16 
17 #include "kmp_lock.h"
18 
19 #define INTEL_ITTNOTIFY_API_PRIVATE
20 #include "ittnotify.h"
21 #include "legacy/ittnotify.h"
22 
// __kmp_inline is the specifier placed in front of every reporting function
// declared in this header.
//  - KMP_DEBUG builds: it expands to nothing, so the functions are ordinary
//    (non-inline) functions.
//  - Other builds: it expands to `static inline`, and the definitions are
//    pulled in by the #include of "kmp_itt.inl" near the end of this header.
#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif
28 
#if USE_ITT_NOTIFY
// Threshold that the per-wait `sync_iters` counter is compared against in
// KMP_FSYNC_SPIN_PREPARE() and KMP_FSYNC_SPIN_ACQUIRED() below.
extern kmp_int32 __kmp_itt_prepare_delay;
// Declared with C linkage when this header is compiled as C++.
// NOTE(review): presumably finalizes the ITT library -- confirm against its
// definition, which is not part of this header.
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif
37 
// Simplify the handling of an argument that is only required when USE_ITT_BUILD
// is enabled. In this (enabled) branch the macro expands to ", x", i.e. it
// appends x to a parameter or argument list; the USE_ITT_BUILD-disabled branch
// at the end of this file defines it to expand to nothing.
#define USE_ITT_BUILD_ARG(x) , x
41 
// Entry points that manage the ITT support layer as a whole. They are plain
// external functions (not __kmp_inline); their definitions are not part of
// this header.
// NOTE(review): the names suggest set-up, tear-down and re-initialization of
// the ITT state -- confirm against the definitions.
void __kmp_itt_initialize(void);
void __kmp_itt_destroy(void);
void __kmp_itt_reset(void);
45 
46 // -----------------------------------------------------------------------------
47 // New stuff for reporting high-level constructs.
48 
49 // Note the naming convention:
50 //     __kmp_itt_xxxing() function should be called before action, while
51 //     __kmp_itt_xxxed()  function should be called after action.
52 
53 // --- Parallel region reporting ---
54 __kmp_inline void
55 __kmp_itt_region_forking(int gtid, int team_size,
56                          int barriers); // Primary only, before forking threads.
57 __kmp_inline void
58 __kmp_itt_region_joined(int gtid); // Primary only, after joining threads.
59 // (*) Note: A thread may execute tasks after this point, though.
60 
// --- Frame reporting ---
// begin/end - timestamps delimiting the frame.
// region=0: no regions, region=1: parallel, region=2: serialized parallel
// (region defaults to 0 when the argument is omitted).
// NOTE(review): gtid, imbalance, loc and team_size are not described in this
// header -- see the definition for their exact meaning.
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);
67 
// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - whether this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
// NOTE(review): presumably reports metadata for a `single` construct at loc --
// confirm against the definition.
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
81 
// --- Barrier reporting ---
// __kmp_itt_barrier_object() returns an opaque object pointer; set_name and
// delta both default to 0 when omitted.
// NOTE(review): presumably that pointer is what callers pass as `object` to
// the starting/middle/finished calls below, and `bt` selects the barrier
// type -- confirm against the definitions.
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
88 
// --- Taskwait reporting ---
// Intended call sequence, as used by the KMP_ITT_TASKWAIT_STARTING() and
// KMP_ITT_TASKWAIT_FINISHED() macros that follow: obtain the object with
// __kmp_itt_taskwait_object(gtid); if it is non-NULL pass it to
// __kmp_itt_taskwait_starting(), and later to __kmp_itt_taskwait_finished().
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
// Report the start of a taskwait.
//   obj - modifiable lvalue of pointer type. When __itt_sync_create_ptr is
//         set it is assigned the result of __kmp_itt_taskwait_object(gtid);
//         if that result is non-NULL, __kmp_itt_taskwait_starting() is called
//         with it. Otherwise obj is left untouched.
// A variable named `gtid` must be in scope at the expansion site.
// Wrapped in do { } while (0) so the macro behaves as a single statement:
// write it with a trailing semicolon; it is then safe as the unbraced body of
// an if/else.
#define KMP_ITT_TASKWAIT_STARTING(obj)                                         \
  do {                                                                         \
    if (UNLIKELY(__itt_sync_create_ptr)) {                                     \
      (obj) = __kmp_itt_taskwait_object(gtid);                                 \
      if ((obj) != NULL) {                                                     \
        __kmp_itt_taskwait_starting(gtid, (obj));                              \
      }                                                                        \
    }                                                                          \
  } while (0)
// Report the end of a taskwait: calls __kmp_itt_taskwait_finished(gtid, obj)
// only when obj is non-NULL. Same usage rules as KMP_ITT_TASKWAIT_STARTING()
// (needs `gtid` in scope, single statement, caller supplies the semicolon).
#define KMP_ITT_TASKWAIT_FINISHED(obj)                                         \
  do {                                                                         \
    if (UNLIKELY((obj) != NULL)) {                                             \
      __kmp_itt_taskwait_finished(gtid, (obj));                                \
    }                                                                          \
  } while (0)
103 
// --- Task reporting ---
// Following the naming convention noted at the top of this section,
// __kmp_itt_task_starting() is called before and __kmp_itt_task_finished()
// after the reported action.
// NOTE(review): what `object` identifies is not stated in this header --
// confirm against the definitions.
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);
107 
108 // --- Lock reporting ---
109 #if KMP_USE_DYNAMIC_LOCK
110 __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
111                                           const ident_t *);
112 #else
113 __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
114 #endif
115 __kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
116 __kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
117 __kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
118 __kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
119 __kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
120 
121 // --- Critical reporting ---
122 #if KMP_USE_DYNAMIC_LOCK
123 __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
124                                               const ident_t *);
125 #else
126 __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
127 #endif
128 __kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
129 __kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
130 __kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
131 __kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);
132 
// --- Single reporting ---
// NOTE(review): presumably bracket the execution of a `single` construct by
// the thread identified by gtid -- confirm against the definitions.
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);
136 
// --- Ordered reporting ---
// NOTE(review): the names suggest the sequence init -> prep -> start -> end
// around an `ordered` region for thread gtid; this header does not state the
// required call order -- confirm against the definitions and callers.
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);
142 
143 // --- Threads reporting ---
144 __kmp_inline void __kmp_itt_thread_ignore();
145 __kmp_inline void __kmp_itt_thread_name(int gtid);
146 
// --- System objects ---
// Called after (see the naming convention above) an object has been created.
//   object - address identifying the object
//   name   - C string associated with it
// NOTE(review): how name is used and whether it must outlive the call is not
// visible here -- confirm against the definition.
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);
150 
151 // --- Stack stitching ---
152 __kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
153 __kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
154 __kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
155 __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
156 
157 // -----------------------------------------------------------------------------
158 // Old stuff for reporting low-level internal synchronization.
159 
160 #if USE_ITT_NOTIFY
161 
/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
 */
// INCLUDE_SSC_MARKS may be predefined by the build; if it is not, it defaults
// to enabled exactly on Linux x86-64.
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
// Even when INCLUDE_SSC_MARKS is predefined, marks are only emitted on Linux
// x86-64; everywhere else INSERT_SSC_MARK(tag) is a no-op expression.
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary instructions
// to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
// tag must be a compile-time constant (it is passed through an "i" immediate
// constraint); %ebx is declared as clobbered.
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif
185 
/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags; there is
   no central issuing authority, so randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
192 
// Markers for architecture simulation.
// FORKING      : Before the primary thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of dynamically scheduled loop.
// DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
204 
205 // The object is an address that associates a specific set of the prepare,
206 // acquire, release, and cancel operations.
207 
208 /* Sync prepare indicates a thread is going to start waiting for another thread
209    to send a release event.  This operation should be done just before the
210    thread begins checking for the existence of the release event */
211 
212 /* Sync cancel indicates a thread is cancelling a wait on another thread and
213    continuing execution without waiting for the other thread to release it */
214 
215 /* Sync acquired indicates a thread has received a release event from another
216    thread and has stopped waiting.  This operation must occur only after the
217    release event is received. */
218 
219 /* Sync release indicates a thread is going to send a release event to another
220    thread so it will stop waiting and continue execution. This operation must
221    just happen before the release event. */
222 
// Thin wrappers over the ITT fsync notifications (prepare, cancel, acquired,
// releasing): each casts its argument to void * and forwards it to the
// corresponding __itt_fsync_* call.
#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
227 
/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if waiting time is small). So, in spin
   loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
   spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */
233 
// Set-up for a reported spin wait; expand it once, before the spin loop.
//   obj  - modifiable pointer lvalue naming the sync object; if it is NULL and
//          __itt_fsync_prepare_ptr is set, it is replaced by spin.
//   spin - fallback object address.
// Declares `int sync_iters = 0` in the *enclosing* scope (it is read by
// KMP_FSYNC_SPIN_PREPARE() and KMP_FSYNC_SPIN_ACQUIRED()), so this macro is
// deliberately not wrapped in do { } while (0) and must not be used as the
// unbraced body of a control statement. Ends by emitting
// SSC_MARK_SPIN_START(); the caller supplies the trailing semicolon.
#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr && (obj) == NULL) {                              \
    (obj) = (spin);                                                            \
  }                                                                            \
  SSC_MARK_SPIN_START()
243 
// Per-iteration hook of a reported spin wait; requires the `sync_iters`
// counter declared by KMP_FSYNC_SPIN_INIT() to be in scope.
// While __itt_fsync_prepare_ptr is set and sync_iters is still below
// __kmp_itt_prepare_delay, the counter is incremented; on the iteration where
// it reaches the delay, KMP_FSYNC_PREPARE() is issued once for obj. Later
// iterations do nothing.
// obj is parenthesized before the cast so that an expression argument such
// as `p + 1` is converted as a whole.
#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)(obj));                                      \
      }                                                                        \
    }                                                                          \
  } while (0)
// Closing hook of a reported spin wait; requires the `sync_iters` counter
// declared by KMP_FSYNC_SPIN_INIT() to be in scope.
// Always emits SSC_MARK_SPIN_END(). KMP_FSYNC_ACQUIRED() is issued for obj
// only if sync_iters has reached __kmp_itt_prepare_delay, i.e. only for waits
// that lasted long enough for KMP_FSYNC_SPIN_PREPARE() to report them.
// obj is parenthesized before the cast so that an expression argument such
// as `p + 1` is converted as a whole.
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)(obj));                                       \
    }                                                                          \
  } while (0)
262 
/* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
       KMP_ITT_IGNORE(
           ptr = malloc( size );
       );
   Expansion, in order:
     1. if __itt_state_get_ptr is set, save the value of __itt_state_get() in
        the macro-local __itt_state_ and call
        __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);
     2. execute `statement` inside its own braces;
     3. if __itt_state_get_ptr is set, restore the saved state with
        __itt_state_set().
   __itt_state_ is left uninitialized when __itt_state_get_ptr is not set; it
   is only read under that same check.
   `statement` is pasted verbatim: it must carry its own terminating
   semicolon and must not declare or use a variable named __itt_state_.
*/
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)
280 
// Maximum number of frame domains to use (maps to
// different OpenMP regions in the user source code).
// Also used as the number of buckets in kmp_itthash_t below.
const int KMP_MAX_FRAME_DOMAINS = 997;
// One entry of a bucket chain in a kmp_itthash_t.
// NOTE(review): presumably (loc, team_size) form the lookup key and d is the
// ITT domain found for it -- confirm against the code that fills the table.
typedef struct kmp_itthash_entry {
  ident_t *loc;
  int team_size;
  __itt_domain *d;
  struct kmp_itthash_entry *next_in_bucket; // next entry of the same bucket
} kmp_itthash_entry_t;
// Chained hash table: one singly linked list of entries per bucket.
typedef struct kmp_itthash {
  kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS];
  int count; // just a heuristic to limit number of entries
} kmp_itthash_t;
// Tables and handles defined elsewhere; only declared here.
extern kmp_itthash_t __kmp_itt_region_domains;
extern kmp_itthash_t __kmp_itt_barrier_domains;
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;
300 
301 #else
302 
// Null definitions of the synchronization tracing functions, used when
// USE_ITT_BUILD is enabled but USE_ITT_NOTIFY is not. Each KMP_FSYNC_* macro
// expands to the no-op expression ((void)0) and does not evaluate its
// arguments.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

// Unlike the USE_ITT_NOTIFY version, KMP_FSYNC_SPIN_INIT() here declares no
// `sync_iters` variable.
#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

// Just executes stmt (pasted verbatim, so it must carry its own semicolon)
// as a single statement.
#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)
317 
318 #endif // USE_ITT_NOTIFY
319 
320 #if !KMP_DEBUG
321 // In release mode include definitions of inline functions.
322 #include "kmp_itt.inl"
323 #endif
324 
325 #endif // KMP_ITT_H
326 
327 #else /* USE_ITT_BUILD */
328 
// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
// Each KMP_FSYNC_* macro expands to the no-op expression ((void)0) and does
// not evaluate its arguments.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

// Just executes stmt (pasted verbatim, so it must carry its own semicolon)
// as a single statement.
#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

// Without ITT support the extra argument disappears entirely.
#define USE_ITT_BUILD_ARG(x)

347 
348 #endif /* USE_ITT_BUILD */
349