#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// NOTE: KMP_ITT_DEBUG_LOCK() acquires __kmp_itt_debug_lock and
// KMP_ITT_DEBUG_PRINT() releases it, so the two macros must always be used as
// a LOCK/PRINT pair; PRINT alone would release a lock that was never taken.
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG

// Ensure that the functions are static if they're supposed to be being inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif

// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
50#if USE_ITT_ZCA 51#ifdef __INTEL_COMPILER 52#if __INTEL_COMPILER >= 1200 53#undef __itt_sync_acquired 54#undef __itt_sync_releasing 55#define __itt_sync_acquired(addr) \ 56 __notify_zc_intrinsic((char *)"sync_acquired", addr) 57#define __itt_sync_releasing(addr) \ 58 __notify_intrinsic((char *)"sync_releasing", addr) 59#endif 60#endif 61#endif 62 63static kmp_bootstrap_lock_t metadata_lock = 64 KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock); 65 66/* Parallel region reporting. 67 * __kmp_itt_region_forking should be called by primary thread of a team. 68 Exact moment of call does not matter, but it should be completed before any 69 thread of this team calls __kmp_itt_region_starting. 70 * __kmp_itt_region_starting should be called by each thread of a team just 71 before entering parallel region body. 72 * __kmp_itt_region_finished should be called by each thread of a team right 73 after returning from parallel region body. 74 * __kmp_itt_region_joined should be called by primary thread of a team, after 75 all threads called __kmp_itt_region_finished. 76 77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can 78 execute some more user code -- such a thread can execute tasks. 79 80 Note: The overhead of logging region_starting and region_finished in each 81 thread is too large, so these calls are not used. */ 82 83LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) { 84#if USE_ITT_NOTIFY 85 kmp_team_t *team = __kmp_team_from_gtid(gtid); 86 if (team->t.t_active_level > 1) { 87 // The frame notifications are only supported for the outermost teams. 88 return; 89 } 90 ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident; 91 if (loc) { 92 // Use the reserved_2 field to store the index to the region domain. 93 // Assume that reserved_2 contains zero initially. Since zero is special 94 // value here, store the index into domain array increased by 1. 
95 if (loc->reserved_2 == 0) { 96 if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { 97 int frm = 98 KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value 99 if (frm >= KMP_MAX_FRAME_DOMAINS) { 100 KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count 101 return; // loc->reserved_2 is still 0 102 } 103 // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { 104 // frm = loc->reserved_2 - 1; // get value saved by other thread 105 // for same loc 106 //} // AC: this block is to replace next unsynchronized line 107 108 // We need to save indexes for both region and barrier frames. We'll use 109 // loc->reserved_2 field but put region index to the low two bytes and 110 // barrier indexes to the high two bytes. It is OK because 111 // KMP_MAX_FRAME_DOMAINS = 512. 112 loc->reserved_2 |= (frm + 1); // save "new" value 113 114 // Transform compiler-generated region location into the format 115 // that the tools more or less standardized on: 116 // "<func>$omp$parallel@[file:]<line>[:<col>]" 117 char *buff = NULL; 118 kmp_str_loc_t str_loc = 119 __kmp_str_loc_init(loc->psource, /* init_fname */ false); 120 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, 121 team_size, str_loc.file, str_loc.line, 122 str_loc.col); 123 124 __itt_suppress_push(__itt_suppress_memory_errors); 125 __kmp_itt_region_domains[frm] = __itt_domain_create(buff); 126 __itt_suppress_pop(); 127 128 __kmp_str_free(&buff); 129 if (barriers) { 130 if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { 131 int frm = KMP_TEST_THEN_INC32( 132 &__kmp_barrier_domain_count); // get "old" value 133 if (frm >= KMP_MAX_FRAME_DOMAINS) { 134 KMP_TEST_THEN_DEC32( 135 &__kmp_barrier_domain_count); // revert the count 136 return; // loc->reserved_2 is still 0 137 } 138 char *buff = NULL; 139 buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, 140 str_loc.file, str_loc.col); 141 __itt_suppress_push(__itt_suppress_memory_errors); 142 
__kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); 143 __itt_suppress_pop(); 144 __kmp_str_free(&buff); 145 // Save the barrier frame index to the high two bytes. 146 loc->reserved_2 |= (frm + 1) << 16; 147 } 148 } 149 __kmp_str_loc_free(&str_loc); 150 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); 151 } 152 } else { // Region domain exists for this location 153 // Check if team size was changed. Then create new region domain for this 154 // location 155 unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1; 156 if ((frm < KMP_MAX_FRAME_DOMAINS) && 157 (__kmp_itt_region_team_size[frm] != team_size)) { 158 char *buff = NULL; 159 kmp_str_loc_t str_loc = 160 __kmp_str_loc_init(loc->psource, /* init_fname */ false); 161 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, 162 team_size, str_loc.file, str_loc.line, 163 str_loc.col); 164 165 __itt_suppress_push(__itt_suppress_memory_errors); 166 __kmp_itt_region_domains[frm] = __itt_domain_create(buff); 167 __itt_suppress_pop(); 168 169 __kmp_str_free(&buff); 170 __kmp_str_loc_free(&str_loc); 171 __kmp_itt_region_team_size[frm] = team_size; 172 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); 173 } else { // Team size was not changed. Use existing domain. 174 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); 175 } 176 } 177 KMP_ITT_DEBUG_LOCK(); 178 KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid, 179 loc->reserved_2, loc); 180 } 181#endif 182} // __kmp_itt_region_forking 183 184// ----------------------------------------------------------------------------- 185LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, 186 __itt_timestamp end, int imbalance, 187 ident_t *loc, int team_size, int region) { 188#if USE_ITT_NOTIFY 189 if (region) { 190 kmp_team_t *team = __kmp_team_from_gtid(gtid); 191 int serialized = (region == 2 ? 
1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. Its index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if (frm >= KMP_MAX_FRAME_DOMAINS)
        return; // something's gone wrong, returning
      if (__kmp_itt_region_team_size[frm] != team_size) {
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      // Barrier domain index + 1 lives in the high two bytes of reserved_2.
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // loc->reserved_2 is still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          //   "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc =
              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit

// -----------------------------------------------------------------------------
// Report a load-imbalance metadata record for the interval [begin, end].
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  // Lazily create the shared metadata domain and string handles; done once,
  // double-checked under metadata_lock.
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance

// -----------------------------------------------------------------------------
// Report schedule type, iteration count, and chunk size for the loop at `loc`.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  // Lazily create the shared metadata domain and string handles; done once,
  // double-checked under metadata_lock.
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  KMP_DEBUG_ASSERT(loc->psource);
  kmp_uint64 loop_data[5];
  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  loop_data[0] = line;
  loop_data[1] = col;
  loop_data[2] = sched_type;
  loop_data[3]
= iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop

// -----------------------------------------------------------------------------
// Report a metadata record (source line and column only) for a "single"
// construct at `loc`.
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  // Lazily create the shared metadata domain and string handles; done once,
  // double-checked under metadata_lock.
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  kmp_uint64 single_data[2];
  single_data[0] = line;
  single_data[1] = col;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-start events are too expensive (see
// the note above __kmp_itt_region_forking).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region-finish events are too expensive (see
// the note above __kmp_itt_region_forking).
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// ----------------------------------------------------------------------------
// End the region frame opened by __kmp_itt_region_forking; called by the
// primary thread of the team after the region completes.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    // Low two bytes of reserved_2 hold the region domain index + 1.
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined

/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- primary thread waits for all worker threads to arrive; each
      worker thread registers arrival and goes further.
   2. Release -- each worker thread waits until primary thread lets it go;
      primary thread lets worker threads go.

   Function should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before call to
   __kmp_itt_barrier_starting() and save result in local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure).
*/

// Compute the ITT sync-object address for barrier type `bt` of the team that
// thread `gtid` belongs to; optionally names the object. `delta` shifts the
// barrier counter: 0 (default) is the current barrier, -1 the previous one.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        (kmp_uintptr_t)(team) +
        (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
            bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        // impl can only be nonzero when loc != NULL, so loc->flags below is
        // safe to read.
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we can read thr->th.th_ident, because it
        // contains location of last passed construct (while join barrier is not
        // such one). Use th_ident of primary thread instead --
        // __kmp_join_call() called by the primary thread saves location.
        //
        // AC: cannot read from primary thread because __kmp_join_call may not
        //     be called yet, so we read the location from team. This is the
        //     same location. Team is valid on entry to join barrier where this
        //     happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object

// -----------------------------------------------------------------------------
// Called by each thread before arriving at the gather phase of a barrier.
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting

// -----------------------------------------------------------------------------
// Called by each thread between the gather and release phases of a barrier.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  } else {
    // Workers report nothing at this point.
  }
#endif
} // __kmp_itt_barrier_middle

// -----------------------------------------------------------------------------
// Called by each thread after the release phase of a barrier.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    // Primary thread already reported "acquired" in barrier_middle.
  } else {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished

/* Taskwait reporting.
   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have taskwait structure, so we need to construct something.
*/

// Compute an ITT sync-object address for the current taskwait of thread
// `gtid`, derived from the task descriptor address and taskwait counter.
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object

// Name the taskwait sync object and report "prepare" before waiting.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting

// Report "acquired" and destroy the taskwait sync object after the wait.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished

/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). Synch object passed to the function must be the
   barrier or taskwait the threads are waiting at. */

void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting

// -----------------------------------------------------------------------------
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished

/* Lock reporting.
   * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
     operation (set/unset). It is not a real event shown to the user but just
     setting a name for synchronization object. `lock' is an address of sync
     object, the same address should be used in all subsequent calls.
   * __kmp_itt_lock_acquiring() should be called before setting the lock.
   * __kmp_itt_lock_acquired() should be called after setting the lock.
   * __kmp_itt_lock_releasing() should be called before unsetting the lock.
   * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
     for the lock.
   * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
     operation. After __kmp_itt_lock_destroyed() all the references to the same
     address will be considered as another sync object, not related with the
     original one. */

#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK

// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini

// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif

void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      // Indirect lock: report against the underlying lock object.
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
__itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring

void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      // Indirect lock: report against the underlying lock object.
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired

void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      // Indirect lock: report against the underlying lock object.
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing

void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      // Indirect lock: report against the underlying lock object.
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled

void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed

/* Critical reporting.
   Critical sections are treated exactly as locks (but have different object
   type). */
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif

void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring

void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired

void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing

void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed

/* Single reporting. */

// Create a named ITT mark for the "single" region being entered by thread
// `gtid` and turn it on; the mark id is cached in th_itt_mark_single so
// __kmp_itt_single_end can turn it off.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ?
                           NULL : loc->psource);
    // Build a mark name that embeds the source location of the construct.
    // NOTE(review): src may be NULL here; presumably __kmp_str_buf_print
    // tolerates a NULL "%s" argument -- confirm.
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    // KMP_ITT_DEBUG_LOCK acquires the debug lock; the paired
    // KMP_ITT_DEBUG_PRINT releases it (see the macro definitions).
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

// Mark the end of the OMP single region started by __kmp_itt_single_start,
// using the mark id saved in th_itt_mark_single.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end

/* Ordered reporting.
   * __kmp_itt_ordered_init is called by each thread *before* first using sync
     object. ITT team would like it to be called once, but it requires extra
     synchronization.
   * __kmp_itt_ordered_prep is called when thread is going to enter ordered
     section (before synchronization).
   * __kmp_itt_ordered_start is called just before entering user code (after
     synchronization).
   * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of serialized team. */

// Register the ordered-section sync object with ITT, named with the source
// location of the construct when available.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ?
                           NULL : loc->psource);
    __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", src, 0);
  }
#endif
} // __kmp_itt_ordered_init

// Notify ITT that a thread is about to wait to enter an ordered section.
// No event is generated for a serialized team.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_prep

// Notify ITT that an ordered section was entered (synchronization done).
// No event is generated for a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_start

// Notify ITT that the ordered section's user code has finished.
// No event is generated for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_end

/* Threads reporting. */

// Tell ITT to exclude the calling thread from analysis.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore

// Give the calling thread a human-readable name in ITT tools:
// "OMP Primary Thread #N" for the team master, "OMP Worker Thread #N"
// otherwise.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Primary Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name

/* System object reporting.
   ITT catches operations with system sync objects (like Windows* OS on IA-32
   architecture API critical sections and events). We only need to specify
   name ("OMP Scheduler") for the object to let ITT know it is an object used
   by OpenMP RTL for internal purposes. */

// Register an internal RTL sync object with ITT under the "OMP Scheduler"
// domain so tools can attribute waits on it to the runtime, not user code.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  // KMP_ITT_DEBUG_LOCK acquires the debug lock; the paired
  // KMP_ITT_DEBUG_PRINT releases it (see the macro definitions).
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created

/* Stack stitching api.
   Primary thread calls "create" and put the stitching id into team structure.
   Workers read the stitching id and call "enter" / "leave" api.
   Primary thread calls "destroy" at the end of the parallel region. */

// Create a stack-stitching id for the current parallel region.
// Returns NULL when the ITT entry point is not available (no collector).
__itt_caller __kmp_itt_stack_caller_create() {
#if USE_ITT_NOTIFY
  if (!__itt_stack_caller_create_ptr)
    return NULL;
  KMP_ITT_DEBUG_LOCK();
  __itt_caller id = __itt_stack_caller_create();
  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
  return id;
#endif
  // Fallback for builds without USE_ITT_NOTIFY (unreachable otherwise).
  return NULL;
}

// Destroy a stitching id previously returned by __kmp_itt_stack_caller_create.
void __kmp_itt_stack_caller_destroy(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_caller_destroy_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_caller_destroy(id);
    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
  }
#endif
}

// Worker-side: mark entry into the stitched stack region identified by id.
void __kmp_itt_stack_callee_enter(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_enter_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_enter(id);
    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
  }
#endif
}

// Worker-side: mark exit from the stitched stack region identified by id.
void __kmp_itt_stack_callee_leave(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_leave_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_leave(id);
    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
  }
#endif
}

#endif /* USE_ITT_BUILD */