#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
// Debug build: serialize ITT debug tracing with a bootstrap lock. Note the
// asymmetric pairing: KMP_ITT_DEBUG_LOCK() acquires the lock and the matching
// KMP_ITT_DEBUG_PRINT() releases it, so every LOCK must be followed by exactly
// one PRINT.
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
// Production build: debug tracing compiles away to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG

// Ensure that the functions are static if they're supposed to be being inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif

// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif

// Guards the lazy, one-time creation of the ITT metadata domain and string
// handles (see the __kmp_itt_metadata_* functions below).
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);

/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by master thread of a team.
   Exact moment of call does not matter, but it should be completed before any
   thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from parallel region body.
 * __kmp_itt_region_joined should be called by master thread of a team, after
   all threads called __kmp_itt_region_finished.

   Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
   execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */

// Open an ITT frame for the parallel region the master thread (gtid) is about
// to fork. Creates (or reuses) an ITT domain named after the region's source
// location; the domain index is cached in loc->reserved_2 (low 16 bits for the
// region domain, high 16 bits for the optional barrier domain).
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc) {
    // Use the reserved_2 field to store the index to the region domain.
    // Assume that reserved_2 contains zero initially. Since zero is special
    // value here, store the index into domain array increased by 1.
    if (loc->reserved_2 == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }
        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
        //   frm = loc->reserved_2 - 1; // get value saved by other thread
        //   for same loc
        //} // AC: this block is to replace next unsynchronized line

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        // NOTE(review): the store below is intentionally unsynchronized (see
        // the commented-out CAS above); concurrent first-touch of the same loc
        // can race -- confirm this is an accepted tradeoff.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        // "<func>$omp$parallel@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        if (barriers) {
          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
            // Shadows the outer `frm` on purpose: this index is for the
            // barrier domain array, independent of the region domain index.
            int frm = KMP_TEST_THEN_INC32(
                &__kmp_barrier_domain_count); // get "old" value
            if (frm >= KMP_MAX_FRAME_DOMAINS) {
              KMP_TEST_THEN_DEC32(
                  &__kmp_barrier_domain_count); // revert the count
              return; // loc->reserved_2 is still 0
            }
            char *buff = NULL;
            // NOTE(review): str_loc.col is passed as the trailing %d here --
            // presumably the column is intended rather than the line; confirm.
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __kmp_str_free(&buff);
            // Save the barrier frame index to the high two bytes.
            loc->reserved_2 |= (frm + 1) << 16;
          }
        }
        __kmp_str_loc_free(&str_loc);
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
                        loc->reserved_2, loc);
  }
#endif
} // __kmp_itt_region_forking

// -----------------------------------------------------------------------------
// Submit a complete [begin, end] ITT frame for either a parallel region
// (region != 0; region == 2 means a serialized region) or a barrier
// (region == 0; `imbalance` selects the imbalance vs. plain barrier domain).
// Domain indices are cached in loc->reserved_2 as described above.
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. Its index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // loc->reserved_2 is still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          // "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit

// -----------------------------------------------------------------------------
// Report barrier-imbalance metadata (begin/end timestamps, imbalance time,
// reduction flag) to ITT. The metadata domain and string handles are created
// lazily, double-checked under metadata_lock.
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance

// -----------------------------------------------------------------------------
// Report loop metadata (source line/column, schedule type, iteration count,
// chunk) to ITT. Line/column are parsed directly out of loc->psource.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  char *s_line;
  char *s_col;
  KMP_DEBUG_ASSERT(loc->psource);
#ifdef __cplusplus
  s_line = strchr(CCAST(char *, loc->psource), ';');
#else
  s_line = strchr(loc->psource, ';');
#endif
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 2-nd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 3-rd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_col = strchr(s_line + 1, ';'); // 4-th semicolon
  KMP_DEBUG_ASSERT(s_col);

  kmp_uint64 loop_data[5];
  loop_data[0] = atoi(s_line + 1); // read line
  loop_data[1] = atoi(s_col + 1); // read column
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop

// -----------------------------------------------------------------------------
// Report single-construct metadata (source line/column) to ITT.
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
  kmp_uint64 single_data[2];
  single_data[0] = str_loc.line;
  single_data[1] = str_loc.col;

  __kmp_str_loc_free(&str_loc);

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region start logging is too expensive (see
// the "Parallel region reporting" note above).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region finish logging is too expensive (see
// the "Parallel region reporting" note above).
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// ----------------------------------------------------------------------------
// Close the ITT frame opened by __kmp_itt_region_forking. Called by the master
// thread after all threads finished the region.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined

/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- master waits for arriving of all the worker threads; each
      worker thread registers arrival and goes further.
   2. Release -- each worker threads waits until master lets it go; master lets
      worker threads go.

   Function should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before call to
   __kmp_itt_barrier_starting() and save result in local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures.
   We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure). */

// Compute the ITT sync-object address for the barrier identified by (team,
// barrier type, counter + delta). When set_name != 0 also registers the object
// with ITT under a human-readable type string derived from the barrier kind.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        kmp_uintptr_t(team) +
        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we can read thr->th.th_ident, because it
        // contains location of last passed construct (while join barrier is not
        // such one). Use th_ident of master thread instead -- __kmp_join_call()
        // called by the master thread saves location.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        // yet, so we read the location from team. This is the same location.
        // And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object

// -----------------------------------------------------------------------------
// Called by each thread before arriving at the gather phase. Workers also
// signal "releasing" first (they are about to let the master proceed).
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting

// -----------------------------------------------------------------------------
// Called between the gather and release phases. Only the master reports here:
// it has acquired the barrier (all workers arrived) and is releasing them.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  } else {
  }
#endif
} // __kmp_itt_barrier_middle

// -----------------------------------------------------------------------------
// Called after the release phase. Only workers report here: each has been let
// go by the master, i.e. acquired the barrier.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
  } else {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished

/* Taskwait reporting.
   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have taskwait structure, so we need to construct something.
*/

// Derive a per-taskwait sync-object address from the current task descriptor
// and its taskwait counter (wrapped so the address stays within the
// kmp_taskdata_t object).
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object

// Report that the thread is entering a taskwait: name the sync object after
// the taskwait's source location, then signal "prepare".
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting

// Report that the taskwait completed; the sync object is destroyed because the
// same address may be reused for the next taskwait.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished

/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at
   barrier (or taskwait). Synch object passed to the function must be barrier or
   taskwait the threads waiting at. */

void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (object != NULL) {
    KMP_ITT_DEBUG_LOCK();
    // Cancel the wait on the barrier/taskwait object while executing a task.
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting

// -----------------------------------------------------------------------------
// Resume waiting on the barrier/taskwait object after the task finished.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished

/* Lock reporting.
 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
   operation (set/unset). It is not a real event shown to the user but just
   setting a name for synchronization object. `lock' is an address of sync
   object, the same address should be used in all subsequent calls.
 * __kmp_itt_lock_acquiring() should be called before setting the lock.
 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
   for the lock.
 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
   operation. After __kmp_itt_lock_destroyed() all the references to the same
   address will be considered as another sync object, not related with the
   original one. */

#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK

// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini

// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif

void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      // Tag 0 means an indirect lock: report the underlying lock object.
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
      __itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring

void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired

void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing

void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled

void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed

/* Critical reporting.
   Critical sections are treated exactly as locks (but have different object
   type).
 */
// Create an ITT sync object for an "omp critical" lock. With dynamic locks
// the source location is forwarded so ITT can attribute the object.
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif

// Thread is about to contend for the critical-section lock.
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring

// Thread has acquired the critical-section lock.
void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired

// Thread is releasing the critical-section lock.
void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing

// The critical-section lock is being destroyed.
void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed

/* Single reporting. */

// Mark the start of an "omp single" region executed by this thread. A named
// ITT mark ("OMP Single-<source location>") is created and switched on; the
// mark id is stored in th_itt_mark_single so __kmp_itt_single_end can turn
// it off later.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    // NOTE(review): src may be NULL here and is then passed to a %s
    // conversion below; that is undefined behavior per the C standard --
    // confirm __kmp_str_buf_print tolerates NULL or that loc->psource is
    // always set on this path.
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    // Each KMP_ITT_DEBUG_LOCK() acquires the debug lock which the matching
    // KMP_ITT_DEBUG_PRINT() releases, so the pair brackets one ITT call
    // (both expand to nothing unless KMP_ITT_DEBUG is enabled).
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

// Mark the end of the "omp single" region started by __kmp_itt_single_start
// (turns off the mark saved in th_itt_mark_single).
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end

/* Ordered reporting.
   * __kmp_itt_ordered_init is called by each thread *before* first using sync
     object. ITT team would like it to be called once, but it requires extra
     synchronization.
   * __kmp_itt_ordered_prep is called when thread is going to enter ordered
     section (before synchronization).
   * __kmp_itt_ordered_start is called just before entering user code (after
     synchronization).
   * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of serialized team. */

// Register the ordered-section sync object with ITT, named "OMP Ordered" and
// attributed to the thread's current source location.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", src, 0);
  }
#endif
} // __kmp_itt_ordered_init

// Thread is about to wait for its turn in the ordered section.
// No event is generated for a serialized team (no real synchronization).
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_prep

// Thread got its turn and is entering the ordered-section body.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_start

// Thread returned from the ordered-section body and passes the turn on.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_end

/* Threads reporting. */

// Tell ITT to ignore the calling thread entirely.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore

// Give the calling thread a human-readable name in ITT tools:
// "OMP Master Thread #N" or "OMP Worker Thread #N" (N = global thread id).
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name

/* System object reporting.
982 ITT catches operations with system sync objects (like Windows* OS on IA-32 983 architecture API critical sections and events). We only need to specify 984 name ("OMP Scheduler") for the object to let ITT know it is an object used 985 by OpenMP RTL for internal purposes. */ 986 987void __kmp_itt_system_object_created(void *object, char const *name) { 988#if USE_ITT_NOTIFY 989 KMP_ITT_DEBUG_LOCK(); 990 __itt_sync_create(object, "OMP Scheduler", name, 0); 991 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", 992 object, name); 993#endif 994} // __kmp_itt_system_object_created 995 996/* Stack stitching api. 997 Master calls "create" and put the stitching id into team structure. 998 Workers read the stitching id and call "enter" / "leave" api. 999 Master calls "destroy" at the end of the parallel region. */ 1000 1001__itt_caller __kmp_itt_stack_caller_create() { 1002#if USE_ITT_NOTIFY 1003 if (!__itt_stack_caller_create_ptr) 1004 return NULL; 1005 KMP_ITT_DEBUG_LOCK(); 1006 __itt_caller id = __itt_stack_caller_create(); 1007 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); 1008 return id; 1009#endif 1010 return NULL; 1011} 1012 1013void __kmp_itt_stack_caller_destroy(__itt_caller id) { 1014#if USE_ITT_NOTIFY 1015 if (__itt_stack_caller_destroy_ptr) { 1016 KMP_ITT_DEBUG_LOCK(); 1017 __itt_stack_caller_destroy(id); 1018 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); 1019 } 1020#endif 1021} 1022 1023void __kmp_itt_stack_callee_enter(__itt_caller id) { 1024#if USE_ITT_NOTIFY 1025 if (__itt_stack_callee_enter_ptr) { 1026 KMP_ITT_DEBUG_LOCK(); 1027 __itt_stack_callee_enter(id); 1028 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); 1029 } 1030#endif 1031} 1032 1033void __kmp_itt_stack_callee_leave(__itt_caller id) { 1034#if USE_ITT_NOTIFY 1035 if (__itt_stack_callee_leave_ptr) { 1036 KMP_ITT_DEBUG_LOCK(); 1037 __itt_stack_callee_leave(id); 1038 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); 1039 } 1040#endif 1041} 1042 1043#endif /* USE_ITT_BUILD */ 1044