/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
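
As an illustration (a hedged sketch, not code emitted by any particular
compiler), a compiler may materialize the global thread number once at the
top of an outlined function and pass it to subsequent __kmpc_* calls:

@code
// 'loc' is assumed to be a compiler-generated ident_t describing the site.
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_barrier(&loc, gtid);
@endcode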

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, since the runtime maintains
underlying threads even when they are not active (the cost of creating and
destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active
parallel construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
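 *
 * In debug builds the return value additionally honors the KMP_PAR_RANGE
 * filter: the construct's `ident_t::psource` string (a semicolon-separated
 * description assumed here to follow the ";file;routine;line;..." layout
 * that the parser below expects) is matched against the configured file,
 * routine, and line range.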
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                      ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of
initial teams created will be greater than or equal to the lower bound and
less than or equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
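
As an illustration (a hedged sketch of compiler-generated code, with
illustrative names), `num_teams(4:8)` on a teams construct may be lowered as:

@code
__kmpc_push_num_teams_51(&loc, gtid, /*num_teams_lb=*/4, /*num_teams_ub=*/8,
                         /*num_threads=*/0); // 0: thread_limit not specified
__kmpc_fork_teams(&loc, /*argc=*/1, (kmpc_micro)teams_outlined_fn, &shared);
@endcode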
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
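  // The node popped here is the contention-group root that was pushed for
  // the teams construct; its reference count (cg_nthreads) is dropped below
  // and the node is freed once the count reaches zero.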
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
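
As an illustration (a hedged sketch, not code emitted by any particular
compiler), a conditional parallel region may be lowered as:

@code
if (condition) {
  __kmpc_fork_call(&loc, argc, outlined_fn, /* shared vars ... */);
} else {
  __kmpc_serialized_parallel(&loc, gtid);
  outlined_fn(&gtid, &bound_tid /*, shared vars ... */);
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode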
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL,
                                  NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
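      // Reload the x87 FPU control word and MXCSR that were saved for this
      // serial team (see __kmp_inherit_fp_control) so the thread leaves the
      // region with the floating-point environment it entered with.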
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the
      // thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be
// used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is likewise addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.flags.sse2) {
    // CPU cannot execute SSE2 instructions.
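    // No SSE2 means no mfence; rely on the KMP_MB() already issued above
    // rather than executing an unsupported instruction.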
  } else {
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here; move along.
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
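
As an illustration (a hedged sketch of compiler-generated code), a
<tt>master</tt> region is typically lowered as:

@code
if (__kmpc_master(&loc, gtid)) {
  // ... body of the master region ...
  __kmpc_end_master(&loc, gtid);
}
@endcode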
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
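
As an illustration (a hedged sketch of compiler-generated code), a masked
construct with a filter clause may be lowered as:

@code
// filter_value is the evaluated filter() expression.
if (__kmpc_masked(&loc, gtid, filter_value)) {
  // ... body of the masked region ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode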
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
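
As an illustration (a hedged sketch of compiler-generated code), each
iteration of a loop with an <tt>ordered</tt> clause brackets the ordered
block with:

@code
__kmpc_ordered(&loc, gtid);
// ... statements that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode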
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

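// Note on the futex poll-word encoding used by the macros above and below:
// the owner stores (gtid + 1) << 1, and bit 0 is set when at least one
// waiter may be sleeping in FUTEX_WAIT; the release path only issues a
// FUTEX_WAKE when that bit is set.
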
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be
  // volatile.
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There are chances the lock will be destroyed with no usage, but it is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
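
As an illustration (a hedged sketch of compiler-generated code), a named
critical region is typically lowered around a static lock word:

@code
// One kmp_critical_name per critical-section name, zero-initialized.
static kmp_critical_name crit_lock;

__kmpc_critical(&loc, gtid, &crit_lock);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_lock);
@endcode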
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
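
// As an illustration (a hedged sketch, not code emitted by any particular
// compiler), `#pragma omp critical (name) hint(omp_lock_hint_speculative)`
// may be lowered as:
//
//   __kmpc_critical_with_hint(&loc, gtid, &name_lock,
//                             omp_lock_hint_speculative);
//   ... protected code ...
//   __kmpc_end_critical(&loc, gtid, &name_lock);
//
// where name_lock is an illustrative stand-in for the compiler-generated
// kmp_critical_name for `name`.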

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed
inside this function.
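
As an illustration (a hedged sketch of compiler-generated code):

@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... master-only code ...
  __kmpc_end_barrier_master(&loc, gtid);
}
@endcode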
1708 */
1709 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1710   int status;
1711   KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1712   __kmp_assert_valid_gtid(global_tid);
1713
1714   if (!TCR_4(__kmp_init_parallel))
1715     __kmp_parallel_initialize();
1716
1717   __kmp_resume_if_soft_paused();
1718
1719   if (__kmp_env_consistency_check)
1720     __kmp_check_barrier(global_tid, ct_barrier, loc);
1721
1722 #if OMPT_SUPPORT
1723   ompt_frame_t *ompt_frame;
1724   if (ompt_enabled.enabled) {
1725     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1726     if (ompt_frame->enter_frame.ptr == NULL)
1727       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1728   }
1729   OMPT_STORE_RETURN_ADDRESS(global_tid);
1730 #endif
1731 #if USE_ITT_NOTIFY
1732   __kmp_threads[global_tid]->th.th_ident = loc;
1733 #endif
1734   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1735 #if OMPT_SUPPORT && OMPT_OPTIONAL
1736   if (ompt_enabled.enabled) {
1737     ompt_frame->enter_frame = ompt_data_none;
1738   }
1739 #endif
1740
1741   return (status != 0) ? 0 : 1;
1742 }
1743
1744 /*!
1745 @ingroup SYNCHRONIZATION
1746 @param loc source location information
1747 @param global_tid thread id.
1748
1749 Complete the execution of a combined barrier and master. This function should
1750 only be called at the completion of the <tt>master</tt> code. Other threads will
1751 still be waiting at the barrier and this call releases them.
1752 */
1753 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1754   KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1755   __kmp_assert_valid_gtid(global_tid);
1756   __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1757 }
1758
1759 /*!
1760 @ingroup SYNCHRONIZATION
1761 @param loc source location information
1762 @param global_tid thread id.
1763 @return one if the thread should execute the master block, zero otherwise
1764
1765 Start execution of a combined barrier and master(nowait) construct.
1766 The barrier is executed inside this function.
1767 There is no equivalent "end" function, since none is needed: the barrier has already been executed, and the bookkeeping that __kmpc_end_master would otherwise perform is done inside this function itself.
1768 */
1769 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1770   kmp_int32 ret;
1771   KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1772   __kmp_assert_valid_gtid(global_tid);
1773
1774   if (!TCR_4(__kmp_init_parallel))
1775     __kmp_parallel_initialize();
1776
1777   __kmp_resume_if_soft_paused();
1778
1779   if (__kmp_env_consistency_check) {
1780     if (loc == 0) {
1781       KMP_WARNING(ConstructIdentInvalid); // TODO: explain to the user what an invalid construct ident implies
1782 } 1783 __kmp_check_barrier(global_tid, ct_barrier, loc); 1784 } 1785 1786 #if OMPT_SUPPORT 1787 ompt_frame_t *ompt_frame; 1788 if (ompt_enabled.enabled) { 1789 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1790 if (ompt_frame->enter_frame.ptr == NULL) 1791 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1792 } 1793 OMPT_STORE_RETURN_ADDRESS(global_tid); 1794 #endif 1795 #if USE_ITT_NOTIFY 1796 __kmp_threads[global_tid]->th.th_ident = loc; 1797 #endif 1798 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1799 #if OMPT_SUPPORT && OMPT_OPTIONAL 1800 if (ompt_enabled.enabled) { 1801 ompt_frame->enter_frame = ompt_data_none; 1802 } 1803 #endif 1804 1805 ret = __kmpc_master(loc, global_tid); 1806 1807 if (__kmp_env_consistency_check) { 1808 /* there's no __kmpc_end_master called; so the (stats) */ 1809 /* actions of __kmpc_end_master are done here */ 1810 if (ret) { 1811 /* only one thread should do the pop since only */ 1812 /* one did the push (see __kmpc_master()) */ 1813 __kmp_pop_sync(global_tid, ct_master, loc); 1814 } 1815 } 1816 1817 return (ret); 1818 } 1819 1820 /* The BARRIER for a SINGLE process section is always explicit */ 1821 /*! 1822 @ingroup WORK_SHARING 1823 @param loc source location information 1824 @param global_tid global thread number 1825 @return One if this thread should execute the single construct, zero otherwise. 1826 1827 Test whether to execute a <tt>single</tt> construct. 1828 There are no implicit barriers in the two "single" calls, rather the compiler 1829 should introduce an explicit barrier if it is required. 1830 */ 1831 1832 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1833 __kmp_assert_valid_gtid(global_tid); 1834 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1835 1836 if (rc) { 1837 // We are going to execute the single statement, so we should count it. 1838 KMP_COUNT_BLOCK(OMP_SINGLE); 1839 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1840 } 1841 1842 #if OMPT_SUPPORT && OMPT_OPTIONAL 1843 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1844 kmp_team_t *team = this_thr->th.th_team; 1845 int tid = __kmp_tid_from_gtid(global_tid); 1846 1847 if (ompt_enabled.enabled) { 1848 if (rc) { 1849 if (ompt_enabled.ompt_callback_work) { 1850 ompt_callbacks.ompt_callback(ompt_callback_work)( 1851 ompt_work_single_executor, ompt_scope_begin, 1852 &(team->t.ompt_team_info.parallel_data), 1853 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1854 1, OMPT_GET_RETURN_ADDRESS(0)); 1855 } 1856 } else { 1857 if (ompt_enabled.ompt_callback_work) { 1858 ompt_callbacks.ompt_callback(ompt_callback_work)( 1859 ompt_work_single_other, ompt_scope_begin, 1860 &(team->t.ompt_team_info.parallel_data), 1861 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1862 1, OMPT_GET_RETURN_ADDRESS(0)); 1863 ompt_callbacks.ompt_callback(ompt_callback_work)( 1864 ompt_work_single_other, ompt_scope_end, 1865 &(team->t.ompt_team_info.parallel_data), 1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1867 1, OMPT_GET_RETURN_ADDRESS(0)); 1868 } 1869 } 1870 } 1871 #endif 1872 1873 return rc; 1874 } 1875 1876 /*! 1877 @ingroup WORK_SHARING 1878 @param loc source location information 1879 @param global_tid global thread number 1880 1881 Mark the end of a <tt>single</tt> construct. This function should 1882 only be called by the thread that executed the block of code protected 1883 by the `single` construct. 
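For illustration (placeholder names, not verbatim compiler output), a
<tt>single</tt> region is typically lowered as:
@code
if (__kmpc_single(&loc, gtid)) {
  // code executed by exactly one thread
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier, emitted only when required
@endcode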
1884 */ 1885 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1886 __kmp_assert_valid_gtid(global_tid); 1887 __kmp_exit_single(global_tid); 1888 KMP_POP_PARTITIONED_TIMER(); 1889 1890 #if OMPT_SUPPORT && OMPT_OPTIONAL 1891 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1892 kmp_team_t *team = this_thr->th.th_team; 1893 int tid = __kmp_tid_from_gtid(global_tid); 1894 1895 if (ompt_enabled.ompt_callback_work) { 1896 ompt_callbacks.ompt_callback(ompt_callback_work)( 1897 ompt_work_single_executor, ompt_scope_end, 1898 &(team->t.ompt_team_info.parallel_data), 1899 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1900 OMPT_GET_RETURN_ADDRESS(0)); 1901 } 1902 #endif 1903 } 1904 1905 /*! 1906 @ingroup WORK_SHARING 1907 @param loc Source location 1908 @param global_tid Global thread id 1909 1910 Mark the end of a statically scheduled loop. 1911 */ 1912 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1913 KMP_POP_PARTITIONED_TIMER(); 1914 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1915 1916 #if OMPT_SUPPORT && OMPT_OPTIONAL 1917 if (ompt_enabled.ompt_callback_work) { 1918 ompt_work_t ompt_work_type = ompt_work_loop; 1919 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1920 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1921 // Determine workshare type 1922 if (loc != NULL) { 1923 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1924 ompt_work_type = ompt_work_loop; 1925 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1926 ompt_work_type = ompt_work_sections; 1927 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1928 ompt_work_type = ompt_work_distribute; 1929 } else { 1930 // use default set above. 1931 // a warning about this case is provided in __kmpc_for_static_init 1932 } 1933 KMP_DEBUG_ASSERT(ompt_work_type); 1934 } 1935 ompt_callbacks.ompt_callback(ompt_callback_work)( 1936 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1937 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1938 } 1939 #endif 1940 if (__kmp_env_consistency_check) 1941 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1942 } 1943 1944 // User routines which take C-style arguments (call by value) 1945 // different from the Fortran equivalent routines 1946 1947 void ompc_set_num_threads(int arg) { 1948 // !!!!! TODO: check the per-task binding 1949 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1950 } 1951 1952 void ompc_set_dynamic(int flag) { 1953 kmp_info_t *thread; 1954 1955 /* For the thread-private implementation of the internal controls */ 1956 thread = __kmp_entry_thread(); 1957 1958 __kmp_save_internal_controls(thread); 1959 1960 set__dynamic(thread, flag ? true : false); 1961 } 1962 1963 void ompc_set_nested(int flag) { 1964 kmp_info_t *thread; 1965 1966 /* For the thread-private internal controls implementation */ 1967 thread = __kmp_entry_thread(); 1968 1969 __kmp_save_internal_controls(thread); 1970 1971 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1972 } 1973 1974 void ompc_set_max_active_levels(int max_active_levels) { 1975 /* TO DO */ 1976 /* we want per-task implementation of this internal control */ 1977 1978 /* For the per-thread internal controls implementation */ 1979 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1980 } 1981 1982 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1983 // !!!!! 
TODO: check the per-task binding 1984 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1985 } 1986 1987 int ompc_get_ancestor_thread_num(int level) { 1988 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1989 } 1990 1991 int ompc_get_team_size(int level) { 1992 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1993 } 1994 1995 /* OpenMP 5.0 Affinity Format API */ 1996 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 1997 if (!__kmp_init_serial) { 1998 __kmp_serial_initialize(); 1999 } 2000 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2001 format, KMP_STRLEN(format) + 1); 2002 } 2003 2004 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2005 size_t format_size; 2006 if (!__kmp_init_serial) { 2007 __kmp_serial_initialize(); 2008 } 2009 format_size = KMP_STRLEN(__kmp_affinity_format); 2010 if (buffer && size) { 2011 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2012 format_size + 1); 2013 } 2014 return format_size; 2015 } 2016 2017 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2018 int gtid; 2019 if (!TCR_4(__kmp_init_middle)) { 2020 __kmp_middle_initialize(); 2021 } 2022 __kmp_assign_root_init_mask(); 2023 gtid = __kmp_get_gtid(); 2024 __kmp_aux_display_affinity(gtid, format); 2025 } 2026 2027 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2028 char const *format) { 2029 int gtid; 2030 size_t num_required; 2031 kmp_str_buf_t capture_buf; 2032 if (!TCR_4(__kmp_init_middle)) { 2033 __kmp_middle_initialize(); 2034 } 2035 __kmp_assign_root_init_mask(); 2036 gtid = __kmp_get_gtid(); 2037 __kmp_str_buf_init(&capture_buf); 2038 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2039 if (buffer && buf_size) { 2040 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2041 capture_buf.used + 1); 2042 } 2043 __kmp_str_buf_free(&capture_buf); 2044 return num_required; 2045 } 2046 2047 void kmpc_set_stacksize(int arg) { 2048 // __kmp_aux_set_stacksize initializes the library if needed 2049 __kmp_aux_set_stacksize(arg); 2050 } 2051 2052 void kmpc_set_stacksize_s(size_t arg) { 2053 // __kmp_aux_set_stacksize initializes the library if needed 2054 __kmp_aux_set_stacksize(arg); 2055 } 2056 2057 void kmpc_set_blocktime(int arg) { 2058 int gtid, tid; 2059 kmp_info_t *thread; 2060 2061 gtid = __kmp_entry_gtid(); 2062 tid = __kmp_tid_from_gtid(gtid); 2063 thread = __kmp_thread_from_gtid(gtid); 2064 2065 __kmp_aux_set_blocktime(arg, thread, tid); 2066 } 2067 2068 void kmpc_set_library(int arg) { 2069 // __kmp_user_set_library initializes the library if needed 2070 __kmp_user_set_library((enum library_type)arg); 2071 } 2072 2073 void kmpc_set_defaults(char const *str) { 2074 // __kmp_aux_set_defaults initializes the library if needed 2075 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2076 } 2077 2078 void kmpc_set_disp_num_buffers(int arg) { 2079 // ignore after initialization because some teams have already 2080 // allocated dispatch buffers 2081 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2082 arg <= KMP_MAX_DISP_NUM_BUFF) { 2083 __kmp_dispatch_num_buffers = arg; 2084 } 2085 } 2086 2087 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2088 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2089 return -1; 2090 #else 2091 if (!TCR_4(__kmp_init_middle)) { 2092 __kmp_middle_initialize(); 2093 } 2094 __kmp_assign_root_init_mask(); 2095 return __kmp_aux_set_affinity_mask_proc(proc, 
mask); 2096 #endif 2097 } 2098 2099 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2100 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2101 return -1; 2102 #else 2103 if (!TCR_4(__kmp_init_middle)) { 2104 __kmp_middle_initialize(); 2105 } 2106 __kmp_assign_root_init_mask(); 2107 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2108 #endif 2109 } 2110 2111 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2112 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2113 return -1; 2114 #else 2115 if (!TCR_4(__kmp_init_middle)) { 2116 __kmp_middle_initialize(); 2117 } 2118 __kmp_assign_root_init_mask(); 2119 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2120 #endif 2121 } 2122 2123 /* -------------------------------------------------------------------------- */ 2124 /*! 2125 @ingroup THREADPRIVATE 2126 @param loc source location information 2127 @param gtid global thread number 2128 @param cpy_size size of the cpy_data buffer 2129 @param cpy_data pointer to data to be copied 2130 @param cpy_func helper function to call for copying data 2131 @param didit flag variable: 1=single thread; 0=not single thread 2132 2133 __kmpc_copyprivate implements the interface for the private data broadcast 2134 needed for the copyprivate clause associated with a single region in an 2135 OpenMP<sup>*</sup> program (both C and Fortran). 2136 All threads participating in the parallel region call this routine. 2137 One of the threads (called the single thread) should have the <tt>didit</tt> 2138 variable set to 1 and all other threads should have that variable set to 0. 2139 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2140 2141 The OpenMP specification forbids the use of nowait on the single region when a 2142 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2143 barrier internally to avoid race conditions, so the code generation for the 2144 single region should avoid generating a barrier after the call to @ref 2145 __kmpc_copyprivate. 2146 2147 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2148 The <tt>loc</tt> parameter is a pointer to source location information. 2149 2150 Internal implementation: The single thread will first copy its descriptor 2151 address (cpy_data) to a team-private location, then the other threads will each 2152 call the function pointed to by the parameter cpy_func, which carries out the 2153 copy by copying the data using the cpy_data buffer. 2154 2155 The cpy_func routine used for the copy and the contents of the data area defined 2156 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2157 to be done. For instance, the cpy_data buffer can hold the actual data to be 2158 copied or it may hold a list of pointers to the data. The cpy_func routine must 2159 interpret the cpy_data buffer appropriately. 2160 2161 The interface to cpy_func is as follows: 2162 @code 2163 void cpy_func( void *destination, void *source ) 2164 @endcode 2165 where void *destination is the cpy_data pointer for the thread being copied to 2166 and void *source is the cpy_data pointer for the thread being copied from. 
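For illustration, a minimal cpy_func that broadcasts a single integer,
assuming every thread passed the address of its private copy as cpy_data
(all names here are placeholders):
@code
void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source;
}

int priv; // each thread's private copy; the single thread has didit == 1
__kmpc_copyprivate(&loc, gtid, sizeof(int), &priv, copy_int, didit);
@endcode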
2167 */
2168 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2169                         void *cpy_data, void (*cpy_func)(void *, void *),
2170                         kmp_int32 didit) {
2171   void **data_ptr;
2172   KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2173   __kmp_assert_valid_gtid(gtid);
2174
2175   KMP_MB();
2176
2177   data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2178
2179   if (__kmp_env_consistency_check) {
2180     if (loc == 0) {
2181       KMP_WARNING(ConstructIdentInvalid);
2182     }
2183   }
2184
2185   // ToDo: Optimize the following two barriers into some kind of split barrier
2186
2187   if (didit)
2188     *data_ptr = cpy_data;
2189
2190 #if OMPT_SUPPORT
2191   ompt_frame_t *ompt_frame;
2192   if (ompt_enabled.enabled) {
2193     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2194     if (ompt_frame->enter_frame.ptr == NULL)
2195       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2196   }
2197   OMPT_STORE_RETURN_ADDRESS(gtid);
2198 #endif
2199   /* This barrier is not a barrier region boundary */
2200 #if USE_ITT_NOTIFY
2201   __kmp_threads[gtid]->th.th_ident = loc;
2202 #endif
2203   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2204
2205   if (!didit)
2206     (*cpy_func)(cpy_data, *data_ptr);
2207
2208   // Consider next barrier a user-visible barrier for barrier region boundaries
2209   // Nesting checks are already handled by the single construct checks
2210   {
2211 #if OMPT_SUPPORT
2212     OMPT_STORE_RETURN_ADDRESS(gtid);
2213 #endif
2214 #if USE_ITT_NOTIFY
2215     __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2216     // tasks can overwrite the location)
2217 #endif
2218     __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219 #if OMPT_SUPPORT && OMPT_OPTIONAL
2220     if (ompt_enabled.enabled) {
2221       ompt_frame->enter_frame = ompt_data_none;
2222     }
2223 #endif
2224   }
2225 }
2226
2227 /* --------------------------------------------------------------------------*/
2228 /*!
2229 @ingroup THREADPRIVATE
2230 @param loc source location information
2231 @param gtid global thread number
2232 @param cpy_data pointer to the data to be saved/copied or 0
2233 @return the saved pointer to the data
2234
2235 __kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate:
2236 it only saves the pointer it is given (when that pointer is non-zero, i.e.
2237 when it comes from the thread that executed the single region) and returns
2238 that pointer from every call; the single thread itself does not need the
2239 result. This version performs no data copying: the copy has to be done
2240 elsewhere, e.g. inline in the generated code. For that reason, unlike
2241 __kmpc_copyprivate, this function does not end with a barrier, so the
2242 generated code must place a barrier after all of the data has been copied.
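For illustration (placeholder names; here the inline copy and the trailing
barrier are emitted by the compiler, not by the runtime):
@code
int priv;
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &priv : NULL);
if (!didit)
  priv = *(int *)src; // inline copy generated by the compiler
__kmpc_barrier(&loc, gtid); // required once all copying is done
@endcode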
2243 */ 2244 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2245 void **data_ptr; 2246 2247 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2248 2249 KMP_MB(); 2250 2251 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2252 2253 if (__kmp_env_consistency_check) { 2254 if (loc == 0) { 2255 KMP_WARNING(ConstructIdentInvalid); 2256 } 2257 } 2258 2259 // ToDo: Optimize the following barrier 2260 2261 if (cpy_data) 2262 *data_ptr = cpy_data; 2263 2264 #if OMPT_SUPPORT 2265 ompt_frame_t *ompt_frame; 2266 if (ompt_enabled.enabled) { 2267 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2268 if (ompt_frame->enter_frame.ptr == NULL) 2269 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2270 OMPT_STORE_RETURN_ADDRESS(gtid); 2271 } 2272 #endif 2273 /* This barrier is not a barrier region boundary */ 2274 #if USE_ITT_NOTIFY 2275 __kmp_threads[gtid]->th.th_ident = loc; 2276 #endif 2277 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2278 2279 return *data_ptr; 2280 } 2281 2282 /* -------------------------------------------------------------------------- */ 2283 2284 #define INIT_LOCK __kmp_init_user_lock_with_checks 2285 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2286 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2287 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2288 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2289 #define ACQUIRE_NESTED_LOCK_TIMED \ 2290 __kmp_acquire_nested_user_lock_with_checks_timed 2291 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2292 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2293 #define TEST_LOCK __kmp_test_user_lock_with_checks 2294 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2295 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2296 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2297 2298 // TODO: Make check abort messages use location info & pass it into 2299 // with_checks routines 2300 2301 #if KMP_USE_DYNAMIC_LOCK 2302 2303 // internal lock initializer 2304 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2305 kmp_dyna_lockseq_t seq) { 2306 if (KMP_IS_D_LOCK(seq)) { 2307 KMP_INIT_D_LOCK(lock, seq); 2308 #if USE_ITT_BUILD 2309 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2310 #endif 2311 } else { 2312 KMP_INIT_I_LOCK(lock, seq); 2313 #if USE_ITT_BUILD 2314 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2315 __kmp_itt_lock_creating(ilk->lock, loc); 2316 #endif 2317 } 2318 } 2319 2320 // internal nest lock initializer 2321 static __forceinline void 2322 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2323 kmp_dyna_lockseq_t seq) { 2324 #if KMP_USE_TSX 2325 // Don't have nested lock implementation for speculative locks 2326 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2327 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2328 seq = __kmp_user_lock_seq; 2329 #endif 2330 switch (seq) { 2331 case lockseq_tas: 2332 seq = lockseq_nested_tas; 2333 break; 2334 #if KMP_USE_FUTEX 2335 case lockseq_futex: 2336 seq = lockseq_nested_futex; 2337 break; 2338 #endif 2339 case lockseq_ticket: 2340 seq = lockseq_nested_ticket; 2341 break; 2342 case lockseq_queuing: 2343 seq = lockseq_nested_queuing; 2344 break; 2345 case lockseq_drdpa: 2346 seq = lockseq_nested_drdpa; 2347 break; 2348 default: 2349 seq = lockseq_nested_queuing; 2350 } 2351 
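// All nested lock sequences map to indirect lock kinds, so a nest lock is
// always initialized through the indirect (I-lock) path below, never the
// direct (D-lock) fast path used for plain locks.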
KMP_INIT_I_LOCK(lock, seq); 2352 #if USE_ITT_BUILD 2353 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2354 __kmp_itt_lock_creating(ilk->lock, loc); 2355 #endif 2356 } 2357 2358 /* initialize the lock with a hint */ 2359 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2360 uintptr_t hint) { 2361 KMP_DEBUG_ASSERT(__kmp_init_serial); 2362 if (__kmp_env_consistency_check && user_lock == NULL) { 2363 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2364 } 2365 2366 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2367 2368 #if OMPT_SUPPORT && OMPT_OPTIONAL 2369 // This is the case, if called from omp_init_lock_with_hint: 2370 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2371 if (!codeptr) 2372 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2373 if (ompt_enabled.ompt_callback_lock_init) { 2374 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2375 ompt_mutex_lock, (omp_lock_hint_t)hint, 2376 __ompt_get_mutex_impl_type(user_lock), 2377 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2378 } 2379 #endif 2380 } 2381 2382 /* initialize the lock with a hint */ 2383 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2384 void **user_lock, uintptr_t hint) { 2385 KMP_DEBUG_ASSERT(__kmp_init_serial); 2386 if (__kmp_env_consistency_check && user_lock == NULL) { 2387 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2388 } 2389 2390 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2391 2392 #if OMPT_SUPPORT && OMPT_OPTIONAL 2393 // This is the case, if called from omp_init_lock_with_hint: 2394 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2395 if (!codeptr) 2396 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2397 if (ompt_enabled.ompt_callback_lock_init) { 2398 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2399 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2400 __ompt_get_mutex_impl_type(user_lock), 2401 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2402 } 2403 #endif 2404 } 2405 2406 #endif // KMP_USE_DYNAMIC_LOCK 2407 2408 /* initialize the lock */ 2409 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2410 #if KMP_USE_DYNAMIC_LOCK 2411 2412 KMP_DEBUG_ASSERT(__kmp_init_serial); 2413 if (__kmp_env_consistency_check && user_lock == NULL) { 2414 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2415 } 2416 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2417 2418 #if OMPT_SUPPORT && OMPT_OPTIONAL 2419 // This is the case, if called from omp_init_lock_with_hint: 2420 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2421 if (!codeptr) 2422 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2423 if (ompt_enabled.ompt_callback_lock_init) { 2424 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2425 ompt_mutex_lock, omp_lock_hint_none, 2426 __ompt_get_mutex_impl_type(user_lock), 2427 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2428 } 2429 #endif 2430 2431 #else // KMP_USE_DYNAMIC_LOCK 2432 2433 static char const *const func = "omp_init_lock"; 2434 kmp_user_lock_p lck; 2435 KMP_DEBUG_ASSERT(__kmp_init_serial); 2436 2437 if (__kmp_env_consistency_check) { 2438 if (user_lock == NULL) { 2439 KMP_FATAL(LockIsUninitialized, func); 2440 } 2441 } 2442 2443 KMP_CHECK_USER_LOCK_INIT(); 2444 2445 if ((__kmp_user_lock_kind == lk_tas) && 2446 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2447 lck = (kmp_user_lock_p)user_lock; 2448 } 2449 #if KMP_USE_FUTEX 2450 else if ((__kmp_user_lock_kind == lk_futex) && 2451 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2452 lck = (kmp_user_lock_p)user_lock; 2453 } 2454 #endif 2455 else { 2456 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2457 } 2458 INIT_LOCK(lck); 2459 __kmp_set_user_lock_location(lck, loc); 2460 2461 #if OMPT_SUPPORT && OMPT_OPTIONAL 2462 // This is the case, if called from omp_init_lock_with_hint: 2463 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2464 if (!codeptr) 2465 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2466 if (ompt_enabled.ompt_callback_lock_init) { 2467 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2468 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2469 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2470 } 2471 #endif 2472 2473 #if USE_ITT_BUILD 2474 __kmp_itt_lock_creating(lck); 2475 #endif /* USE_ITT_BUILD */ 2476 2477 #endif // KMP_USE_DYNAMIC_LOCK 2478 } // __kmpc_init_lock 2479 2480 /* initialize the lock */ 2481 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2482 #if KMP_USE_DYNAMIC_LOCK 2483 2484 KMP_DEBUG_ASSERT(__kmp_init_serial); 2485 if (__kmp_env_consistency_check && user_lock == NULL) { 2486 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2487 } 2488 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2489 2490 #if OMPT_SUPPORT && OMPT_OPTIONAL 2491 // This is the case, if called from omp_init_lock_with_hint: 2492 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2493 if (!codeptr) 2494 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2495 if (ompt_enabled.ompt_callback_lock_init) { 2496 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2497 ompt_mutex_nest_lock, omp_lock_hint_none, 2498 __ompt_get_mutex_impl_type(user_lock), 2499 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2500 } 2501 #endif 2502 2503 #else // KMP_USE_DYNAMIC_LOCK 2504 2505 static char const *const func = "omp_init_nest_lock"; 2506 kmp_user_lock_p lck; 2507 KMP_DEBUG_ASSERT(__kmp_init_serial); 2508 2509 if (__kmp_env_consistency_check) { 2510 if (user_lock == NULL) { 2511 KMP_FATAL(LockIsUninitialized, func); 2512 } 2513 } 2514 2515 KMP_CHECK_USER_LOCK_INIT(); 2516 2517 if ((__kmp_user_lock_kind == lk_tas) && 2518 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2519 OMP_NEST_LOCK_T_SIZE)) { 2520 lck = (kmp_user_lock_p)user_lock; 2521 } 2522 #if KMP_USE_FUTEX 2523 else if ((__kmp_user_lock_kind == lk_futex) && 2524 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2525 OMP_NEST_LOCK_T_SIZE)) { 2526 lck = (kmp_user_lock_p)user_lock; 2527 } 2528 #endif 2529 else { 2530 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2531 } 2532 2533 INIT_NESTED_LOCK(lck); 2534 __kmp_set_user_lock_location(lck, loc); 2535 2536 #if OMPT_SUPPORT && OMPT_OPTIONAL 2537 // This is the case, if called from omp_init_lock_with_hint: 2538 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2539 if (!codeptr) 2540 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2541 if (ompt_enabled.ompt_callback_lock_init) { 2542 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2543 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2544 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2545 } 2546 #endif 2547 2548 #if USE_ITT_BUILD 2549 __kmp_itt_lock_creating(lck); 2550 #endif /* USE_ITT_BUILD */ 2551 2552 #endif // KMP_USE_DYNAMIC_LOCK 2553 } // __kmpc_init_nest_lock 2554 2555 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2556 #if KMP_USE_DYNAMIC_LOCK 2557 2558 #if USE_ITT_BUILD 2559 kmp_user_lock_p lck; 2560 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2561 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2562 } else { 2563 lck = (kmp_user_lock_p)user_lock; 2564 } 2565 __kmp_itt_lock_destroyed(lck); 2566 #endif 2567 #if OMPT_SUPPORT && OMPT_OPTIONAL 2568 // This is the case, if called from omp_init_lock_with_hint: 2569 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2570 if (!codeptr) 2571 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2572 if (ompt_enabled.ompt_callback_lock_destroy) { 2573 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2574 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2575 } 2576 #endif 2577 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2578 #else 2579 kmp_user_lock_p lck; 2580 2581 if ((__kmp_user_lock_kind == lk_tas) && 2582 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2583 lck = (kmp_user_lock_p)user_lock; 2584 } 2585 #if KMP_USE_FUTEX 2586 else if ((__kmp_user_lock_kind == lk_futex) && 2587 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2588 lck = (kmp_user_lock_p)user_lock; 2589 } 2590 #endif 2591 else { 2592 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2593 } 2594 2595 #if OMPT_SUPPORT && OMPT_OPTIONAL 2596 // This is the case, if called from omp_init_lock_with_hint: 2597 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2598 if (!codeptr) 2599 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2600 if (ompt_enabled.ompt_callback_lock_destroy) { 2601 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2602 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2603 } 2604 #endif 2605 2606 #if USE_ITT_BUILD 2607 __kmp_itt_lock_destroyed(lck); 2608 #endif /* USE_ITT_BUILD */ 2609 DESTROY_LOCK(lck); 2610 2611 if ((__kmp_user_lock_kind == lk_tas) && 2612 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2613 ; 2614 } 2615 #if KMP_USE_FUTEX 2616 else if ((__kmp_user_lock_kind == lk_futex) && 2617 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2618 ; 2619 } 2620 #endif 2621 else { 2622 __kmp_user_lock_free(user_lock, gtid, lck); 2623 } 2624 #endif // KMP_USE_DYNAMIC_LOCK 2625 } // __kmpc_destroy_lock 2626 2627 /* destroy the lock */ 2628 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2629 #if KMP_USE_DYNAMIC_LOCK 2630 2631 #if USE_ITT_BUILD 2632 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2633 __kmp_itt_lock_destroyed(ilk->lock); 2634 #endif 2635 #if OMPT_SUPPORT && OMPT_OPTIONAL 2636 // This is the case, if called from omp_init_lock_with_hint: 2637 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2638 if (!codeptr) 2639 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2640 if (ompt_enabled.ompt_callback_lock_destroy) { 2641 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2642 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2643 } 2644 #endif 2645 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2646 2647 #else // KMP_USE_DYNAMIC_LOCK 2648 2649 kmp_user_lock_p lck; 2650 2651 if ((__kmp_user_lock_kind == lk_tas) && 2652 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2653 OMP_NEST_LOCK_T_SIZE)) { 2654 lck = (kmp_user_lock_p)user_lock; 2655 } 2656 #if KMP_USE_FUTEX 2657 else if ((__kmp_user_lock_kind == lk_futex) && 2658 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2659 OMP_NEST_LOCK_T_SIZE)) { 2660 lck = (kmp_user_lock_p)user_lock; 2661 } 2662 #endif 2663 else { 2664 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2665 } 2666 2667 #if OMPT_SUPPORT && OMPT_OPTIONAL 2668 // This is the case, if called from 
omp_init_lock_with_hint: 2669 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2670 if (!codeptr) 2671 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2672 if (ompt_enabled.ompt_callback_lock_destroy) { 2673 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2674 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2675 } 2676 #endif 2677 2678 #if USE_ITT_BUILD 2679 __kmp_itt_lock_destroyed(lck); 2680 #endif /* USE_ITT_BUILD */ 2681 2682 DESTROY_NESTED_LOCK(lck); 2683 2684 if ((__kmp_user_lock_kind == lk_tas) && 2685 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2686 OMP_NEST_LOCK_T_SIZE)) { 2687 ; 2688 } 2689 #if KMP_USE_FUTEX 2690 else if ((__kmp_user_lock_kind == lk_futex) && 2691 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2692 OMP_NEST_LOCK_T_SIZE)) { 2693 ; 2694 } 2695 #endif 2696 else { 2697 __kmp_user_lock_free(user_lock, gtid, lck); 2698 } 2699 #endif // KMP_USE_DYNAMIC_LOCK 2700 } // __kmpc_destroy_nest_lock 2701 2702 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2703 KMP_COUNT_BLOCK(OMP_set_lock); 2704 #if KMP_USE_DYNAMIC_LOCK 2705 int tag = KMP_EXTRACT_D_TAG(user_lock); 2706 #if USE_ITT_BUILD 2707 __kmp_itt_lock_acquiring( 2708 (kmp_user_lock_p) 2709 user_lock); // itt function will get to the right lock object. 2710 #endif 2711 #if OMPT_SUPPORT && OMPT_OPTIONAL 2712 // This is the case, if called from omp_init_lock_with_hint: 2713 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2714 if (!codeptr) 2715 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2716 if (ompt_enabled.ompt_callback_mutex_acquire) { 2717 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2718 ompt_mutex_lock, omp_lock_hint_none, 2719 __ompt_get_mutex_impl_type(user_lock), 2720 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2721 } 2722 #endif 2723 #if KMP_USE_INLINED_TAS 2724 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2725 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2726 } else 2727 #elif KMP_USE_INLINED_FUTEX 2728 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2729 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2730 } else 2731 #endif 2732 { 2733 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2734 } 2735 #if USE_ITT_BUILD 2736 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2737 #endif 2738 #if OMPT_SUPPORT && OMPT_OPTIONAL 2739 if (ompt_enabled.ompt_callback_mutex_acquired) { 2740 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2741 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2742 } 2743 #endif 2744 2745 #else // KMP_USE_DYNAMIC_LOCK 2746 2747 kmp_user_lock_p lck; 2748 2749 if ((__kmp_user_lock_kind == lk_tas) && 2750 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2751 lck = (kmp_user_lock_p)user_lock; 2752 } 2753 #if KMP_USE_FUTEX 2754 else if ((__kmp_user_lock_kind == lk_futex) && 2755 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2756 lck = (kmp_user_lock_p)user_lock; 2757 } 2758 #endif 2759 else { 2760 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2761 } 2762 2763 #if USE_ITT_BUILD 2764 __kmp_itt_lock_acquiring(lck); 2765 #endif /* USE_ITT_BUILD */ 2766 #if OMPT_SUPPORT && OMPT_OPTIONAL 2767 // This is the case, if called from omp_init_lock_with_hint: 2768 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2769 if (!codeptr) 2770 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2771 if (ompt_enabled.ompt_callback_mutex_acquire) { 2772 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2773 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2774 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2775 } 2776 #endif 2777 2778 ACQUIRE_LOCK(lck, gtid); 2779 2780 #if USE_ITT_BUILD 2781 __kmp_itt_lock_acquired(lck); 2782 #endif /* USE_ITT_BUILD */ 2783 2784 #if OMPT_SUPPORT && OMPT_OPTIONAL 2785 if (ompt_enabled.ompt_callback_mutex_acquired) { 2786 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2787 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2788 } 2789 #endif 2790 2791 #endif // KMP_USE_DYNAMIC_LOCK 2792 } 2793 2794 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2795 #if KMP_USE_DYNAMIC_LOCK 2796 2797 #if USE_ITT_BUILD 2798 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2799 #endif 2800 #if OMPT_SUPPORT && OMPT_OPTIONAL 2801 // This is the case, if called from omp_init_lock_with_hint: 2802 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2803 if (!codeptr) 2804 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2805 if (ompt_enabled.enabled) { 2806 if (ompt_enabled.ompt_callback_mutex_acquire) { 2807 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2808 ompt_mutex_nest_lock, omp_lock_hint_none, 2809 __ompt_get_mutex_impl_type(user_lock), 2810 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2811 } 2812 } 2813 #endif 2814 int acquire_status = 2815 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2816 (void)acquire_status; 2817 #if USE_ITT_BUILD 2818 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2819 #endif 2820 2821 #if OMPT_SUPPORT && OMPT_OPTIONAL 2822 if (ompt_enabled.enabled) { 2823 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2824 if (ompt_enabled.ompt_callback_mutex_acquired) { 2825 // lock_first 2826 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2827 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2828 codeptr); 2829 } 2830 } else { 2831 if (ompt_enabled.ompt_callback_nest_lock) { 2832 // lock_next 2833 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2834 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2835 } 2836 } 2837 } 2838 #endif 2839 2840 #else // KMP_USE_DYNAMIC_LOCK 2841 int acquire_status; 2842 kmp_user_lock_p lck; 2843 2844 if ((__kmp_user_lock_kind == lk_tas) && 2845 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2846 OMP_NEST_LOCK_T_SIZE)) { 2847 lck = (kmp_user_lock_p)user_lock; 2848 } 2849 #if KMP_USE_FUTEX 2850 else if ((__kmp_user_lock_kind == lk_futex) && 2851 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2852 OMP_NEST_LOCK_T_SIZE)) { 2853 lck = (kmp_user_lock_p)user_lock; 2854 } 2855 #endif 2856 else { 2857 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2858 } 2859 2860 #if USE_ITT_BUILD 2861 __kmp_itt_lock_acquiring(lck); 2862 #endif /* USE_ITT_BUILD */ 2863 #if OMPT_SUPPORT && OMPT_OPTIONAL 2864 // This is the case, if called from omp_init_lock_with_hint: 2865 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2866 if (!codeptr) 2867 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2868 if (ompt_enabled.enabled) { 2869 if (ompt_enabled.ompt_callback_mutex_acquire) { 2870 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2871 ompt_mutex_nest_lock, omp_lock_hint_none, 2872 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2873 codeptr); 2874 } 2875 } 2876 #endif 2877 2878 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2879 2880 #if USE_ITT_BUILD 2881 __kmp_itt_lock_acquired(lck); 2882 #endif /* USE_ITT_BUILD */ 2883 2884 #if OMPT_SUPPORT && OMPT_OPTIONAL 2885 if (ompt_enabled.enabled) 
{
2886     if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2887       if (ompt_enabled.ompt_callback_mutex_acquired) {
2888         // lock_first
2889         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2890             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2891       }
2892     } else {
2893       if (ompt_enabled.ompt_callback_nest_lock) {
2894         // lock_next
2895         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2896             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2897       }
2898     }
2899   }
2900 #endif
2901
2902 #endif // KMP_USE_DYNAMIC_LOCK
2903 }
2904
2905 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2906 #if KMP_USE_DYNAMIC_LOCK
2907
2908   int tag = KMP_EXTRACT_D_TAG(user_lock);
2909 #if USE_ITT_BUILD
2910   __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2911 #endif
2912 #if KMP_USE_INLINED_TAS
2913   if (tag == locktag_tas && !__kmp_env_consistency_check) {
2914     KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2915   } else
2916 #elif KMP_USE_INLINED_FUTEX
2917   if (tag == locktag_futex && !__kmp_env_consistency_check) {
2918     KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2919   } else
2920 #endif
2921   {
2922     __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2923   }
2924
2925 #if OMPT_SUPPORT && OMPT_OPTIONAL
2926   // This is the case, if called from omp_init_lock_with_hint:
2927   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2928   if (!codeptr)
2929     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2930   if (ompt_enabled.ompt_callback_mutex_released) {
2931     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2932         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2933   }
2934 #endif
2935
2936 #else // KMP_USE_DYNAMIC_LOCK
2937
2938   kmp_user_lock_p lck;
2939
2940   /* Can't use serial interval since not block structured */
2941   /* release the lock */
2942
2943   if ((__kmp_user_lock_kind == lk_tas) &&
2944       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2945 #if KMP_OS_LINUX && \
2946     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2947 // "fast" path implemented to fix customer performance issue
2948 #if USE_ITT_BUILD
2949     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2950 #endif /* USE_ITT_BUILD */
2951     TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2952     KMP_MB();
2953
2954 #if OMPT_SUPPORT && OMPT_OPTIONAL
2955     // This is the case, if called from omp_init_lock_with_hint:
2956     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2957     if (!codeptr)
2958       codeptr = OMPT_GET_RETURN_ADDRESS(0);
2959     if (ompt_enabled.ompt_callback_mutex_released) {
2960       ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2961           ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); // report user_lock; lck is never assigned on this fast path
2962     }
2963 #endif
2964
2965     return;
2966 #else
2967     lck = (kmp_user_lock_p)user_lock;
2968 #endif
2969   }
2970 #if KMP_USE_FUTEX
2971   else if ((__kmp_user_lock_kind == lk_futex) &&
2972            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2973     lck = (kmp_user_lock_p)user_lock;
2974   }
2975 #endif
2976   else {
2977     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2978   }
2979
2980 #if USE_ITT_BUILD
2981   __kmp_itt_lock_releasing(lck);
2982 #endif /* USE_ITT_BUILD */
2983
2984   RELEASE_LOCK(lck, gtid);
2985
2986 #if OMPT_SUPPORT && OMPT_OPTIONAL
2987   // This is the case, if called from omp_init_lock_with_hint:
2988   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2989   if (!codeptr)
2990     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2991   if (ompt_enabled.ompt_callback_mutex_released) {
2992     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2993         ompt_mutex_lock,
(ompt_wait_id_t)(uintptr_t)lck, codeptr);
2994   }
2995 #endif
2996
2997 #endif // KMP_USE_DYNAMIC_LOCK
2998 }
2999
3000 /* release the lock */
3001 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3002 #if KMP_USE_DYNAMIC_LOCK
3003
3004 #if USE_ITT_BUILD
3005   __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3006 #endif
3007   int release_status =
3008       KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3009   (void)release_status;
3010
3011 #if OMPT_SUPPORT && OMPT_OPTIONAL
3012   // This is the case, if called from omp_init_lock_with_hint:
3013   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3014   if (!codeptr)
3015     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3016   if (ompt_enabled.enabled) {
3017     if (release_status == KMP_LOCK_RELEASED) {
3018       if (ompt_enabled.ompt_callback_mutex_released) {
3019         // release_lock_last
3020         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3021             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3022             codeptr);
3023       }
3024     } else if (ompt_enabled.ompt_callback_nest_lock) {
3025       // release_lock_prev
3026       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3027           ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3028     }
3029   }
3030 #endif
3031
3032 #else // KMP_USE_DYNAMIC_LOCK
3033
3034   kmp_user_lock_p lck;
3035
3036   /* Can't use serial interval since not block structured */
3037
3038   if ((__kmp_user_lock_kind == lk_tas) &&
3039       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3040        OMP_NEST_LOCK_T_SIZE)) {
3041 #if KMP_OS_LINUX && \
3042     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3043     // "fast" path implemented to fix customer performance issue
3044     kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3045 #if USE_ITT_BUILD
3046     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3047 #endif /* USE_ITT_BUILD */
3048
3049 #if OMPT_SUPPORT && OMPT_OPTIONAL
3050     int release_status = KMP_LOCK_STILL_HELD;
3051 #endif
3052
3053     if (--(tl->lk.depth_locked) == 0) {
3054       TCW_4(tl->lk.poll, 0);
3055 #if OMPT_SUPPORT && OMPT_OPTIONAL
3056       release_status = KMP_LOCK_RELEASED;
3057 #endif
3058     }
3059     KMP_MB();
3060
3061 #if OMPT_SUPPORT && OMPT_OPTIONAL
3062     // This is the case, if called from omp_init_lock_with_hint:
3063     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3064     if (!codeptr)
3065       codeptr = OMPT_GET_RETURN_ADDRESS(0);
3066     if (ompt_enabled.enabled) {
3067       if (release_status == KMP_LOCK_RELEASED) {
3068         if (ompt_enabled.ompt_callback_mutex_released) {
3069           // release_lock_last
3070           ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3071               ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); // report user_lock; lck is never assigned on this fast path
3072         }
3073       } else if (ompt_enabled.ompt_callback_nest_lock) {
3074         // release_lock_previous
3075         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3076             ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077       }
3078     }
3079 #endif
3080
3081     return;
3082 #else
3083     lck = (kmp_user_lock_p)user_lock;
3084 #endif
3085   }
3086 #if KMP_USE_FUTEX
3087   else if ((__kmp_user_lock_kind == lk_futex) &&
3088            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3089             OMP_NEST_LOCK_T_SIZE)) {
3090     lck = (kmp_user_lock_p)user_lock;
3091   }
3092 #endif
3093   else {
3094     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3095   }
3096
3097 #if USE_ITT_BUILD
3098   __kmp_itt_lock_releasing(lck);
3099 #endif /* USE_ITT_BUILD */
3100
3101   int release_status;
3102   release_status = RELEASE_NESTED_LOCK(lck, gtid);
3103 #if
OMPT_SUPPORT && OMPT_OPTIONAL
3104   // This is the case, if called from omp_init_lock_with_hint:
3105   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3106   if (!codeptr)
3107     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3108   if (ompt_enabled.enabled) {
3109     if (release_status == KMP_LOCK_RELEASED) {
3110       if (ompt_enabled.ompt_callback_mutex_released) {
3111         // release_lock_last
3112         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3113             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3114       }
3115     } else if (ompt_enabled.ompt_callback_nest_lock) {
3116       // release_lock_previous
3117       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3118           ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3119     }
3120   }
3121 #endif
3122
3123 #endif // KMP_USE_DYNAMIC_LOCK
3124 }
3125
3126 /* try to acquire the lock */
3127 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3128   KMP_COUNT_BLOCK(OMP_test_lock);
3129
3130 #if KMP_USE_DYNAMIC_LOCK
3131   int rc;
3132   int tag = KMP_EXTRACT_D_TAG(user_lock);
3133 #if USE_ITT_BUILD
3134   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3135 #endif
3136 #if OMPT_SUPPORT && OMPT_OPTIONAL
3137   // This is the case, if called from omp_init_lock_with_hint:
3138   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3139   if (!codeptr)
3140     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3141   if (ompt_enabled.ompt_callback_mutex_acquire) {
3142     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3143         ompt_mutex_lock, omp_lock_hint_none,
3144         __ompt_get_mutex_impl_type(user_lock),
3145         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3146   }
3147 #endif
3148 #if KMP_USE_INLINED_TAS
3149   if (tag == locktag_tas && !__kmp_env_consistency_check) {
3150     KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3151   } else
3152 #elif KMP_USE_INLINED_FUTEX
3153   if (tag == locktag_futex && !__kmp_env_consistency_check) {
3154     KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3155   } else
3156 #endif
3157   {
3158     rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3159   }
3160   if (rc) {
3161 #if USE_ITT_BUILD
3162     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3163 #endif
3164 #if OMPT_SUPPORT && OMPT_OPTIONAL
3165     if (ompt_enabled.ompt_callback_mutex_acquired) {
3166       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3167           ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3168     }
3169 #endif
3170     return FTN_TRUE;
3171   } else {
3172 #if USE_ITT_BUILD
3173     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3174 #endif
3175     return FTN_FALSE;
3176   }
3177
3178 #else // KMP_USE_DYNAMIC_LOCK
3179
3180   kmp_user_lock_p lck;
3181   int rc;
3182
3183   if ((__kmp_user_lock_kind == lk_tas) &&
3184       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3185     lck = (kmp_user_lock_p)user_lock;
3186   }
3187 #if KMP_USE_FUTEX
3188   else if ((__kmp_user_lock_kind == lk_futex) &&
3189            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3190     lck = (kmp_user_lock_p)user_lock;
3191   }
3192 #endif
3193   else {
3194     lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3195   }
3196
3197 #if USE_ITT_BUILD
3198   __kmp_itt_lock_acquiring(lck);
3199 #endif /* USE_ITT_BUILD */
3200 #if OMPT_SUPPORT && OMPT_OPTIONAL
3201   // This is the case, if called from omp_init_lock_with_hint:
3202   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3203   if (!codeptr)
3204     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3205   if (ompt_enabled.ompt_callback_mutex_acquire) {
3206     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3207         ompt_mutex_lock, omp_lock_hint_none,
__ompt_get_mutex_impl_type(),
3208         (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3209   }
3210 #endif
3211
3212   rc = TEST_LOCK(lck, gtid);
3213 #if USE_ITT_BUILD
3214   if (rc) {
3215     __kmp_itt_lock_acquired(lck);
3216   } else {
3217     __kmp_itt_lock_cancelled(lck);
3218   }
3219 #endif /* USE_ITT_BUILD */
3220 #if OMPT_SUPPORT && OMPT_OPTIONAL
3221   if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3222     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3223         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3224   }
3225 #endif
3226
3227   return (rc ? FTN_TRUE : FTN_FALSE);
3228
3229   /* Can't use serial interval since not block structured */
3230
3231 #endif // KMP_USE_DYNAMIC_LOCK
3232 }
3233
3234 /* try to acquire the lock */
3235 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3236 #if KMP_USE_DYNAMIC_LOCK
3237   int rc;
3238 #if USE_ITT_BUILD
3239   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3240 #endif
3241 #if OMPT_SUPPORT && OMPT_OPTIONAL
3242   // This is the case, if called from omp_init_lock_with_hint:
3243   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3244   if (!codeptr)
3245     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3246   if (ompt_enabled.ompt_callback_mutex_acquire) {
3247     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3248         ompt_mutex_nest_lock, omp_lock_hint_none,
3249         __ompt_get_mutex_impl_type(user_lock),
3250         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3251   }
3252 #endif
3253   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3254 #if USE_ITT_BUILD
3255   if (rc) {
3256     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3257   } else {
3258     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3259   }
3260 #endif
3261 #if OMPT_SUPPORT && OMPT_OPTIONAL
3262   if (ompt_enabled.enabled && rc) {
3263     if (rc == 1) {
3264       if (ompt_enabled.ompt_callback_mutex_acquired) {
3265         // lock_first
3266         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3267             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3268             codeptr);
3269       }
3270     } else {
3271       if (ompt_enabled.ompt_callback_nest_lock) {
3272         // lock_next
3273         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3274             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3275       }
3276     }
3277   }
3278 #endif
3279   return rc;
3280
3281 #else // KMP_USE_DYNAMIC_LOCK
3282
3283   kmp_user_lock_p lck;
3284   int rc;
3285
3286   if ((__kmp_user_lock_kind == lk_tas) &&
3287       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3288        OMP_NEST_LOCK_T_SIZE)) {
3289     lck = (kmp_user_lock_p)user_lock;
3290   }
3291 #if KMP_USE_FUTEX
3292   else if ((__kmp_user_lock_kind == lk_futex) &&
3293            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3294             OMP_NEST_LOCK_T_SIZE)) {
3295     lck = (kmp_user_lock_p)user_lock;
3296   }
3297 #endif
3298   else {
3299     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3300   }
3301
3302 #if USE_ITT_BUILD
3303   __kmp_itt_lock_acquiring(lck);
3304 #endif /* USE_ITT_BUILD */
3305
3306 #if OMPT_SUPPORT && OMPT_OPTIONAL
3307   // This is the case, if called from omp_init_lock_with_hint:
3308   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3309   if (!codeptr)
3310     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3311   if (ompt_enabled.enabled &&
3312       ompt_enabled.ompt_callback_mutex_acquire) {
3313     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3314         ompt_mutex_nest_lock, omp_lock_hint_none,
3315         __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3316         codeptr);
3317   }
3318 #endif
3319
3320   rc =
TEST_NESTED_LOCK(lck, gtid);
3321 #if USE_ITT_BUILD
3322   if (rc) {
3323     __kmp_itt_lock_acquired(lck);
3324   } else {
3325     __kmp_itt_lock_cancelled(lck);
3326   }
3327 #endif /* USE_ITT_BUILD */
3328 #if OMPT_SUPPORT && OMPT_OPTIONAL
3329   if (ompt_enabled.enabled && rc) {
3330     if (rc == 1) {
3331       if (ompt_enabled.ompt_callback_mutex_acquired) {
3332         // lock_first
3333         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3334             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3335       }
3336     } else {
3337       if (ompt_enabled.ompt_callback_nest_lock) {
3338         // lock_next
3339         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3340             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3341       }
3342     }
3343   }
3344 #endif
3345   return rc;
3346
3347   /* Can't use serial interval since not block structured */
3348
3349 #endif // KMP_USE_DYNAMIC_LOCK
3350 }
3351
3352 // Interface to fast scalable reduce methods routines
3353
3354 // keep the selected method in a thread local structure for cross-function
3355 // usage: will be used in __kmpc_end_reduce* functions;
3356 // another solution: to re-determine the method one more time in
3357 // __kmpc_end_reduce* functions (new prototype required then)
3358 // AT: which solution is better?
3359 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3360   ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3361
3362 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3363   (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3364
3365 // description of the packed_reduction_method variable: look at the macros in
3366 // kmp.h
3367
3368 // used in a critical section reduce block
3369 static __forceinline void
3370 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3371                                           kmp_critical_name *crit) {
3372
3373   // this lock was visible to a customer and to the threading profile tool as a
3374   // serial overhead span (although it's used for an internal purpose only)
3375   // why was it visible in previous implementation?
3376   // should we keep it visible in new reduce block?
3377   kmp_user_lock_p lck;
3378
3379 #if KMP_USE_DYNAMIC_LOCK
3380
3381   kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3382   // Check if it is initialized.
3383   if (*lk == 0) {
3384     if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3385       KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3386                                   KMP_GET_D_TAG(__kmp_user_lock_seq));
3387     } else {
3388       __kmp_init_indirect_csptr(crit, loc, global_tid,
3389                                 KMP_GET_I_TAG(__kmp_user_lock_seq));
3390     }
3391   }
3392   // Branch for accessing the actual lock object and set operation. This
3393   // branching is inevitable since this lock initialization does not follow the
3394   // normal dispatch path (lock table is not used).
3395   if (KMP_EXTRACT_D_TAG(lk) != 0) {
3396     lck = (kmp_user_lock_p)lk;
3397     KMP_DEBUG_ASSERT(lck != NULL);
3398     if (__kmp_env_consistency_check) {
3399       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3400     }
3401     KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3402   } else {
3403     kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3404     lck = ilk->lock;
3405     KMP_DEBUG_ASSERT(lck != NULL);
3406     if (__kmp_env_consistency_check) {
3407       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3408     }
3409     KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3410   }
3411
3412 #else // KMP_USE_DYNAMIC_LOCK
3413
3414   // We know that the fast reduction code is only emitted by Intel compilers
3415   // with 32 byte critical sections.
If there isn't enough space, then we 3416 // have to use a pointer. 3417 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3418 lck = (kmp_user_lock_p)crit; 3419 } else { 3420 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3421 } 3422 KMP_DEBUG_ASSERT(lck != NULL); 3423 3424 if (__kmp_env_consistency_check) 3425 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3426 3427 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3428 3429 #endif // KMP_USE_DYNAMIC_LOCK 3430 } 3431 3432 // used in a critical section reduce block 3433 static __forceinline void 3434 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3435 kmp_critical_name *crit) { 3436 3437 kmp_user_lock_p lck; 3438 3439 #if KMP_USE_DYNAMIC_LOCK 3440 3441 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3442 lck = (kmp_user_lock_p)crit; 3443 if (__kmp_env_consistency_check) 3444 __kmp_pop_sync(global_tid, ct_critical, loc); 3445 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3446 } else { 3447 kmp_indirect_lock_t *ilk = 3448 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3449 if (__kmp_env_consistency_check) 3450 __kmp_pop_sync(global_tid, ct_critical, loc); 3451 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3452 } 3453 3454 #else // KMP_USE_DYNAMIC_LOCK 3455 3456 // We know that the fast reduction code is only emitted by Intel compilers 3457 // with 32 byte critical sections. If there isn't enough space, then we have 3458 // to use a pointer. 3459 if (__kmp_base_user_lock_size > 32) { 3460 lck = *((kmp_user_lock_p *)crit); 3461 KMP_ASSERT(lck != NULL); 3462 } else { 3463 lck = (kmp_user_lock_p)crit; 3464 } 3465 3466 if (__kmp_env_consistency_check) 3467 __kmp_pop_sync(global_tid, ct_critical, loc); 3468 3469 __kmp_release_user_lock_with_checks(lck, global_tid); 3470 3471 #endif // KMP_USE_DYNAMIC_LOCK 3472 } // __kmp_end_critical_section_reduce_block 3473 3474 static __forceinline int 3475 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3476 int *task_state) { 3477 kmp_team_t *team; 3478 3479 // Check if we are inside the teams construct? 3480 if (th->th.th_teams_microtask) { 3481 *team_p = team = th->th.th_team; 3482 if (team->t.t_level == th->th.th_teams_level) { 3483 // This is reduction at teams construct. 3484 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3485 // Let's swap teams temporarily for the reduction. 3486 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3487 th->th.th_team = team->t.t_parent; 3488 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3489 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3490 *task_state = th->th.th_task_state; 3491 th->th.th_task_state = 0; 3492 3493 return 1; 3494 } 3495 } 3496 return 0; 3497 } 3498 3499 static __forceinline void 3500 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3501 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3502 th->th.th_info.ds.ds_tid = 0; 3503 th->th.th_team = team; 3504 th->th.th_team_nproc = team->t.t_nproc; 3505 th->th.th_task_team = team->t.t_task_team[task_state]; 3506 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3507 } 3508 3509 /* 2.a.i. Reduce Block without a terminating barrier */ 3510 /*! 
3511 @ingroup SYNCHRONIZATION 3512 @param loc source location information 3513 @param global_tid global thread number 3514 @param num_vars number of items (variables) to be reduced 3515 @param reduce_size size of data in bytes to be reduced 3516 @param reduce_data pointer to data to be reduced 3517 @param reduce_func callback function providing reduction operation on two 3518 operands and returning result of reduction in lhs_data 3519 @param lck pointer to the unique lock data structure 3520 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3521 threads if atomic reduction needed 3522 3523 The nowait version is used for a reduce clause with the nowait argument. 3524 */ 3525 kmp_int32 3526 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3527 size_t reduce_size, void *reduce_data, 3528 void (*reduce_func)(void *lhs_data, void *rhs_data), 3529 kmp_critical_name *lck) { 3530 3531 KMP_COUNT_BLOCK(REDUCE_nowait); 3532 int retval = 0; 3533 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3534 kmp_info_t *th; 3535 kmp_team_t *team; 3536 int teams_swapped = 0, task_state; 3537 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3538 __kmp_assert_valid_gtid(global_tid); 3539 3540 // why do we need this initialization here at all? 3541 // Reduction clause can not be used as a stand-alone directive. 3542 3543 // do not call __kmp_serial_initialize(), it will be called by 3544 // __kmp_parallel_initialize() if needed 3545 // possible detection of false-positive race by the threadchecker ??? 3546 if (!TCR_4(__kmp_init_parallel)) 3547 __kmp_parallel_initialize(); 3548 3549 __kmp_resume_if_soft_paused(); 3550 3551 // check correctness of reduce block nesting 3552 #if KMP_USE_DYNAMIC_LOCK 3553 if (__kmp_env_consistency_check) 3554 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3555 #else 3556 if (__kmp_env_consistency_check) 3557 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3558 #endif 3559 3560 th = __kmp_thread_from_gtid(global_tid); 3561 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3562 3563 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3564 // the value should be kept in a variable 3565 // the variable should be either a construct-specific or thread-specific 3566 // property, not a team specific property 3567 // (a thread can reach the next reduce block on the next construct, reduce 3568 // method may differ on the next construct) 3569 // an ident_t "loc" parameter could be used as a construct-specific property 3570 // (what if loc == 0?) 
3571 // (if both construct-specific and team-specific variables were shared, 3572 // then unnecessary extra syncs would be needed) 3573 // a thread-specific variable is better regarding two issues above (next 3574 // construct and extra syncs) 3575 // a thread-specific "th_local.reduction_method" variable is used currently 3576 // each thread executes 'determine' and 'set' lines (no need to execute by one 3577 // thread, to avoid unnecessary extra syncs) 3578 3579 packed_reduction_method = __kmp_determine_reduction_method( 3580 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3581 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3582 3583 OMPT_REDUCTION_DECL(th, global_tid); 3584 if (packed_reduction_method == critical_reduce_block) { 3585 3586 OMPT_REDUCTION_BEGIN; 3587 3588 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3589 retval = 1; 3590 3591 } else if (packed_reduction_method == empty_reduce_block) { 3592 3593 OMPT_REDUCTION_BEGIN; 3594 3595 // usage: if team size == 1, no synchronization is required ( Intel 3596 // platforms only ) 3597 retval = 1; 3598 3599 } else if (packed_reduction_method == atomic_reduce_block) { 3600 3601 retval = 2; 3602 3603 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3604 // won't be called by the code gen) 3605 // (this is not ideal, because the checking block has been closed by 3606 // this 'pop', 3607 // but the atomic operation has not been executed yet; it will be executed 3608 // slightly later, literally on the next instruction) 3609 if (__kmp_env_consistency_check) 3610 __kmp_pop_sync(global_tid, ct_reduce, loc); 3611 3612 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3613 tree_reduce_block)) { 3614 3615 // AT: performance issue: a real barrier here 3616 // AT: (if primary thread is slow, other threads are blocked here waiting for 3617 // the primary thread to come and release them) 3618 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3619 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3620 // be confusing to a customer) 3621 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3622 // might go faster and be more in line with the sense of NOWAIT 3623 // AT: TO DO: run the EPCC test and compare times 3624 3625 // this barrier should be invisible to a customer and to the threading profile 3626 // tool (it's neither a terminating barrier nor customer's code, it's 3627 // used for an internal purpose) 3628 #if OMPT_SUPPORT 3629 // JP: can this barrier potentially lead to task scheduling? 3630 // JP: as long as there is a barrier in the implementation, OMPT should and 3631 // will provide the barrier events 3632 // so we set up the necessary frame/return addresses. 3633 ompt_frame_t *ompt_frame; 3634 if (ompt_enabled.enabled) { 3635 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3636 if (ompt_frame->enter_frame.ptr == NULL) 3637 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3638 } 3639 OMPT_STORE_RETURN_ADDRESS(global_tid); 3640 #endif 3641 #if USE_ITT_NOTIFY 3642 __kmp_threads[global_tid]->th.th_ident = loc; 3643 #endif 3644 retval = 3645 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3646 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3647 retval = (retval != 0) ?
(0) : (1); 3648 #if OMPT_SUPPORT && OMPT_OPTIONAL 3649 if (ompt_enabled.enabled) { 3650 ompt_frame->enter_frame = ompt_data_none; 3651 } 3652 #endif 3653 3654 // all workers other than the primary thread should do this pop here 3655 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3656 if (__kmp_env_consistency_check) { 3657 if (retval == 0) { 3658 __kmp_pop_sync(global_tid, ct_reduce, loc); 3659 } 3660 } 3661 3662 } else { 3663 3664 // should never reach this block 3665 KMP_ASSERT(0); // "unexpected method" 3666 } 3667 if (teams_swapped) { 3668 __kmp_restore_swapped_teams(th, team, task_state); 3669 } 3670 KA_TRACE( 3671 10, 3672 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3673 global_tid, packed_reduction_method, retval)); 3674 3675 return retval; 3676 } 3677 3678 /*! 3679 @ingroup SYNCHRONIZATION 3680 @param loc source location information 3681 @param global_tid global thread id. 3682 @param lck pointer to the unique lock data structure 3683 3684 Finish the execution of a reduce nowait. 3685 */ 3686 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3687 kmp_critical_name *lck) { 3688 3689 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3690 3691 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3692 __kmp_assert_valid_gtid(global_tid); 3693 3694 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3695 3696 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3697 3698 if (packed_reduction_method == critical_reduce_block) { 3699 3700 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3701 OMPT_REDUCTION_END; 3702 3703 } else if (packed_reduction_method == empty_reduce_block) { 3704 3705 // usage: if team size == 1, no synchronization is required ( on Intel 3706 // platforms only ) 3707 3708 OMPT_REDUCTION_END; 3709 3710 } else if (packed_reduction_method == atomic_reduce_block) { 3711 3712 // neither primary thread nor other workers should get here 3713 // (code gen does not generate this call in case 2: atomic reduce block) 3714 // actually it would be better to remove this 'else if' entirely; 3715 // after removal this value would be caught by the 'else' branch and assert 3716 3717 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3718 tree_reduce_block)) { 3719 3720 // only primary thread gets here 3721 // OMPT: tree reduction is annotated in the barrier code 3722 3723 } else { 3724 3725 // should never reach this block 3726 KMP_ASSERT(0); // "unexpected method" 3727 } 3728 3729 if (__kmp_env_consistency_check) 3730 __kmp_pop_sync(global_tid, ct_reduce, loc); 3731 3732 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3733 global_tid, packed_reduction_method)); 3734 3735 return; 3736 } 3737 3738 /* 2.a.ii. Reduce Block with a terminating barrier */ 3739 3740 /*! 3741 @ingroup SYNCHRONIZATION 3742 @param loc source location information 3743 @param global_tid global thread number 3744 @param num_vars number of items (variables) to be reduced 3745 @param reduce_size size of data in bytes to be reduced 3746 @param reduce_data pointer to data to be reduced 3747 @param reduce_func callback function providing reduction operation on two 3748 operands and returning result of reduction in lhs_data 3749 @param lck pointer to the unique lock data structure 3750 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3751 threads if atomic reduction needed 3752 3753 A blocking reduce that includes an implicit barrier.
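As an illustration (a sketch only, not a guarantee of any particular
compiler's output), a compiler could lower

    #pragma omp parallel for reduction(+ : sum)
    for (i = 0; i < n; ++i)
      sum += a[i];

into per-thread partial sums followed by a call to this function: a return
value of 1 tells the calling thread to combine its partial result itself and
then call __kmpc_end_reduce() (all threads under the critical method, only
the primary thread after a tree reduction), 2 tells every thread to combine
its partial result with atomic operations, and 0 tells a worker that its
contribution has already been combined by <tt>reduce_func</tt> inside the
barrier. The nowait variants follow the same protocol without the
terminating barrier.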
3754 */ 3755 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3756 size_t reduce_size, void *reduce_data, 3757 void (*reduce_func)(void *lhs_data, void *rhs_data), 3758 kmp_critical_name *lck) { 3759 KMP_COUNT_BLOCK(REDUCE_wait); 3760 int retval = 0; 3761 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3762 kmp_info_t *th; 3763 kmp_team_t *team; 3764 int teams_swapped = 0, task_state; 3765 3766 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3767 __kmp_assert_valid_gtid(global_tid); 3768 3769 // why do we need this initialization here at all? 3770 // Reduction clause can not be a stand-alone directive. 3771 3772 // do not call __kmp_serial_initialize(), it will be called by 3773 // __kmp_parallel_initialize() if needed 3774 // possible detection of false-positive race by the threadchecker ??? 3775 if (!TCR_4(__kmp_init_parallel)) 3776 __kmp_parallel_initialize(); 3777 3778 __kmp_resume_if_soft_paused(); 3779 3780 // check correctness of reduce block nesting 3781 #if KMP_USE_DYNAMIC_LOCK 3782 if (__kmp_env_consistency_check) 3783 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3784 #else 3785 if (__kmp_env_consistency_check) 3786 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3787 #endif 3788 3789 th = __kmp_thread_from_gtid(global_tid); 3790 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3791 3792 packed_reduction_method = __kmp_determine_reduction_method( 3793 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3794 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3795 3796 OMPT_REDUCTION_DECL(th, global_tid); 3797 3798 if (packed_reduction_method == critical_reduce_block) { 3799 3800 OMPT_REDUCTION_BEGIN; 3801 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3802 retval = 1; 3803 3804 } else if (packed_reduction_method == empty_reduce_block) { 3805 3806 OMPT_REDUCTION_BEGIN; 3807 // usage: if team size == 1, no synchronization is required ( Intel 3808 // platforms only ) 3809 retval = 1; 3810 3811 } else if (packed_reduction_method == atomic_reduce_block) { 3812 3813 retval = 2; 3814 3815 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3816 tree_reduce_block)) { 3817 3818 // case tree_reduce_block: 3819 // this barrier should be visible to a customer and to the threading profile 3820 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3821 #if OMPT_SUPPORT 3822 ompt_frame_t *ompt_frame; 3823 if (ompt_enabled.enabled) { 3824 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3825 if (ompt_frame->enter_frame.ptr == NULL) 3826 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3827 } 3828 OMPT_STORE_RETURN_ADDRESS(global_tid); 3829 #endif 3830 #if USE_ITT_NOTIFY 3831 __kmp_threads[global_tid]->th.th_ident = 3832 loc; // needed for correct notification of frames 3833 #endif 3834 retval = 3835 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3836 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3837 retval = (retval != 0) ? 
(0) : (1); 3838 #if OMPT_SUPPORT && OMPT_OPTIONAL 3839 if (ompt_enabled.enabled) { 3840 ompt_frame->enter_frame = ompt_data_none; 3841 } 3842 #endif 3843 3844 // all other workers except primary thread should do this pop here 3845 // (none of other workers except primary will enter __kmpc_end_reduce()) 3846 if (__kmp_env_consistency_check) { 3847 if (retval == 0) { // 0: all other workers; 1: primary thread 3848 __kmp_pop_sync(global_tid, ct_reduce, loc); 3849 } 3850 } 3851 3852 } else { 3853 3854 // should never reach this block 3855 KMP_ASSERT(0); // "unexpected method" 3856 } 3857 if (teams_swapped) { 3858 __kmp_restore_swapped_teams(th, team, task_state); 3859 } 3860 3861 KA_TRACE(10, 3862 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3863 global_tid, packed_reduction_method, retval)); 3864 return retval; 3865 } 3866 3867 /*! 3868 @ingroup SYNCHRONIZATION 3869 @param loc source location information 3870 @param global_tid global thread id. 3871 @param lck pointer to the unique lock data structure 3872 3873 Finish the execution of a blocking reduce. 3874 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3875 start function. 3876 */ 3877 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3878 kmp_critical_name *lck) { 3879 3880 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3881 kmp_info_t *th; 3882 kmp_team_t *team; 3883 int teams_swapped = 0, task_state; 3884 3885 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3886 __kmp_assert_valid_gtid(global_tid); 3887 3888 th = __kmp_thread_from_gtid(global_tid); 3889 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3890 3891 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3892 3893 // this barrier should be visible to a customer and to the threading profile 3894 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3895 OMPT_REDUCTION_DECL(th, global_tid); 3896 3897 if (packed_reduction_method == critical_reduce_block) { 3898 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3899 3900 OMPT_REDUCTION_END; 3901 3902 // TODO: implicit barrier: should be exposed 3903 #if OMPT_SUPPORT 3904 ompt_frame_t *ompt_frame; 3905 if (ompt_enabled.enabled) { 3906 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3907 if (ompt_frame->enter_frame.ptr == NULL) 3908 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3909 } 3910 OMPT_STORE_RETURN_ADDRESS(global_tid); 3911 #endif 3912 #if USE_ITT_NOTIFY 3913 __kmp_threads[global_tid]->th.th_ident = loc; 3914 #endif 3915 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3916 #if OMPT_SUPPORT && OMPT_OPTIONAL 3917 if (ompt_enabled.enabled) { 3918 ompt_frame->enter_frame = ompt_data_none; 3919 } 3920 #endif 3921 3922 } else if (packed_reduction_method == empty_reduce_block) { 3923 3924 OMPT_REDUCTION_END; 3925 3926 // usage: if team size==1, no synchronization is required (Intel platforms only) 3927 3928 // TODO: implicit barrier: should be exposed 3929 #if OMPT_SUPPORT 3930 ompt_frame_t *ompt_frame; 3931 if (ompt_enabled.enabled) { 3932 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3933 if (ompt_frame->enter_frame.ptr == NULL) 3934 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3935 } 3936 OMPT_STORE_RETURN_ADDRESS(global_tid); 3937 #endif 3938 #if USE_ITT_NOTIFY 3939 __kmp_threads[global_tid]->th.th_ident = loc; 3940 #endif 3941 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3942 #if OMPT_SUPPORT && OMPT_OPTIONAL 3943 if (ompt_enabled.enabled) { 3944 ompt_frame->enter_frame = ompt_data_none; 3945 } 3946 #endif 3947 3948 } else if (packed_reduction_method == atomic_reduce_block) { 3949 3950 #if OMPT_SUPPORT 3951 ompt_frame_t *ompt_frame; 3952 if (ompt_enabled.enabled) { 3953 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3954 if (ompt_frame->enter_frame.ptr == NULL) 3955 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3956 } 3957 OMPT_STORE_RETURN_ADDRESS(global_tid); 3958 #endif 3959 // TODO: implicit barrier: should be exposed 3960 #if USE_ITT_NOTIFY 3961 __kmp_threads[global_tid]->th.th_ident = loc; 3962 #endif 3963 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3964 #if OMPT_SUPPORT && OMPT_OPTIONAL 3965 if (ompt_enabled.enabled) { 3966 ompt_frame->enter_frame = ompt_data_none; 3967 } 3968 #endif 3969 3970 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3971 tree_reduce_block)) { 3972 3973 // only primary thread executes here (primary releases all other workers) 3974 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3975 global_tid); 3976 3977 } else { 3978 3979 // should never reach this block 3980 KMP_ASSERT(0); // "unexpected method" 3981 } 3982 if (teams_swapped) { 3983 __kmp_restore_swapped_teams(th, team, task_state); 3984 } 3985 3986 if (__kmp_env_consistency_check) 3987 __kmp_pop_sync(global_tid, ct_reduce, loc); 3988 3989 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3990 global_tid, packed_reduction_method)); 3991 3992 return; 3993 } 3994 3995 #undef __KMP_GET_REDUCTION_METHOD 3996 #undef __KMP_SET_REDUCTION_METHOD 3997 3998 /* end of interface to fast scalable reduce routines */ 3999 4000 kmp_uint64 __kmpc_get_taskid() { 4001 4002 kmp_int32 gtid; 4003 kmp_info_t *thread; 4004 4005 gtid = __kmp_get_gtid(); 4006 if (gtid < 0) { 4007 return 0; 4008 } 4009 thread = __kmp_thread_from_gtid(gtid); 4010 return thread->th.th_current_task->td_task_id; 4011 4012 } // __kmpc_get_taskid 4013 4014 kmp_uint64 __kmpc_get_parent_taskid() { 4015 4016 kmp_int32 gtid; 4017 kmp_info_t *thread; 4018 kmp_taskdata_t *parent_task; 4019 4020 gtid = __kmp_get_gtid(); 4021 if (gtid < 0) { 4022 return 0; 4023 } 4024 thread = __kmp_thread_from_gtid(gtid); 4025 parent_task = thread->th.th_current_task->td_parent; 4026 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4027 4028 } // __kmpc_get_parent_taskid 4029 4030 /*! 4031 @ingroup WORK_SHARING 4032 @param loc source location information. 4033 @param gtid global thread number. 4034 @param num_dims number of associated doacross loops. 4035 @param dims info on loop bounds. 4036 4037 Initialize doacross loop information. 4038 Expect the compiler to send us inclusive bounds, 4039 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
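A sketch of the user code this entry point typically serves (illustrative
only):

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink : i - 2)
      // ... consume results of iteration i-2 ...
      #pragma omp ordered depend(source)
      // ... publish results of iteration i ...
    }

For this loop the compiler would pass num_dims=1 and dims[0]={lo=2, up=8,
st=2}, lower each depend(sink) clause to a __kmpc_doacross_wait() call and
each depend(source) to __kmpc_doacross_post(), and finalize the loop with
__kmpc_doacross_fini().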
4040 */ 4041 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4042 const struct kmp_dim *dims) { 4043 __kmp_assert_valid_gtid(gtid); 4044 int j, idx; 4045 kmp_int64 last, trace_count; 4046 kmp_info_t *th = __kmp_threads[gtid]; 4047 kmp_team_t *team = th->th.th_team; 4048 kmp_uint32 *flags; 4049 kmp_disp_t *pr_buf = th->th.th_dispatch; 4050 dispatch_shared_info_t *sh_buf; 4051 4052 KA_TRACE( 4053 20, 4054 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4055 gtid, num_dims, !team->t.t_serialized)); 4056 KMP_DEBUG_ASSERT(dims != NULL); 4057 KMP_DEBUG_ASSERT(num_dims > 0); 4058 4059 if (team->t.t_serialized) { 4060 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4061 return; // no dependencies if team is serialized 4062 } 4063 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4064 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4065 // the next loop 4066 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4067 4068 // Save bounds info into allocated private buffer 4069 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4070 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4071 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4072 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4073 pr_buf->th_doacross_info[0] = 4074 (kmp_int64)num_dims; // first element is number of dimensions 4075 // Also save the address of num_done so it can be accessed later without 4076 // knowing the buffer index 4077 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4078 pr_buf->th_doacross_info[2] = dims[0].lo; 4079 pr_buf->th_doacross_info[3] = dims[0].up; 4080 pr_buf->th_doacross_info[4] = dims[0].st; 4081 last = 5; 4082 for (j = 1; j < num_dims; ++j) { 4083 kmp_int64 4084 range_length; // Keeps the range of each dimension but the first, dims[0] 4085 if (dims[j].st == 1) { // most common case 4086 // AC: should we care about ranges bigger than LLONG_MAX? (not for now) 4087 range_length = dims[j].up - dims[j].lo + 1; 4088 } else { 4089 if (dims[j].st > 0) { 4090 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4091 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4092 } else { // negative increment 4093 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4094 range_length = 4095 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4096 } 4097 } 4098 pr_buf->th_doacross_info[last++] = range_length; 4099 pr_buf->th_doacross_info[last++] = dims[j].lo; 4100 pr_buf->th_doacross_info[last++] = dims[j].up; 4101 pr_buf->th_doacross_info[last++] = dims[j].st; 4102 } 4103 4104 // Compute total trip count. 4105 // Start with range of dims[0] which we don't need to keep in the buffer.
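// A worked instance of the computation below, using the loop from the doc
// comment above: dims[0] = {lo=2, up=8, st=2} takes the positive-stride
// branch, giving trace_count = (8 - 2) / 2 + 1 = 4 iterations; each
// remaining dimension then multiplies in its saved range_length.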
4106 if (dims[0].st == 1) { // most common case 4107 trace_count = dims[0].up - dims[0].lo + 1; 4108 } else if (dims[0].st > 0) { 4109 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4110 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4111 } else { // negative increment 4112 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4113 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4114 } 4115 for (j = 1; j < num_dims; ++j) { 4116 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4117 } 4118 KMP_DEBUG_ASSERT(trace_count > 0); 4119 4120 // Check if shared buffer is not occupied by other loop (idx - 4121 // __kmp_dispatch_num_buffers) 4122 if (idx != sh_buf->doacross_buf_idx) { 4123 // Shared buffer is occupied, wait for it to be free 4124 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4125 __kmp_eq_4, NULL); 4126 } 4127 #if KMP_32_BIT_ARCH 4128 // Check if we are the first thread. After the CAS the first thread gets 0, 4129 // others get 1 if initialization is in progress, allocated pointer otherwise. 4130 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4131 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4132 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4133 #else 4134 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4135 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4136 #endif 4137 if (flags == NULL) { 4138 // we are the first thread, allocate the array of flags 4139 size_t size = 4140 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4141 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4142 KMP_MB(); 4143 sh_buf->doacross_flags = flags; 4144 } else if (flags == (kmp_uint32 *)1) { 4145 #if KMP_32_BIT_ARCH 4146 // initialization is still in progress, need to wait 4147 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4148 #else 4149 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4150 #endif 4151 KMP_YIELD(TRUE); 4152 KMP_MB(); 4153 } else { 4154 KMP_MB(); 4155 } 4156 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4157 pr_buf->th_doacross_flags = 4158 sh_buf->doacross_flags; // save private copy in order to not 4159 // touch shared buffer on each iteration 4160 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4161 } 4162 4163 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4164 __kmp_assert_valid_gtid(gtid); 4165 kmp_int64 shft; 4166 size_t num_dims, i; 4167 kmp_uint32 flag; 4168 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4169 kmp_info_t *th = __kmp_threads[gtid]; 4170 kmp_team_t *team = th->th.th_team; 4171 kmp_disp_t *pr_buf; 4172 kmp_int64 lo, up, st; 4173 4174 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4175 if (team->t.t_serialized) { 4176 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4177 return; // no dependencies if team is serialized 4178 } 4179 4180 // calculate sequential iteration number and check out-of-bounds condition 4181 pr_buf = th->th.th_dispatch; 4182 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4183 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4184 lo = pr_buf->th_doacross_info[2]; 4185 up = pr_buf->th_doacross_info[3]; 4186 st = pr_buf->th_doacross_info[4]; 4187 #if OMPT_SUPPORT && OMPT_OPTIONAL 4188 ompt_dependence_t deps[num_dims]; 4189 #endif 4190 if (st == 1) { // most common case 4191 if (vec[0] < lo || vec[0] > up) { 4192 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4193 "bounds [%lld,%lld]\n", 4194 gtid, vec[0], lo, up)); 4195 return; 4196 } 4197 iter_number = vec[0] - lo; 4198 } else if (st > 0) { 4199 if (vec[0] < lo || vec[0] > up) { 4200 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4201 "bounds [%lld,%lld]\n", 4202 gtid, vec[0], lo, up)); 4203 return; 4204 } 4205 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4206 } else { // negative increment 4207 if (vec[0] > lo || vec[0] < up) { 4208 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4209 "bounds [%lld,%lld]\n", 4210 gtid, vec[0], lo, up)); 4211 return; 4212 } 4213 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4214 } 4215 #if OMPT_SUPPORT && OMPT_OPTIONAL 4216 deps[0].variable.value = iter_number; 4217 deps[0].dependence_type = ompt_dependence_type_sink; 4218 #endif 4219 for (i = 1; i < num_dims; ++i) { 4220 kmp_int64 iter, ln; 4221 size_t j = i * 4; 4222 ln = pr_buf->th_doacross_info[j + 1]; 4223 lo = pr_buf->th_doacross_info[j + 2]; 4224 up = pr_buf->th_doacross_info[j + 3]; 4225 st = pr_buf->th_doacross_info[j + 4]; 4226 if (st == 1) { 4227 if (vec[i] < lo || vec[i] > up) { 4228 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4229 "bounds [%lld,%lld]\n", 4230 gtid, vec[i], lo, up)); 4231 return; 4232 } 4233 iter = vec[i] - lo; 4234 } else if (st > 0) { 4235 if (vec[i] < lo || vec[i] > up) { 4236 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4237 "bounds [%lld,%lld]\n", 4238 gtid, vec[i], lo, up)); 4239 return; 4240 } 4241 iter = (kmp_uint64)(vec[i] - lo) / st; 4242 } else { // st < 0 4243 if (vec[i] > lo || vec[i] < up) { 4244 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4245 "bounds [%lld,%lld]\n", 4246 gtid, vec[i], lo, up)); 4247 return; 4248 } 4249 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4250 } 4251 iter_number = iter + ln * iter_number; 4252 #if OMPT_SUPPORT && OMPT_OPTIONAL 4253 deps[i].variable.value = iter; 4254 deps[i].dependence_type = ompt_dependence_type_sink; 4255 #endif 4256 } 4257 shft = iter_number % 32; // use 32-bit granularity 4258 iter_number >>= 5; // divided by 32 4259 flag = 1 << shft; 4260 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4261 KMP_YIELD(TRUE); 4262 } 4263 KMP_MB(); 4264 #if OMPT_SUPPORT && OMPT_OPTIONAL 4265 if (ompt_enabled.ompt_callback_dependences) { 4266 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4267 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4268 } 4269 #endif 4270 KA_TRACE(20, 4271 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4272 gtid, (iter_number << 5) + shft)); 4273 } 4274 4275 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4276 __kmp_assert_valid_gtid(gtid); 4277 kmp_int64 shft; 4278 size_t num_dims, i; 4279 kmp_uint32 flag; 4280 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4281 kmp_info_t *th = __kmp_threads[gtid]; 4282 kmp_team_t *team = th->th.th_team; 4283 kmp_disp_t *pr_buf; 4284 kmp_int64 lo, st; 4285 4286 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4287 if (team->t.t_serialized) { 4288 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4289 return; // no dependencies if team is serialized 4290 } 4291 4292 // calculate sequential iteration number (same as in "wait" but no 4293 // out-of-bounds checks) 4294 pr_buf = th->th.th_dispatch; 4295 
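// The same linearization as in __kmpc_doacross_wait() is applied below: per
// dimension, iter_number = iter + ln * iter_number. A small worked example
// with illustrative values: in a 2-D nest whose inner range is ln = 10, the
// normalized vector (3, 7) maps to 7 + 10 * 3 = 37, i.e. bit 37 % 32 = 5 of
// flag word 37 >> 5 = 1.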
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4296 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4297 lo = pr_buf->th_doacross_info[2]; 4298 st = pr_buf->th_doacross_info[4]; 4299 #if OMPT_SUPPORT && OMPT_OPTIONAL 4300 ompt_dependence_t deps[num_dims]; 4301 #endif 4302 if (st == 1) { // most common case 4303 iter_number = vec[0] - lo; 4304 } else if (st > 0) { 4305 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4306 } else { // negative increment 4307 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4308 } 4309 #if OMPT_SUPPORT && OMPT_OPTIONAL 4310 deps[0].variable.value = iter_number; 4311 deps[0].dependence_type = ompt_dependence_type_source; 4312 #endif 4313 for (i = 1; i < num_dims; ++i) { 4314 kmp_int64 iter, ln; 4315 size_t j = i * 4; 4316 ln = pr_buf->th_doacross_info[j + 1]; 4317 lo = pr_buf->th_doacross_info[j + 2]; 4318 st = pr_buf->th_doacross_info[j + 4]; 4319 if (st == 1) { 4320 iter = vec[i] - lo; 4321 } else if (st > 0) { 4322 iter = (kmp_uint64)(vec[i] - lo) / st; 4323 } else { // st < 0 4324 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4325 } 4326 iter_number = iter + ln * iter_number; 4327 #if OMPT_SUPPORT && OMPT_OPTIONAL 4328 deps[i].variable.value = iter; 4329 deps[i].dependence_type = ompt_dependence_type_source; 4330 #endif 4331 } 4332 #if OMPT_SUPPORT && OMPT_OPTIONAL 4333 if (ompt_enabled.ompt_callback_dependences) { 4334 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4335 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4336 } 4337 #endif 4338 shft = iter_number % 32; // use 32-bit granularity 4339 iter_number >>= 5; // divided by 32 4340 flag = 1 << shft; 4341 KMP_MB(); 4342 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4343 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4344 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4345 (iter_number << 5) + shft)); 4346 } 4347 4348 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4349 __kmp_assert_valid_gtid(gtid); 4350 kmp_int32 num_done; 4351 kmp_info_t *th = __kmp_threads[gtid]; 4352 kmp_team_t *team = th->th.th_team; 4353 kmp_disp_t *pr_buf = th->th.th_dispatch; 4354 4355 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4356 if (team->t.t_serialized) { 4357 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4358 return; // nothing to do 4359 } 4360 num_done = 4361 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4362 if (num_done == th->th.th_team_nproc) { 4363 // we are the last thread, need to free shared resources 4364 int idx = pr_buf->th_doacross_buf_idx - 1; 4365 dispatch_shared_info_t *sh_buf = 4366 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4367 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4368 (kmp_int64)&sh_buf->doacross_num_done); 4369 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4370 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4371 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4372 sh_buf->doacross_flags = NULL; 4373 sh_buf->doacross_num_done = 0; 4374 sh_buf->doacross_buf_idx += 4375 __kmp_dispatch_num_buffers; // free buffer for future re-use 4376 } 4377 // free private resources (need to keep buffer index forever) 4378 pr_buf->th_doacross_flags = NULL; 4379 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4380 pr_buf->th_doacross_info = NULL; 4381 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4382 } 4383 4384 /* OpenMP 5.1 Memory Management routines */ 4385 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4386 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4387 } 4388 4389 void *omp_aligned_alloc(size_t align, size_t size, 4390 omp_allocator_handle_t allocator) { 4391 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4392 } 4393 4394 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4395 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4396 } 4397 4398 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4399 omp_allocator_handle_t allocator) { 4400 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4401 } 4402 4403 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4404 omp_allocator_handle_t free_allocator) { 4405 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4406 free_allocator); 4407 } 4408 4409 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4410 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4411 } 4412 /* end of OpenMP 5.1 Memory Management routines */ 4413 4414 int __kmpc_get_target_offload(void) { 4415 if (!__kmp_init_serial) { 4416 __kmp_serial_initialize(); 4417 } 4418 return __kmp_target_offload; 4419 } 4420 4421 int __kmpc_pause_resource(kmp_pause_status_t level) { 4422 if (!__kmp_init_serial) { 4423 return 1; // Can't pause if runtime is not initialized 4424 } 4425 return __kmp_pause_resource(level); 4426 } 4427 4428 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4429 if (!__kmp_init_serial) 4430 __kmp_serial_initialize(); 4431 4432 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4433 4434 #if OMPT_SUPPORT 4435 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4436 ompt_callbacks.ompt_callback(ompt_callback_error)( 4437 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4438 OMPT_GET_RETURN_ADDRESS(0)); 4439 } 4440 #endif // OMPT_SUPPORT 4441 4442 char *src_loc; 4443 if (loc && loc->psource) { 4444 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4445 src_loc = 4446 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4447 __kmp_str_loc_free(&str_loc); 4448 } else { 4449 src_loc = __kmp_str_format("unknown"); 4450 } 4451 4452 if (severity == severity_warning) 4453 KMP_WARNING(UserDirectedWarning, src_loc, message); 4454 else 4455 KMP_FATAL(UserDirectedError, src_loc, message); 4456 4457 __kmp_str_free(&src_loc); 4458 } 4459 4460 // Mark begin of scope directive. 4461 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4462 // reserved is for extension of scope directive and not used. 4463 #if OMPT_SUPPORT && OMPT_OPTIONAL 4464 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4465 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4466 int tid = __kmp_tid_from_gtid(gtid); 4467 ompt_callbacks.ompt_callback(ompt_callback_work)( 4468 ompt_work_scope, ompt_scope_begin, 4469 &(team->t.ompt_team_info.parallel_data), 4470 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4471 OMPT_GET_RETURN_ADDRESS(0)); 4472 } 4473 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4474 } 4475 4476 // Mark end of scope directive 4477 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4478 // reserved is for extension of scope directive and not used. 
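// A sketch of the expected pairing (illustrative; the user code is not part
// of this file): for
//   #pragma omp scope private(x)
//   { ... }
// a compiler brackets the block with __kmpc_scope(loc, gtid, NULL) and
// __kmpc_end_scope(loc, gtid, NULL), so the OMPT work callback below reports
// the matching ompt_scope_end event.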
4479 #if OMPT_SUPPORT && OMPT_OPTIONAL 4480 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4481 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4482 int tid = __kmp_tid_from_gtid(gtid); 4483 ompt_callbacks.ompt_callback(ompt_callback_work)( 4484 ompt_work_scope, ompt_scope_end, 4485 &(team->t.ompt_team_info.parallel_data), 4486 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4487 OMPT_GET_RETURN_ADDRESS(0)); 4488 } 4489 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4490 } 4491 4492 #ifdef KMP_USE_VERSION_SYMBOLS 4493 // For GOMP compatibility there are two versions of each omp_* API. 4494 // One is the plain C symbol and one is the Fortran symbol with an appended 4495 // underscore. When we implement a specific ompc_* version of an omp_* 4496 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4497 // instead of the Fortran versions in kmp_ftn_entry.h 4498 extern "C" { 4499 // Have to undef these from omp.h so they aren't translated into 4500 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4501 #ifdef omp_set_affinity_format 4502 #undef omp_set_affinity_format 4503 #endif 4504 #ifdef omp_get_affinity_format 4505 #undef omp_get_affinity_format 4506 #endif 4507 #ifdef omp_display_affinity 4508 #undef omp_display_affinity 4509 #endif 4510 #ifdef omp_capture_affinity 4511 #undef omp_capture_affinity 4512 #endif 4513 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4514 "OMP_5.0"); 4515 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4516 "OMP_5.0"); 4517 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4518 "OMP_5.0"); 4519 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4520 "OMP_5.0"); 4521 } // extern "C" 4522 #endif 4523
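/* A usage sketch for the OpenMP 5.1 memory management entry points defined
   above (illustrative only):

     double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                               omp_default_mem_alloc);
     if (buf) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }

   omp_alloc/omp_calloc/omp_realloc follow the same pattern; passing
   omp_null_allocator requests the allocator currently set in the
   def-allocator-var ICV. */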