10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_runtime.cpp -- KPTS runtime support library 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "kmp.h" 140b57cec5SDimitry Andric #include "kmp_affinity.h" 150b57cec5SDimitry Andric #include "kmp_atomic.h" 160b57cec5SDimitry Andric #include "kmp_environment.h" 170b57cec5SDimitry Andric #include "kmp_error.h" 180b57cec5SDimitry Andric #include "kmp_i18n.h" 190b57cec5SDimitry Andric #include "kmp_io.h" 200b57cec5SDimitry Andric #include "kmp_itt.h" 210b57cec5SDimitry Andric #include "kmp_settings.h" 220b57cec5SDimitry Andric #include "kmp_stats.h" 230b57cec5SDimitry Andric #include "kmp_str.h" 240b57cec5SDimitry Andric #include "kmp_wait_release.h" 250b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h" 260b57cec5SDimitry Andric #include "kmp_dispatch.h" 270b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 280b57cec5SDimitry Andric #include "kmp_dispatch_hier.h" 290b57cec5SDimitry Andric #endif 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #if OMPT_SUPPORT 320b57cec5SDimitry Andric #include "ompt-specific.h" 330b57cec5SDimitry Andric #endif 34fe6060f1SDimitry Andric #if OMPD_SUPPORT 35fe6060f1SDimitry Andric #include "ompd-specific.h" 36fe6060f1SDimitry Andric #endif 370b57cec5SDimitry Andric 38d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 39e8d8bef9SDimitry Andric #include "llvm/Support/TimeProfiler.h" 
40e8d8bef9SDimitry Andric static char *ProfileTraceFile = nullptr; 41e8d8bef9SDimitry Andric #endif 42e8d8bef9SDimitry Andric 430b57cec5SDimitry Andric /* these are temporary issues to be dealt with */ 440b57cec5SDimitry Andric #define KMP_USE_PRCTL 0 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #if KMP_OS_WINDOWS 470b57cec5SDimitry Andric #include <process.h> 480b57cec5SDimitry Andric #endif 490b57cec5SDimitry Andric 50e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 51e8d8bef9SDimitry Andric // windows does not need include files as it doesn't use shared memory 52e8d8bef9SDimitry Andric #else 53e8d8bef9SDimitry Andric #include <sys/mman.h> 54e8d8bef9SDimitry Andric #include <sys/stat.h> 55e8d8bef9SDimitry Andric #include <fcntl.h> 56e8d8bef9SDimitry Andric #define SHM_SIZE 1024 57e8d8bef9SDimitry Andric #endif 58e8d8bef9SDimitry Andric 590b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 600b57cec5SDimitry Andric char const __kmp_version_alt_comp[] = 610b57cec5SDimitry Andric KMP_VERSION_PREFIX "alternative compiler support: yes"; 620b57cec5SDimitry Andric #endif /* defined(KMP_GOMP_COMPAT) */ 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric char const __kmp_version_omp_api[] = 650b57cec5SDimitry Andric KMP_VERSION_PREFIX "API version: 5.0 (201611)"; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric #ifdef KMP_DEBUG 680b57cec5SDimitry Andric char const __kmp_version_lock[] = 690b57cec5SDimitry Andric KMP_VERSION_PREFIX "lock type: run time selectable"; 700b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric #if KMP_USE_MONITOR 770b57cec5SDimitry Andric kmp_info_t __kmp_monitor; 780b57cec5SDimitry Andric #endif 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /* Forward declarations */ 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric void __kmp_cleanup(void); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, 850b57cec5SDimitry Andric int gtid); 860b57cec5SDimitry Andric static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 870b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 880b57cec5SDimitry Andric ident_t *loc); 890b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 900b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, 910b57cec5SDimitry Andric int update_master_only = 0); 920b57cec5SDimitry Andric #endif 930b57cec5SDimitry Andric static void __kmp_do_serial_initialize(void); 940b57cec5SDimitry Andric void __kmp_fork_barrier(int gtid, int tid); 950b57cec5SDimitry Andric void __kmp_join_barrier(int gtid); 960b57cec5SDimitry Andric void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, 970b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, ident_t *loc); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 1000b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); 1010b57cec5SDimitry Andric #endif 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed); 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid); 1060b57cec5SDimitry Andric #endif 1070b57cec5SDimitry Andric static void __kmp_reap_thread(kmp_info_t *thread, int is_root); 1080b57cec5SDimitry Andric kmp_info_t 
*__kmp_thread_pool_insert_pt = NULL; 1090b57cec5SDimitry Andric 110349cc55cSDimitry Andric void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, 111349cc55cSDimitry Andric int new_nthreads); 112349cc55cSDimitry Andric void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); 113349cc55cSDimitry Andric 1140b57cec5SDimitry Andric /* Calculate the identifier of the current thread */ 1150b57cec5SDimitry Andric /* fast (and somewhat portable) way to get unique identifier of executing 1160b57cec5SDimitry Andric thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ 1170b57cec5SDimitry Andric int __kmp_get_global_thread_id() { 1180b57cec5SDimitry Andric int i; 1190b57cec5SDimitry Andric kmp_info_t **other_threads; 1200b57cec5SDimitry Andric size_t stack_data; 1210b57cec5SDimitry Andric char *stack_addr; 1220b57cec5SDimitry Andric size_t stack_size; 1230b57cec5SDimitry Andric char *stack_base; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric KA_TRACE( 1260b57cec5SDimitry Andric 1000, 1270b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", 1280b57cec5SDimitry Andric __kmp_nth, __kmp_all_nth)); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to 1310b57cec5SDimitry Andric a parallel region, made it return KMP_GTID_DNE to force serial_initialize 1320b57cec5SDimitry Andric by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee 1330b57cec5SDimitry Andric __kmp_init_gtid for this to work. 
*/ 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric if (!TCR_4(__kmp_init_gtid)) 1360b57cec5SDimitry Andric return KMP_GTID_DNE; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 1390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 1400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); 1410b57cec5SDimitry Andric return __kmp_gtid; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric #endif 1440b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 1450b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); 1460b57cec5SDimitry Andric return __kmp_gtid_get_specific(); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric stack_addr = (char *)&stack_data; 1510b57cec5SDimitry Andric other_threads = __kmp_threads; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric /* ATT: The code below is a source of potential bugs due to unsynchronized 1540b57cec5SDimitry Andric access to __kmp_threads array. For example: 1550b57cec5SDimitry Andric 1. Current thread loads other_threads[i] to thr and checks it, it is 1560b57cec5SDimitry Andric non-NULL. 1570b57cec5SDimitry Andric 2. Current thread is suspended by OS. 1580b57cec5SDimitry Andric 3. Another thread unregisters and finishes (debug versions of free() 1590b57cec5SDimitry Andric may fill memory with something like 0xEF). 1600b57cec5SDimitry Andric 4. Current thread is resumed. 1610b57cec5SDimitry Andric 5. Current thread reads junk from *thr. 1620b57cec5SDimitry Andric TODO: Fix it. 
--ln */ 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); 1670b57cec5SDimitry Andric if (!thr) 1680b57cec5SDimitry Andric continue; 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); 1710b57cec5SDimitry Andric stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric /* stack grows down -- search through all of the active threads */ 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (stack_addr <= stack_base) { 1760b57cec5SDimitry Andric size_t stack_diff = stack_base - stack_addr; 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric if (stack_diff <= stack_size) { 1790b57cec5SDimitry Andric /* The only way we can be closer than the allocated */ 1800b57cec5SDimitry Andric /* stack size is if we are running on this thread. */ 1810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); 1820b57cec5SDimitry Andric return i; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* get specific to try and determine our gtid */ 1880b57cec5SDimitry Andric KA_TRACE(1000, 1890b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: internal alg. 
failed to find " 1900b57cec5SDimitry Andric "thread, using TLS\n")); 1910b57cec5SDimitry Andric i = __kmp_gtid_get_specific(); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric /* if we havn't been assigned a gtid, then return code */ 1960b57cec5SDimitry Andric if (i < 0) 1970b57cec5SDimitry Andric return i; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /* dynamically updated stack window for uber threads to avoid get_specific 2000b57cec5SDimitry Andric call */ 2010b57cec5SDimitry Andric if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { 2020b57cec5SDimitry Andric KMP_FATAL(StackOverflow, i); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2060b57cec5SDimitry Andric if (stack_addr > stack_base) { 2070b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 2080b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2090b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - 2100b57cec5SDimitry Andric stack_base); 2110b57cec5SDimitry Andric } else { 2120b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2130b57cec5SDimitry Andric stack_base - stack_addr); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /* Reprint stack bounds for ubermaster since they have been refined */ 2170b57cec5SDimitry Andric if (__kmp_storage_map) { 2180b57cec5SDimitry Andric char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2190b57cec5SDimitry Andric char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 2200b57cec5SDimitry Andric __kmp_print_storage_map_gtid(i, stack_beg, stack_end, 2210b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize, 2220b57cec5SDimitry Andric 
"th_%d stack (refinement)", i); 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric return i; 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric int __kmp_get_global_thread_id_reg() { 2280b57cec5SDimitry Andric int gtid; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric if (!__kmp_init_serial) { 2310b57cec5SDimitry Andric gtid = KMP_GTID_DNE; 2320b57cec5SDimitry Andric } else 2330b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2340b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 2350b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); 2360b57cec5SDimitry Andric gtid = __kmp_gtid; 2370b57cec5SDimitry Andric } else 2380b57cec5SDimitry Andric #endif 2390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 2400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); 2410b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2420b57cec5SDimitry Andric } else { 2430b57cec5SDimitry Andric KA_TRACE(1000, 2440b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); 2450b57cec5SDimitry Andric gtid = __kmp_get_global_thread_id(); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /* we must be a new uber master sibling thread */ 2490b57cec5SDimitry Andric if (gtid == KMP_GTID_DNE) { 2500b57cec5SDimitry Andric KA_TRACE(10, 2510b57cec5SDimitry Andric ("__kmp_get_global_thread_id_reg: Encountered new root thread. 
" 2520b57cec5SDimitry Andric "Registering a new gtid.\n")); 2530b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 2540b57cec5SDimitry Andric if (!__kmp_init_serial) { 2550b57cec5SDimitry Andric __kmp_do_serial_initialize(); 2560b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2570b57cec5SDimitry Andric } else { 2580b57cec5SDimitry Andric gtid = __kmp_register_root(FALSE); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 2610b57cec5SDimitry Andric /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric return gtid; 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric /* caller must hold forkjoin_lock */ 2700b57cec5SDimitry Andric void __kmp_check_stack_overlap(kmp_info_t *th) { 2710b57cec5SDimitry Andric int f; 2720b57cec5SDimitry Andric char *stack_beg = NULL; 2730b57cec5SDimitry Andric char *stack_end = NULL; 2740b57cec5SDimitry Andric int gtid; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); 2770b57cec5SDimitry Andric if (__kmp_storage_map) { 2780b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2790b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric if (gtid == KMP_GTID_MONITOR) { 2840b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2850b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2860b57cec5SDimitry Andric "th_%s stack (%s)", "mon", 2870b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); 2880b57cec5SDimitry Andric } else { 2890b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2900b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2910b57cec5SDimitry Andric "th_%d stack (%s)", gtid, 2920b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric /* No point in checking ubermaster threads since they use refinement and 2970b57cec5SDimitry Andric * cannot overlap */ 2980b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2990b57cec5SDimitry Andric if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 3000b57cec5SDimitry Andric KA_TRACE(10, 3010b57cec5SDimitry Andric ("__kmp_check_stack_overlap: performing extensive checking\n")); 3020b57cec5SDimitry Andric if (stack_beg == NULL) { 3030b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 3040b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 3080b57cec5SDimitry Andric kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric if (f_th && f_th != th) { 3110b57cec5SDimitry Andric char *other_stack_end = 3120b57cec5SDimitry Andric (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 3130b57cec5SDimitry Andric char *other_stack_beg = 3140b57cec5SDimitry Andric other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 3150b57cec5SDimitry Andric if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 3160b57cec5SDimitry Andric (stack_end > other_stack_beg && stack_end < other_stack_end)) { 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric /* Print the other stack values before the abort */ 3190b57cec5SDimitry Andric if (__kmp_storage_map) 
3200b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 3210b57cec5SDimitry Andric -1, other_stack_beg, other_stack_end, 3220b57cec5SDimitry Andric (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 3230b57cec5SDimitry Andric "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), 3260b57cec5SDimitry Andric __kmp_msg_null); 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric void __kmp_infinite_loop(void) { 3370b57cec5SDimitry Andric static int done = FALSE; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric while (!done) { 3400b57cec5SDimitry Andric KMP_YIELD(TRUE); 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric #define MAX_MESSAGE 512 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 3470b57cec5SDimitry Andric char const *format, ...) 
{ 3480b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3490b57cec5SDimitry Andric va_list ap; 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric va_start(ap, format); 3520b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 3530b57cec5SDimitry Andric p2, (unsigned long)size, format); 3540b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 3550b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 3560b57cec5SDimitry Andric #if KMP_PRINT_DATA_PLACEMENT 3570b57cec5SDimitry Andric int node; 3580b57cec5SDimitry Andric if (gtid >= 0) { 3590b57cec5SDimitry Andric if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 3600b57cec5SDimitry Andric if (__kmp_storage_map_verbose) { 3610b57cec5SDimitry Andric node = __kmp_get_host_node(p1); 3620b57cec5SDimitry Andric if (node < 0) /* doesn't work, so don't try this next time */ 3630b57cec5SDimitry Andric __kmp_storage_map_verbose = FALSE; 3640b57cec5SDimitry Andric else { 3650b57cec5SDimitry Andric char *last; 3660b57cec5SDimitry Andric int lastNode; 3670b57cec5SDimitry Andric int localProc = __kmp_get_cpu_from_gtid(gtid); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric const int page_size = KMP_GET_PAGE_SIZE(); 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 3720b57cec5SDimitry Andric p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 3730b57cec5SDimitry Andric if (localProc >= 0) 3740b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 3750b57cec5SDimitry Andric localProc >> 1); 3760b57cec5SDimitry Andric else 3770b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d\n", gtid); 3780b57cec5SDimitry Andric #if KMP_USE_PRCTL 3790b57cec5SDimitry Andric /* The more elaborate format is disabled for now because of the prctl 3800b57cec5SDimitry Andric * hanging bug. 
*/ 3810b57cec5SDimitry Andric do { 3820b57cec5SDimitry Andric last = p1; 3830b57cec5SDimitry Andric lastNode = node; 3840b57cec5SDimitry Andric /* This loop collates adjacent pages with the same host node. */ 3850b57cec5SDimitry Andric do { 3860b57cec5SDimitry Andric (char *)p1 += page_size; 3870b57cec5SDimitry Andric } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 3880b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 3890b57cec5SDimitry Andric lastNode); 3900b57cec5SDimitry Andric } while (p1 <= p2); 3910b57cec5SDimitry Andric #else 3920b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 3930b57cec5SDimitry Andric (char *)p1 + (page_size - 1), 3940b57cec5SDimitry Andric __kmp_get_host_node(p1)); 3950b57cec5SDimitry Andric if (p1 < p2) { 3960b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 3970b57cec5SDimitry Andric (char *)p2 + (page_size - 1), 3980b57cec5SDimitry Andric __kmp_get_host_node(p2)); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric #endif 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric } else 4040b57cec5SDimitry Andric __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric #endif /* KMP_PRINT_DATA_PLACEMENT */ 4070b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 40806c3fb27SDimitry Andric 40906c3fb27SDimitry Andric va_end(ap); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric void __kmp_warn(char const *format, ...) 
{ 4130b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 4140b57cec5SDimitry Andric va_list ap; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric if (__kmp_generate_warnings == kmp_warnings_off) { 4170b57cec5SDimitry Andric return; 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric va_start(ap, format); 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 4230b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 4240b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 4250b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric va_end(ap); 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric void __kmp_abort_process() { 4310b57cec5SDimitry Andric // Later threads may stall here, but that's ok because abort() will kill them. 4320b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric if (__kmp_debug_buf) { 4350b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric if (KMP_OS_WINDOWS) { 4390b57cec5SDimitry Andric // Let other threads know of abnormal termination and prevent deadlock 4400b57cec5SDimitry Andric // if abort happened during library initialization or shutdown 4410b57cec5SDimitry Andric __kmp_global.g.g_abort = SIGABRT; 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric /* On Windows* OS by default abort() causes pop-up error box, which stalls 4440b57cec5SDimitry Andric nightly testing. Unfortunately, we cannot reliably suppress pop-up error 4450b57cec5SDimitry Andric boxes. _set_abort_behavior() works well, but this function is not 4460b57cec5SDimitry Andric available in VS7 (this is not problem for DLL, but it is a problem for 4470b57cec5SDimitry Andric static OpenMP RTL). 
SetErrorMode (and so, timelimit utility) does not 4480b57cec5SDimitry Andric help, at least in some versions of MS C RTL. 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric It seems following sequence is the only way to simulate abort() and 4510b57cec5SDimitry Andric avoid pop-up error box. */ 4520b57cec5SDimitry Andric raise(SIGABRT); 4530b57cec5SDimitry Andric _exit(3); // Just in case, if signal ignored, exit anyway. 4540b57cec5SDimitry Andric } else { 455e8d8bef9SDimitry Andric __kmp_unregister_library(); 4560b57cec5SDimitry Andric abort(); 4570b57cec5SDimitry Andric } 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric __kmp_infinite_loop(); 4600b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_exit_lock); 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric } // __kmp_abort_process 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric void __kmp_abort_thread(void) { 4650b57cec5SDimitry Andric // TODO: Eliminate g_abort global variable and this function. 4660b57cec5SDimitry Andric // In case of abort just call abort(), it will kill all the threads. 4670b57cec5SDimitry Andric __kmp_infinite_loop(); 4680b57cec5SDimitry Andric } // __kmp_abort_thread 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric /* Print out the storage map for the major kmp_info_t thread data structures 4710b57cec5SDimitry Andric that are allocated together. 
*/ 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 4740b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 4750b57cec5SDimitry Andric gtid); 4760b57cec5SDimitry Andric 4770b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 4780b57cec5SDimitry Andric sizeof(kmp_desc_t), "th_%d.th_info", gtid); 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 4810b57cec5SDimitry Andric sizeof(kmp_local_t), "th_%d.th_local", gtid); 4820b57cec5SDimitry Andric 4830b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 4840b57cec5SDimitry Andric gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 4850b57cec5SDimitry Andric sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 4880b57cec5SDimitry Andric &thr->th.th_bar[bs_plain_barrier + 1], 4890b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 4900b57cec5SDimitry Andric gtid); 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 4930b57cec5SDimitry Andric &thr->th.th_bar[bs_forkjoin_barrier + 1], 4940b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 4950b57cec5SDimitry Andric gtid); 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 4980b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 4990b57cec5SDimitry Andric &thr->th.th_bar[bs_reduction_barrier + 1], 5000b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 5010b57cec5SDimitry Andric gtid); 5020b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 5030b57cec5SDimitry Andric } 

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

// Debug/diagnostic aid: dumps the address range of the team object itself and
// of each per-team array (barrier structs, dispatch slots, thread pointers,
// dispatch buffers) via __kmp_print_storage_map_gtid. The gtid argument -1
// marks the output as not tied to any particular thread.
//   header  - label prefix used in every printed map entry
//   team    - team whose storage layout is printed
//   team_id - numeric id appended to the label
//   num_thr - number of thread slots to report for the per-thread arrays
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  // Teams that can run in parallel use the configured number of dispatch
  // buffers; a team with max_nproc <= 1 only has 2.
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  // The whole barrier array first, then each individual barrier type.
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}

// Initialize the runtime's allocator support (memkind library and
// target-memory allocators). Called during library startup.
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
// Tear down allocator support. NOTE(review): only memkind is finalized here;
// no target-memory finalization counterpart is called in this path.
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

/* ------------------------------------------------------------------------ */

#if ENABLE_LIBOMPTARGET
// Hook for libomptarget integration: set up target-task support.
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

// Windows DLL entry point. Performs runtime shutdown bookkeeping on
// process/thread detach; attach cases only trace. Always returns TRUE
// (never vetoes the load).
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // According to Windows* documentation for DllMain entry point:
    // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
    // lpReserved == NULL when FreeLibrary() is called,
    // lpReserved != NULL when the process is terminated.
    // When FreeLibrary() is called, worker threads remain alive. So the
    // runtime's state is consistent and executing proper shutdown is OK.
    // When the process is terminated, worker threads have exited or been
    // forcefully terminated by the OS and only the shutdown thread remains.
    // This can leave the runtime in an inconsistent state.
    // Hence, only attempt proper cleanup when FreeLibrary() is called.
    // Otherwise, rely on OS to reclaim resources.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn.
 */
// Entry side of an "ordered" region: record the construct for consistency
// checking, then (when parallel-ordered support is compiled in) spin until
// the team's ordered ticket equals this thread's team-local tid, i.e. wait
// for our turn in the ordered sequence.
//   gtid_ref - pointer to the caller's global thread id
//   cid_ref  - construct id (not used in this body)
//   loc_ref  - source location, used for consistency-check error reporting
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    // Block until the team's ordered ticket value equals our tid.
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task.
 */
// Exit side of an "ordered" region: pop the consistency-check record, then
// hand the ordered "ticket" to the next thread in the team by advancing
// t_ordered.dt.t_value (wrapping modulo team size).
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

// Decide whether the calling thread executes a SINGLE construct.
// Returns nonzero if this thread "won" the single block; a serialized team's
// sole thread always wins. The winner is chosen by an atomic compare-and-swap
// on the team's construct counter.
//   gtid    - global thread id of the caller
//   id_ref  - source location identifier
//   push_ws - nonzero to push a workshare record for consistency checking
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  // Lazily complete parallel initialization on first use.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

// Leave a SINGLE construct: end ITT tracking (if built in) and pop the
// workshare record pushed/checked by __kmp_enter_single.
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a
   serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
// Computes how many threads a new team may actually get, applying in order:
//   1) the dynamic adjustment mode (load balance / thread limit / random),
//   2) KMP_ALL_THREADS / KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth),
//   3) OMP_THREAD_LIMIT (the contention group's cg_thread_limit),
//   4) the capacity of the global __kmp_threads array, expanding it if needed.
// Returns 1 whenever the region should effectively serialize.
// NOTE(review): the enter_teams parameter is not referenced in this body.
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // dynamic adjustment disabled: keep the requested count as-is
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    // Room left under the limit: threads already running count against it,
    // but an inactive root's hot-team workers are reusable.
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      // Pick a pseudo-random team size in [1, set_nthreads].
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0); // unknown dynamic mode
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is included
  // in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team.
   We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
// Wires up the primary thread's team fields, then — unless the team is a
// reusable "hot" team whose workers are already installed — installs the
// primary thread in slot 0, allocates/installs workers in slots 1..nproc-1,
// aligns each worker's barrier arrived-state with the team's, optionally
// partitions affinity places, and updates the distributed barrier.
//   root               - the root this team belongs to
//   team               - team being populated (t_nproc already set)
//   master_th          - primary thread of the new team
//   master_gtid        - primary thread's global id (must be the caller)
//   fork_teams_workers - nonzero when forking teams-construct workers, in
//                        which case place partitioning is deferred
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      // Workers inherit the primary thread's teams-construct context.
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet. This partitioning
    // will take place in the parallel region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    // Keep the distributed fork/join barrier's view of the team size current.
    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Mark the team for affinity-format display if any member's team size or
  // nesting level changed since its previous region.
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
// Copies the primary thread's x87 control word and (masked) MXCSR into the
// team descriptor so other code can restore them, or marks the saved state
// invalid when FP-control inheritance is disabled. KMP_CHECK_UPDATE only
// writes when the value actually changed, keeping the team cache line clean.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team. Loads are performed only when the team's saved values differ
// from the current hardware state, avoiding redundant control-register
// writes; does nothing unless FP control was actually saved.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    // the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      // Clear pending x87 exception flags before loading a new control word.
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
// Non-x86 targets: FP-control propagation is a no-op.
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
int realloc); // forward declaration

/* Run a parallel region that has been serialized, so runs only in a team of the
   single primary thread. Installs (or reuses) this thread's serial team,
   pushes a fresh dispatch buffer, and fires the OMPT parallel-begin /
   implicit-task-begin callbacks when enabled. */
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* Skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    // The serial region carries no task team of its own.
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1; // serialized region always has exactly one thread

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used
         TODO increase performance by making this locks more specific */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      /* TODO the above breaks the requirement that if we run out of resources,
         then we can still guarantee that serialized teams are ok, since we may
         need to allocate a new one */
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used,
     * that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for
  // serialized parallel regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // Don't use lw_taskteam after linking. Content was swapped.

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}

// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct, i.e.
to form the outer league 1381bdd1243dSDimitry Andric // of teams 1382bdd1243dSDimitry Andric static inline bool __kmp_is_entering_teams(int active_level, int level, 1383bdd1243dSDimitry Andric int teams_level, kmp_va_list ap) { 1384bdd1243dSDimitry Andric return ((ap == NULL && active_level == 0) || 1385bdd1243dSDimitry Andric (ap && teams_level > 0 && teams_level == level)); 1386bdd1243dSDimitry Andric } 1387bdd1243dSDimitry Andric 1388bdd1243dSDimitry Andric // AC: This is start of parallel that is nested inside teams construct. 1389bdd1243dSDimitry Andric // The team is actual (hot), all workers are ready at the fork barrier. 1390bdd1243dSDimitry Andric // No lock needed to initialize the team a bit, then free workers. 1391bdd1243dSDimitry Andric static inline int 1392bdd1243dSDimitry Andric __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team, 1393bdd1243dSDimitry Andric kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root, 1394bdd1243dSDimitry Andric enum fork_context_e call_context, microtask_t microtask, 1395bdd1243dSDimitry Andric launch_t invoker, int master_set_numthreads, int level, 1396bdd1243dSDimitry Andric #if OMPT_SUPPORT 1397bdd1243dSDimitry Andric ompt_data_t ompt_parallel_data, void *return_address, 1398bdd1243dSDimitry Andric #endif 1399bdd1243dSDimitry Andric kmp_va_list ap) { 1400bdd1243dSDimitry Andric void **argv; 1401bdd1243dSDimitry Andric int i; 1402bdd1243dSDimitry Andric 1403bdd1243dSDimitry Andric parent_team->t.t_ident = loc; 1404bdd1243dSDimitry Andric __kmp_alloc_argv_entries(argc, parent_team, TRUE); 1405bdd1243dSDimitry Andric parent_team->t.t_argc = argc; 1406bdd1243dSDimitry Andric argv = (void **)parent_team->t.t_argv; 1407bdd1243dSDimitry Andric for (i = argc - 1; i >= 0; --i) { 1408bdd1243dSDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 1409bdd1243dSDimitry Andric } 1410bdd1243dSDimitry Andric // Increment our nested depth levels, but not increase the serialization 1411bdd1243dSDimitry Andric if 
(parent_team == master_th->th.th_serial_team) { 1412bdd1243dSDimitry Andric // AC: we are in serialized parallel 1413bdd1243dSDimitry Andric __kmpc_serialized_parallel(loc, gtid); 1414bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1415bdd1243dSDimitry Andric 1416bdd1243dSDimitry Andric if (call_context == fork_context_gnu) { 1417bdd1243dSDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1418bdd1243dSDimitry Andric // correctly, will restore at join time 1419bdd1243dSDimitry Andric parent_team->t.t_serialized--; 1420bdd1243dSDimitry Andric return TRUE; 1421bdd1243dSDimitry Andric } 1422bdd1243dSDimitry Andric 1423bdd1243dSDimitry Andric #if OMPD_SUPPORT 1424bdd1243dSDimitry Andric parent_team->t.t_pkfn = microtask; 1425bdd1243dSDimitry Andric #endif 1426bdd1243dSDimitry Andric 1427bdd1243dSDimitry Andric #if OMPT_SUPPORT 1428bdd1243dSDimitry Andric void *dummy; 1429bdd1243dSDimitry Andric void **exit_frame_p; 1430bdd1243dSDimitry Andric ompt_data_t *implicit_task_data; 1431bdd1243dSDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1432bdd1243dSDimitry Andric 1433bdd1243dSDimitry Andric if (ompt_enabled.enabled) { 1434bdd1243dSDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1435bdd1243dSDimitry Andric &ompt_parallel_data, return_address); 1436bdd1243dSDimitry Andric exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); 1437bdd1243dSDimitry Andric 1438bdd1243dSDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 1439bdd1243dSDimitry Andric // Don't use lw_taskteam after linking. Content was swapped. 
1440bdd1243dSDimitry Andric 1441bdd1243dSDimitry Andric /* OMPT implicit task begin */ 1442bdd1243dSDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 1443bdd1243dSDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1444bdd1243dSDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid); 1445bdd1243dSDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1446bdd1243dSDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data, 1447bdd1243dSDimitry Andric 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 1448bdd1243dSDimitry Andric } 1449bdd1243dSDimitry Andric 1450bdd1243dSDimitry Andric /* OMPT state */ 1451bdd1243dSDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1452bdd1243dSDimitry Andric } else { 1453bdd1243dSDimitry Andric exit_frame_p = &dummy; 1454bdd1243dSDimitry Andric } 1455bdd1243dSDimitry Andric #endif 1456bdd1243dSDimitry Andric 1457bdd1243dSDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1458bdd1243dSDimitry Andric // correctly, will restore at join time 1459bdd1243dSDimitry Andric parent_team->t.t_serialized--; 1460bdd1243dSDimitry Andric 1461bdd1243dSDimitry Andric { 1462bdd1243dSDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1463bdd1243dSDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1464bdd1243dSDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 1465bdd1243dSDimitry Andric #if OMPT_SUPPORT 1466bdd1243dSDimitry Andric , 1467bdd1243dSDimitry Andric exit_frame_p 1468bdd1243dSDimitry Andric #endif 1469bdd1243dSDimitry Andric ); 1470bdd1243dSDimitry Andric } 1471bdd1243dSDimitry Andric 1472bdd1243dSDimitry Andric #if OMPT_SUPPORT 1473bdd1243dSDimitry Andric if (ompt_enabled.enabled) { 1474bdd1243dSDimitry Andric *exit_frame_p = NULL; 1475bdd1243dSDimitry Andric OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = 
ompt_data_none; 1476bdd1243dSDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1477bdd1243dSDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1478bdd1243dSDimitry Andric ompt_scope_end, NULL, implicit_task_data, 1, 1479bdd1243dSDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 1480bdd1243dSDimitry Andric } 1481bdd1243dSDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 1482bdd1243dSDimitry Andric __ompt_lw_taskteam_unlink(master_th); 1483bdd1243dSDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 1484bdd1243dSDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1485bdd1243dSDimitry Andric &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), 1486bdd1243dSDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, return_address); 1487bdd1243dSDimitry Andric } 1488bdd1243dSDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 1489bdd1243dSDimitry Andric } 1490bdd1243dSDimitry Andric #endif 1491bdd1243dSDimitry Andric return TRUE; 1492bdd1243dSDimitry Andric } 1493bdd1243dSDimitry Andric 1494bdd1243dSDimitry Andric parent_team->t.t_pkfn = microtask; 1495bdd1243dSDimitry Andric parent_team->t.t_invoke = invoker; 1496bdd1243dSDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 1497bdd1243dSDimitry Andric parent_team->t.t_active_level++; 1498bdd1243dSDimitry Andric parent_team->t.t_level++; 1499bdd1243dSDimitry Andric parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save 1500bdd1243dSDimitry Andric 1501bdd1243dSDimitry Andric // If the threads allocated to the team are less than the thread limit, update 1502bdd1243dSDimitry Andric // the thread limit here. th_teams_size.nth is specific to this team nested 1503bdd1243dSDimitry Andric // in a teams construct, the team is fully created, and we're about to do 1504bdd1243dSDimitry Andric // the actual fork. 
Best to do this here so that the subsequent uses below 1505bdd1243dSDimitry Andric // and in the join have the correct value. 1506bdd1243dSDimitry Andric master_th->th.th_teams_size.nth = parent_team->t.t_nproc; 1507bdd1243dSDimitry Andric 1508bdd1243dSDimitry Andric #if OMPT_SUPPORT 1509bdd1243dSDimitry Andric if (ompt_enabled.enabled) { 1510bdd1243dSDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1511bdd1243dSDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data, 1512bdd1243dSDimitry Andric return_address); 1513bdd1243dSDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); 1514bdd1243dSDimitry Andric } 1515bdd1243dSDimitry Andric #endif 1516bdd1243dSDimitry Andric 1517bdd1243dSDimitry Andric /* Change number of threads in the team if requested */ 1518bdd1243dSDimitry Andric if (master_set_numthreads) { // The parallel has num_threads clause 1519bdd1243dSDimitry Andric if (master_set_numthreads <= master_th->th.th_teams_size.nth) { 1520bdd1243dSDimitry Andric // AC: only can reduce number of threads dynamically, can't increase 1521bdd1243dSDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1522bdd1243dSDimitry Andric // NOTE: if using distributed barrier, we need to run this code block 1523bdd1243dSDimitry Andric // even when the team size appears not to have changed from the max. 
1524bdd1243dSDimitry Andric int old_proc = master_th->th.th_teams_size.nth; 1525bdd1243dSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 1526bdd1243dSDimitry Andric __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads); 1527bdd1243dSDimitry Andric __kmp_add_threads_to_team(parent_team, master_set_numthreads); 1528bdd1243dSDimitry Andric } 1529bdd1243dSDimitry Andric parent_team->t.t_nproc = master_set_numthreads; 1530bdd1243dSDimitry Andric for (i = 0; i < master_set_numthreads; ++i) { 1531bdd1243dSDimitry Andric other_threads[i]->th.th_team_nproc = master_set_numthreads; 1532bdd1243dSDimitry Andric } 1533bdd1243dSDimitry Andric } 1534bdd1243dSDimitry Andric // Keep extra threads hot in the team for possible next parallels 1535bdd1243dSDimitry Andric master_th->th.th_set_nproc = 0; 1536bdd1243dSDimitry Andric } 1537bdd1243dSDimitry Andric 1538bdd1243dSDimitry Andric #if USE_DEBUGGER 1539bdd1243dSDimitry Andric if (__kmp_debugging) { // Let debugger override number of threads. 
1540bdd1243dSDimitry Andric int nth = __kmp_omp_num_threads(loc); 1541bdd1243dSDimitry Andric if (nth > 0) { // 0 means debugger doesn't want to change num threads 1542bdd1243dSDimitry Andric master_set_numthreads = nth; 1543bdd1243dSDimitry Andric } 1544bdd1243dSDimitry Andric } 1545bdd1243dSDimitry Andric #endif 1546bdd1243dSDimitry Andric 1547bdd1243dSDimitry Andric // Figure out the proc_bind policy for the nested parallel within teams 1548bdd1243dSDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1549bdd1243dSDimitry Andric // proc_bind_default means don't update 1550bdd1243dSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 1551bdd1243dSDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1552bdd1243dSDimitry Andric proc_bind = proc_bind_false; 1553bdd1243dSDimitry Andric } else { 1554bdd1243dSDimitry Andric // No proc_bind clause specified; use current proc-bind-var 1555bdd1243dSDimitry Andric if (proc_bind == proc_bind_default) { 1556bdd1243dSDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1557bdd1243dSDimitry Andric } 1558bdd1243dSDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 1559bdd1243dSDimitry Andric This overrides proc-bind-var for this parallel region, but does not 1560bdd1243dSDimitry Andric change proc-bind-var. */ 1561bdd1243dSDimitry Andric // Figure the value of proc-bind-var for the child threads. 
1562bdd1243dSDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 1563bdd1243dSDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 1564bdd1243dSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 1565bdd1243dSDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1566bdd1243dSDimitry Andric } 1567bdd1243dSDimitry Andric } 1568bdd1243dSDimitry Andric KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind); 1569bdd1243dSDimitry Andric // Need to change the bind-var ICV to correct value for each implicit task 1570bdd1243dSDimitry Andric if (proc_bind_icv != proc_bind_default && 1571bdd1243dSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) { 1572bdd1243dSDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1573bdd1243dSDimitry Andric for (i = 0; i < master_th->th.th_team_nproc; ++i) { 1574bdd1243dSDimitry Andric other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv; 1575bdd1243dSDimitry Andric } 1576bdd1243dSDimitry Andric } 1577bdd1243dSDimitry Andric // Reset for next parallel region 1578bdd1243dSDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 1579bdd1243dSDimitry Andric 1580bdd1243dSDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 1581bdd1243dSDimitry Andric if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) || 1582bdd1243dSDimitry Andric KMP_ITT_DEBUG) && 1583bdd1243dSDimitry Andric __kmp_forkjoin_frames_mode == 3 && 1584bdd1243dSDimitry Andric parent_team->t.t_active_level == 1 // only report frames at level 1 1585bdd1243dSDimitry Andric && master_th->th.th_teams_size.nteams == 1) { 1586bdd1243dSDimitry Andric kmp_uint64 tmp_time = __itt_get_timestamp(); 1587bdd1243dSDimitry Andric master_th->th.th_frame_time = tmp_time; 1588bdd1243dSDimitry Andric parent_team->t.t_region_time = tmp_time; 1589bdd1243dSDimitry Andric } 1590bdd1243dSDimitry Andric if (__itt_stack_caller_create_ptr) { 1591bdd1243dSDimitry 
Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 1592bdd1243dSDimitry Andric // create new stack stitching id before entering fork barrier 1593bdd1243dSDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 1594bdd1243dSDimitry Andric } 1595bdd1243dSDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ 1596bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED 1597bdd1243dSDimitry Andric __kmp_partition_places(parent_team); 1598bdd1243dSDimitry Andric #endif 1599bdd1243dSDimitry Andric 1600bdd1243dSDimitry Andric KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, " 1601bdd1243dSDimitry Andric "master_th=%p, gtid=%d\n", 1602bdd1243dSDimitry Andric root, parent_team, master_th, gtid)); 1603bdd1243dSDimitry Andric __kmp_internal_fork(loc, gtid, parent_team); 1604bdd1243dSDimitry Andric KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, " 1605bdd1243dSDimitry Andric "master_th=%p, gtid=%d\n", 1606bdd1243dSDimitry Andric root, parent_team, master_th, gtid)); 1607bdd1243dSDimitry Andric 1608bdd1243dSDimitry Andric if (call_context == fork_context_gnu) 1609bdd1243dSDimitry Andric return TRUE; 1610bdd1243dSDimitry Andric 1611bdd1243dSDimitry Andric /* Invoke microtask for PRIMARY thread */ 1612bdd1243dSDimitry Andric KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid, 1613bdd1243dSDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 1614bdd1243dSDimitry Andric 1615bdd1243dSDimitry Andric if (!parent_team->t.t_invoke(gtid)) { 1616bdd1243dSDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 1617bdd1243dSDimitry Andric } 1618bdd1243dSDimitry Andric KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid, 1619bdd1243dSDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 1620bdd1243dSDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}

// Create a serialized parallel region: the calling (primary) thread executes
// the microtask itself instead of forking worker threads.
//
// Parameters mirror __kmp_fork_call: loc is the source location, gtid the
// global thread id, call_context identifies the caller convention (Intel
// compiler, GNU native code, ...), argc/microtask/invoker describe the
// outlined region, master_th/parent_team identify the forking thread, and ap
// carries the variadic microtask arguments (NULL when args come from the
// parent team, i.e. the teams-construct path).
// Under OMPT_SUPPORT the extra pointers let this function emit the
// implicit-task and parallel-end tool callbacks and return the parallel data
// to the caller.
//
// Returns FALSE ("did not go parallel") on every path except that the
// fork_context_gnu path returns early before the final trace/KMP_MB().
static inline int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

  /* josh todo: hypothetical question: what do we do for OS X*? */
  // Scratch buffer for unpacking the variadic microtask arguments. On the
  // listed Linux targets a C99 VLA is used; elsewhere fall back to
  // KMP_ALLOCA. Either way the storage lives on this thread's stack for the
  // duration of the call.
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
          KMP_ARCH_AARCH64) */

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  // Set up the serialized-parallel bookkeeping (serial team, levels, ICVs).
  __kmpc_serialized_parallel(loc, gtid);

#if OMPD_SUPPORT
  // Expose the outlined function to the debugger (OMPD) via the serial team.
  master_th->th.th_serial_team->t.t_pkfn = microtask;
#endif

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // No argument list: this is the inner parallel of a teams construct.
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;
      // Get args from parent team for teams construct

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        // Link a lightweight task team so tools see an implicit task for
        // the serialized region.
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        // Tool disabled: give the microtask a writable dummy exit frame.
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        // Run the microtask on this thread, passing the parent team's argv.
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        // Tear down in reverse: end implicit task, unlink lw task team,
        // then report parallel-end to the tool.
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      // Outer "parallel" of a teams construct executed serially: reuse the
      // serial team and dispatch through the special teams invoker.
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      // team->t.t_pkfn = microtask;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        // Unpack variadic arguments into the team's argv (reverse order,
        // matching the fill direction used elsewhere in fork paths).
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          // Get args from parent team for teams construct
          argv[i] = parent_team->t.t_argv[i];
      }
      // AC: revert change made in __kmpc_serialized_parallel()
      //     because initial code in teams should have level=0
      team->t.t_level--;
      // AC: call special invoker for outer "parallel" of teams construct
      invoker(gtid);
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        // Note: the teams path reports the initial task (flags 0,
        // ompt_task_initial) and parallel_league rather than parallel_team.
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else {
      // Ordinary serialized parallel region: unpack args into the local
      // scratch buffer and invoke the microtask directly.
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    }
  } else if (call_context == fork_context_gnu) {
    // GNU (libgomp-compat) entry: the caller invokes the microtask itself,
    // so only link the OMPT lightweight task team and return.
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // don't use lw_taskteam after linking. content was swapped
#endif

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */
  return FALSE;
}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
18620b57cec5SDimitry Andric kmp_int32 argc, microtask_t microtask, launch_t invoker, 186316794618SDimitry Andric kmp_va_list ap) { 18640b57cec5SDimitry Andric void **argv; 18650b57cec5SDimitry Andric int i; 18660b57cec5SDimitry Andric int master_tid; 18670b57cec5SDimitry Andric int master_this_cons; 18680b57cec5SDimitry Andric kmp_team_t *team; 18690b57cec5SDimitry Andric kmp_team_t *parent_team; 18700b57cec5SDimitry Andric kmp_info_t *master_th; 18710b57cec5SDimitry Andric kmp_root_t *root; 18720b57cec5SDimitry Andric int nthreads; 18730b57cec5SDimitry Andric int master_active; 18740b57cec5SDimitry Andric int master_set_numthreads; 18750b57cec5SDimitry Andric int level; 18760b57cec5SDimitry Andric int active_level; 18770b57cec5SDimitry Andric int teams_level; 18780b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 18790b57cec5SDimitry Andric kmp_hot_team_ptr_t **p_hot_teams; 18800b57cec5SDimitry Andric #endif 18810b57cec5SDimitry Andric { // KMP_TIME_BLOCK 18820b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 18830b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 18860b57cec5SDimitry Andric if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 18870b57cec5SDimitry Andric /* Some systems prefer the stack for the root thread(s) to start with */ 18880b57cec5SDimitry Andric /* some gap from the parent stack to prevent false sharing. 
*/ 18890b57cec5SDimitry Andric void *dummy = KMP_ALLOCA(__kmp_stkpadding); 18900b57cec5SDimitry Andric /* These 2 lines below are so this does not get optimized out */ 18910b57cec5SDimitry Andric if (__kmp_stkpadding > KMP_MAX_STKPADDING) 18920b57cec5SDimitry Andric __kmp_stkpadding += (short)((kmp_int64)dummy); 18930b57cec5SDimitry Andric } 18940b57cec5SDimitry Andric 18950b57cec5SDimitry Andric /* initialize if needed */ 18960b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 18970b57cec5SDimitry Andric __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 18980b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 18990b57cec5SDimitry Andric __kmp_parallel_initialize(); 19000b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 19010b57cec5SDimitry Andric 19020b57cec5SDimitry Andric /* setup current data */ 1903bdd1243dSDimitry Andric // AC: potentially unsafe, not in sync with library shutdown, 1904bdd1243dSDimitry Andric // __kmp_threads can be freed 1905bdd1243dSDimitry Andric master_th = __kmp_threads[gtid]; 1906bdd1243dSDimitry Andric 19070b57cec5SDimitry Andric parent_team = master_th->th.th_team; 19080b57cec5SDimitry Andric master_tid = master_th->th.th_info.ds.ds_tid; 19090b57cec5SDimitry Andric master_this_cons = master_th->th.th_local.this_construct; 19100b57cec5SDimitry Andric root = master_th->th.th_root; 19110b57cec5SDimitry Andric master_active = root->r.r_active; 19120b57cec5SDimitry Andric master_set_numthreads = master_th->th.th_set_nproc; 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric #if OMPT_SUPPORT 19150b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 19160b57cec5SDimitry Andric ompt_data_t *parent_task_data; 19170b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 19180b57cec5SDimitry Andric void *return_address = NULL; 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric if (ompt_enabled.enabled) { 19210b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, 
19220b57cec5SDimitry Andric NULL, NULL); 19230b57cec5SDimitry Andric return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); 19240b57cec5SDimitry Andric } 19250b57cec5SDimitry Andric #endif 19260b57cec5SDimitry Andric 1927fe6060f1SDimitry Andric // Assign affinity to root thread if it hasn't happened yet 1928fe6060f1SDimitry Andric __kmp_assign_root_init_mask(); 1929fe6060f1SDimitry Andric 19300b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 19310b57cec5SDimitry Andric level = parent_team->t.t_level; 19320b57cec5SDimitry Andric // used to launch non-serial teams even if nested is not allowed 19330b57cec5SDimitry Andric active_level = parent_team->t.t_active_level; 19340b57cec5SDimitry Andric // needed to check nesting inside the teams 19350b57cec5SDimitry Andric teams_level = master_th->th.th_teams_level; 19360b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 19370b57cec5SDimitry Andric p_hot_teams = &master_th->th.th_hot_teams; 19380b57cec5SDimitry Andric if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 19390b57cec5SDimitry Andric *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 19400b57cec5SDimitry Andric sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 19410b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 19420b57cec5SDimitry Andric // it is either actual or not needed (when active_level > 0) 19430b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team_nth = 1; 19440b57cec5SDimitry Andric } 19450b57cec5SDimitry Andric #endif 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric #if OMPT_SUPPORT 19480b57cec5SDimitry Andric if (ompt_enabled.enabled) { 19490b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 19500b57cec5SDimitry Andric int team_size = master_set_numthreads 19510b57cec5SDimitry Andric ? 
master_set_numthreads 19520b57cec5SDimitry Andric : get__nproc_2(parent_team, master_tid); 1953489b1cf2SDimitry Andric int flags = OMPT_INVOKER(call_context) | 1954489b1cf2SDimitry Andric ((microtask == (microtask_t)__kmp_teams_master) 1955489b1cf2SDimitry Andric ? ompt_parallel_league 1956489b1cf2SDimitry Andric : ompt_parallel_team); 19570b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 1958489b1cf2SDimitry Andric parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, 1959489b1cf2SDimitry Andric return_address); 19600b57cec5SDimitry Andric } 19610b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 19620b57cec5SDimitry Andric } 19630b57cec5SDimitry Andric #endif 19640b57cec5SDimitry Andric 19650b57cec5SDimitry Andric master_th->th.th_ident = loc; 19660b57cec5SDimitry Andric 1967bdd1243dSDimitry Andric // Parallel closely nested in teams construct: 1968bdd1243dSDimitry Andric if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) { 1969bdd1243dSDimitry Andric return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root, 1970bdd1243dSDimitry Andric call_context, microtask, invoker, 1971bdd1243dSDimitry Andric master_set_numthreads, level, 19720b57cec5SDimitry Andric #if OMPT_SUPPORT 1973bdd1243dSDimitry Andric ompt_parallel_data, return_address, 19740b57cec5SDimitry Andric #endif 1975bdd1243dSDimitry Andric ap); 1976bdd1243dSDimitry Andric } // End parallel closely nested in teams construct 19770b57cec5SDimitry Andric 19780b57cec5SDimitry Andric #if KMP_DEBUG 19790b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 19800b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 19810b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 19820b57cec5SDimitry Andric } 19830b57cec5SDimitry Andric #endif 19840b57cec5SDimitry Andric 1985349cc55cSDimitry Andric // Need this to happen before we determine the 
number of threads, not while 1986349cc55cSDimitry Andric // we are allocating the team 1987349cc55cSDimitry Andric //__kmp_push_current_task_to_thread(master_th, parent_team, 0); 1988bdd1243dSDimitry Andric 1989bdd1243dSDimitry Andric // Determine the number of threads 1990bdd1243dSDimitry Andric int enter_teams = 1991bdd1243dSDimitry Andric __kmp_is_entering_teams(active_level, level, teams_level, ap); 1992bdd1243dSDimitry Andric if ((!enter_teams && 1993bdd1243dSDimitry Andric (parent_team->t.t_active_level >= 1994bdd1243dSDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)) || 1995bdd1243dSDimitry Andric (__kmp_library == library_serial)) { 1996bdd1243dSDimitry Andric KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid)); 19970b57cec5SDimitry Andric nthreads = 1; 19980b57cec5SDimitry Andric } else { 1999349cc55cSDimitry Andric nthreads = master_set_numthreads 20000b57cec5SDimitry Andric ? master_set_numthreads 2001349cc55cSDimitry Andric // TODO: get nproc directly from current task 2002349cc55cSDimitry Andric : get__nproc_2(parent_team, master_tid); 20030b57cec5SDimitry Andric // Check if we need to take forkjoin lock? (no need for serialized 2004bdd1243dSDimitry Andric // parallel out of teams construct). 20050b57cec5SDimitry Andric if (nthreads > 1) { 20060b57cec5SDimitry Andric /* determine how many new threads we can use */ 20070b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 20080b57cec5SDimitry Andric /* AC: If we execute teams from parallel region (on host), then teams 20090b57cec5SDimitry Andric should be created but each can only have 1 thread if nesting is 20100b57cec5SDimitry Andric disabled. If teams called from serial region, then teams and their 20110b57cec5SDimitry Andric threads should be created regardless of the nesting setting. 
*/ 20120b57cec5SDimitry Andric nthreads = __kmp_reserve_threads(root, parent_team, master_tid, 20130b57cec5SDimitry Andric nthreads, enter_teams); 20140b57cec5SDimitry Andric if (nthreads == 1) { 20150b57cec5SDimitry Andric // Free lock for single thread execution here; for multi-thread 20160b57cec5SDimitry Andric // execution it will be freed later after team of threads created 20170b57cec5SDimitry Andric // and initialized 20180b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 20190b57cec5SDimitry Andric } 20200b57cec5SDimitry Andric } 20210b57cec5SDimitry Andric } 20220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nthreads > 0); 20230b57cec5SDimitry Andric 20240b57cec5SDimitry Andric // If we temporarily changed the set number of threads then restore it now 20250b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 20260b57cec5SDimitry Andric 20270b57cec5SDimitry Andric if (nthreads == 1) { 2028bdd1243dSDimitry Andric return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask, 2029bdd1243dSDimitry Andric invoker, master_th, parent_team, 20300b57cec5SDimitry Andric #if OMPT_SUPPORT 2031bdd1243dSDimitry Andric &ompt_parallel_data, &return_address, 2032bdd1243dSDimitry Andric &parent_task_data, 20330b57cec5SDimitry Andric #endif 2034bdd1243dSDimitry Andric ap); 20350b57cec5SDimitry Andric } // if (nthreads == 1) 20360b57cec5SDimitry Andric 20370b57cec5SDimitry Andric // GEH: only modify the executing flag in the case when not serialized 20380b57cec5SDimitry Andric // serialized case is handled in kmpc_serialized_parallel 20390b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 20400b57cec5SDimitry Andric "curtask=%p, curtask_max_aclevel=%d\n", 20410b57cec5SDimitry Andric parent_team->t.t_active_level, master_th, 20420b57cec5SDimitry Andric master_th->th.th_current_task, 20430b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)); 20440b57cec5SDimitry Andric // 
TODO: GEH - cannot do this assertion because root thread not set up as 20450b57cec5SDimitry Andric // executing 20460b57cec5SDimitry Andric // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 20470b57cec5SDimitry Andric master_th->th.th_current_task->td_flags.executing = 0; 20480b57cec5SDimitry Andric 20490b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 20500b57cec5SDimitry Andric /* Increment our nested depth level */ 20510b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 20520b57cec5SDimitry Andric } 20530b57cec5SDimitry Andric 20540b57cec5SDimitry Andric // See if we need to make a copy of the ICVs. 20550b57cec5SDimitry Andric int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 20560b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_nth.used) && 20570b57cec5SDimitry Andric (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 20580b57cec5SDimitry Andric nthreads_icv = __kmp_nested_nth.nth[level + 1]; 20590b57cec5SDimitry Andric } else { 20600b57cec5SDimitry Andric nthreads_icv = 0; // don't update 20610b57cec5SDimitry Andric } 20620b57cec5SDimitry Andric 20630b57cec5SDimitry Andric // Figure out the proc_bind_policy for the new team. 
20640b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 2065349cc55cSDimitry Andric // proc_bind_default means don't update 2066349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 20670b57cec5SDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 20680b57cec5SDimitry Andric proc_bind = proc_bind_false; 20690b57cec5SDimitry Andric } else { 20700b57cec5SDimitry Andric // No proc_bind clause specified; use current proc-bind-var for this 20710b57cec5SDimitry Andric // parallel region 2072349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 20730b57cec5SDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 20740b57cec5SDimitry Andric } 2075349cc55cSDimitry Andric // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND 2076349cc55cSDimitry Andric if (master_th->th.th_teams_microtask && 2077349cc55cSDimitry Andric microtask == (microtask_t)__kmp_teams_master) { 2078349cc55cSDimitry Andric proc_bind = __kmp_teams_proc_bind; 2079349cc55cSDimitry Andric } 20800b57cec5SDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 20810b57cec5SDimitry Andric This overrides proc-bind-var for this parallel region, but does not 20820b57cec5SDimitry Andric change proc-bind-var. */ 20830b57cec5SDimitry Andric // Figure the value of proc-bind-var for the child threads. 
20840b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 20850b57cec5SDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 20860b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 2087349cc55cSDimitry Andric // Do not modify the proc bind icv for the two teams construct forks 2088349cc55cSDimitry Andric // They just let the proc bind icv pass through 2089349cc55cSDimitry Andric if (!master_th->th.th_teams_microtask || 2090349cc55cSDimitry Andric !(microtask == (microtask_t)__kmp_teams_master || ap == NULL)) 20910b57cec5SDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 20920b57cec5SDimitry Andric } 20930b57cec5SDimitry Andric } 20940b57cec5SDimitry Andric 20950b57cec5SDimitry Andric // Reset for next parallel region 20960b57cec5SDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 20970b57cec5SDimitry Andric 20980b57cec5SDimitry Andric if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) { 20990b57cec5SDimitry Andric kmp_internal_control_t new_icvs; 21000b57cec5SDimitry Andric copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 21010b57cec5SDimitry Andric new_icvs.next = NULL; 21020b57cec5SDimitry Andric if (nthreads_icv > 0) { 21030b57cec5SDimitry Andric new_icvs.nproc = nthreads_icv; 21040b57cec5SDimitry Andric } 21050b57cec5SDimitry Andric if (proc_bind_icv != proc_bind_default) { 21060b57cec5SDimitry Andric new_icvs.proc_bind = proc_bind_icv; 21070b57cec5SDimitry Andric } 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric /* allocate a new parallel team */ 21100b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 21110b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 21120b57cec5SDimitry Andric #if OMPT_SUPPORT 21130b57cec5SDimitry Andric ompt_parallel_data, 21140b57cec5SDimitry Andric #endif 21150b57cec5SDimitry Andric proc_bind, &new_icvs, 21160b57cec5SDimitry Andric argc 
USE_NESTED_HOT_ARG(master_th)); 2117349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2118349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); 21190b57cec5SDimitry Andric } else { 21200b57cec5SDimitry Andric /* allocate a new parallel team */ 21210b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 21220b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 21230b57cec5SDimitry Andric #if OMPT_SUPPORT 21240b57cec5SDimitry Andric ompt_parallel_data, 21250b57cec5SDimitry Andric #endif 21260b57cec5SDimitry Andric proc_bind, 21270b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, 21280b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2129349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2130349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, 2131349cc55cSDimitry Andric &master_th->th.th_current_task->td_icvs); 21320b57cec5SDimitry Andric } 21330b57cec5SDimitry Andric KF_TRACE( 21340b57cec5SDimitry Andric 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 21350b57cec5SDimitry Andric 21360b57cec5SDimitry Andric /* setup the new team */ 21370b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 21380b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 21390b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 21400b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 21410b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 21420b57cec5SDimitry Andric #if OMPT_SUPPORT 21430b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, 21440b57cec5SDimitry Andric return_address); 21450b57cec5SDimitry Andric #endif 21460b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_invoke, invoker); 
// TODO move to root, maybe 21470b57cec5SDimitry Andric // TODO: parent_team->t.t_level == INT_MAX ??? 21480b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 21490b57cec5SDimitry Andric int new_level = parent_team->t.t_level + 1; 21500b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 21510b57cec5SDimitry Andric new_level = parent_team->t.t_active_level + 1; 21520b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 21530b57cec5SDimitry Andric } else { 21540b57cec5SDimitry Andric // AC: Do not increase parallel level at start of the teams construct 21550b57cec5SDimitry Andric int new_level = parent_team->t.t_level; 21560b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 21570b57cec5SDimitry Andric new_level = parent_team->t.t_active_level; 21580b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 21590b57cec5SDimitry Andric } 21600b57cec5SDimitry Andric kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2161fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 21620b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 21630b57cec5SDimitry Andric 21640b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 21650b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric // Update the floating point rounding in the team if required. 21680b57cec5SDimitry Andric propagateFPControl(team); 2169fe6060f1SDimitry Andric #if OMPD_SUPPORT 2170fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 2171fe6060f1SDimitry Andric ompd_bp_parallel_begin(); 2172fe6060f1SDimitry Andric #endif 21730b57cec5SDimitry Andric 21740b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 2175fe6060f1SDimitry Andric // Set primary thread's task team to team's task team. 
Unless this is hot 2176fe6060f1SDimitry Andric // team, it should be NULL. 21770b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 21780b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 2179fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team " 21800b57cec5SDimitry Andric "%p, new task_team %p / team %p\n", 21810b57cec5SDimitry Andric __kmp_gtid_from_thread(master_th), 21820b57cec5SDimitry Andric master_th->th.th_task_team, parent_team, 21830b57cec5SDimitry Andric team->t.t_task_team[master_th->th.th_task_state], team)); 21840b57cec5SDimitry Andric 21850b57cec5SDimitry Andric if (active_level || master_th->th.th_task_team) { 2186fe6060f1SDimitry Andric // Take a memo of primary thread's task_state 21870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 21880b57cec5SDimitry Andric if (master_th->th.th_task_state_top >= 21890b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz) { // increase size 21900b57cec5SDimitry Andric kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 21910b57cec5SDimitry Andric kmp_uint8 *old_stack, *new_stack; 21920b57cec5SDimitry Andric kmp_uint32 i; 21930b57cec5SDimitry Andric new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 21940b57cec5SDimitry Andric for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 21950b57cec5SDimitry Andric new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 21960b57cec5SDimitry Andric } 21970b57cec5SDimitry Andric for (i = master_th->th.th_task_state_stack_sz; i < new_size; 21980b57cec5SDimitry Andric ++i) { // zero-init rest of stack 21990b57cec5SDimitry Andric new_stack[i] = 0; 22000b57cec5SDimitry Andric } 22010b57cec5SDimitry Andric old_stack = master_th->th.th_task_state_memo_stack; 22020b57cec5SDimitry Andric master_th->th.th_task_state_memo_stack = new_stack; 22030b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz = new_size; 
22040b57cec5SDimitry Andric __kmp_free(old_stack); 22050b57cec5SDimitry Andric } 2206fe6060f1SDimitry Andric // Store primary thread's task_state on stack 22070b57cec5SDimitry Andric master_th->th 22080b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top] = 22090b57cec5SDimitry Andric master_th->th.th_task_state; 22100b57cec5SDimitry Andric master_th->th.th_task_state_top++; 22110b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 22120b57cec5SDimitry Andric if (master_th->th.th_hot_teams && 22130b57cec5SDimitry Andric active_level < __kmp_hot_teams_max_level && 22140b57cec5SDimitry Andric team == master_th->th.th_hot_teams[active_level].hot_team) { 2215fe6060f1SDimitry Andric // Restore primary thread's nested state if nested hot team 22160b57cec5SDimitry Andric master_th->th.th_task_state = 22170b57cec5SDimitry Andric master_th->th 22180b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top]; 22190b57cec5SDimitry Andric } else { 22200b57cec5SDimitry Andric #endif 22210b57cec5SDimitry Andric master_th->th.th_task_state = 0; 22220b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 22230b57cec5SDimitry Andric } 22240b57cec5SDimitry Andric #endif 22250b57cec5SDimitry Andric } 22260b57cec5SDimitry Andric #if !KMP_NESTED_HOT_TEAMS 22270b57cec5SDimitry Andric KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 22280b57cec5SDimitry Andric (team == root->r.r_hot_team)); 22290b57cec5SDimitry Andric #endif 22300b57cec5SDimitry Andric } 22310b57cec5SDimitry Andric 22320b57cec5SDimitry Andric KA_TRACE( 22330b57cec5SDimitry Andric 20, 22340b57cec5SDimitry Andric ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 22350b57cec5SDimitry Andric gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 22360b57cec5SDimitry Andric team->t.t_nproc)); 22370b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 22380b57cec5SDimitry Andric (team->t.t_master_tid == 0 && 22390b57cec5SDimitry Andric 
(team->t.t_parent == root->r.r_root_team || 22400b57cec5SDimitry Andric team->t.t_parent->t.t_serialized))); 22410b57cec5SDimitry Andric KMP_MB(); 22420b57cec5SDimitry Andric 22430b57cec5SDimitry Andric /* now, setup the arguments */ 22440b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 22450b57cec5SDimitry Andric if (ap) { 22460b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) { 224716794618SDimitry Andric void *new_argv = va_arg(kmp_va_deref(ap), void *); 22480b57cec5SDimitry Andric KMP_CHECK_UPDATE(*argv, new_argv); 22490b57cec5SDimitry Andric argv++; 22500b57cec5SDimitry Andric } 22510b57cec5SDimitry Andric } else { 22520b57cec5SDimitry Andric for (i = 0; i < argc; ++i) { 22530b57cec5SDimitry Andric // Get args from parent team for teams construct 22540b57cec5SDimitry Andric KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 22550b57cec5SDimitry Andric } 22560b57cec5SDimitry Andric } 22570b57cec5SDimitry Andric 22580b57cec5SDimitry Andric /* now actually fork the threads */ 22590b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 22600b57cec5SDimitry Andric if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 22610b57cec5SDimitry Andric root->r.r_active = TRUE; 22620b57cec5SDimitry Andric 2263349cc55cSDimitry Andric __kmp_fork_team_threads(root, team, master_th, gtid, !ap); 22640b57cec5SDimitry Andric __kmp_setup_icv_copy(team, nthreads, 22650b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, loc); 22660b57cec5SDimitry Andric 22670b57cec5SDimitry Andric #if OMPT_SUPPORT 22680b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 22690b57cec5SDimitry Andric #endif 22700b57cec5SDimitry Andric 22710b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 22720b57cec5SDimitry Andric 22730b57cec5SDimitry Andric #if USE_ITT_BUILD 22740b57cec5SDimitry Andric if (team->t.t_active_level == 1 // only report frames at level 1 
22750b57cec5SDimitry Andric && !master_th->th.th_teams_microtask) { // not in teams construct 22760b57cec5SDimitry Andric #if USE_ITT_NOTIFY 22770b57cec5SDimitry Andric if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 22780b57cec5SDimitry Andric (__kmp_forkjoin_frames_mode == 3 || 22790b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 1)) { 22800b57cec5SDimitry Andric kmp_uint64 tmp_time = 0; 22810b57cec5SDimitry Andric if (__itt_get_timestamp_ptr) 22820b57cec5SDimitry Andric tmp_time = __itt_get_timestamp(); 22830b57cec5SDimitry Andric // Internal fork - report frame begin 22840b57cec5SDimitry Andric master_th->th.th_frame_time = tmp_time; 22850b57cec5SDimitry Andric if (__kmp_forkjoin_frames_mode == 3) 22860b57cec5SDimitry Andric team->t.t_region_time = tmp_time; 22870b57cec5SDimitry Andric } else 22880b57cec5SDimitry Andric // only one notification scheme (either "submit" or "forking/joined", not both) 22890b57cec5SDimitry Andric #endif /* USE_ITT_NOTIFY */ 22900b57cec5SDimitry Andric if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 22910b57cec5SDimitry Andric __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 22920b57cec5SDimitry Andric // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
22930b57cec5SDimitry Andric __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 22940b57cec5SDimitry Andric } 22950b57cec5SDimitry Andric } 22960b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22970b57cec5SDimitry Andric 22980b57cec5SDimitry Andric /* now go on and do the work */ 22990b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 23000b57cec5SDimitry Andric KMP_MB(); 23010b57cec5SDimitry Andric KF_TRACE(10, 23020b57cec5SDimitry Andric ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 23030b57cec5SDimitry Andric root, team, master_th, gtid)); 23040b57cec5SDimitry Andric 23050b57cec5SDimitry Andric #if USE_ITT_BUILD 23060b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 2307fe6060f1SDimitry Andric // create new stack stitching id before entering fork barrier 2308fe6060f1SDimitry Andric if (!enter_teams) { 2309fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL); 2310fe6060f1SDimitry Andric team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2311fe6060f1SDimitry Andric } else if (parent_team->t.t_serialized) { 2312fe6060f1SDimitry Andric // keep stack stitching id in the serialized parent_team; 2313fe6060f1SDimitry Andric // current team will be used for parallel inside the teams; 2314fe6060f1SDimitry Andric // if parent_team is active, then it already keeps stack stitching id 2315fe6060f1SDimitry Andric // for the league of teams 2316fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 2317fe6060f1SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2318fe6060f1SDimitry Andric } 23190b57cec5SDimitry Andric } 23200b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 23210b57cec5SDimitry Andric 2322fe6060f1SDimitry Andric // AC: skip __kmp_internal_fork at teams construct, let only primary 23230b57cec5SDimitry Andric // threads execute 23240b57cec5SDimitry Andric if (ap) { 23250b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, team); 
23260b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 23270b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 23280b57cec5SDimitry Andric root, team, master_th, gtid)); 23290b57cec5SDimitry Andric } 23300b57cec5SDimitry Andric 23310b57cec5SDimitry Andric if (call_context == fork_context_gnu) { 23320b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 23330b57cec5SDimitry Andric return TRUE; 23340b57cec5SDimitry Andric } 23350b57cec5SDimitry Andric 2336fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 23370b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 23380b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 23390b57cec5SDimitry Andric } // END of timer KMP_fork_call block 23400b57cec5SDimitry Andric 23410b57cec5SDimitry Andric #if KMP_STATS_ENABLED 23420b57cec5SDimitry Andric // If beginning a teams construct, then change thread state 23430b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 23440b57cec5SDimitry Andric if (!ap) { 23450b57cec5SDimitry Andric KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION); 23460b57cec5SDimitry Andric } 23470b57cec5SDimitry Andric #endif 23480b57cec5SDimitry Andric 23490b57cec5SDimitry Andric if (!team->t.t_invoke(gtid)) { 2350fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 23510b57cec5SDimitry Andric } 23520b57cec5SDimitry Andric 23530b57cec5SDimitry Andric #if KMP_STATS_ENABLED 23540b57cec5SDimitry Andric // If was beginning of a teams construct, then reset thread state 23550b57cec5SDimitry Andric if (!ap) { 23560b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 23570b57cec5SDimitry Andric } 23580b57cec5SDimitry Andric #endif 23590b57cec5SDimitry Andric 23600b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 23610b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}

#if OMPT_SUPPORT
// Restore the OMPT thread state of 'thread' after leaving a parallel region:
// work-serial if 'team' is (still) serialized, work-parallel otherwise.
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

// Fire the OMPT parallel-end callback (if a tool registered one) for the
// region described by 'parallel_data', clear the current task's enter frame,
// and restore the thread state via __kmp_join_restore_state.
// 'flags' carries the invoker and league-vs-team bits (callers pass
// OMPT_INVOKER(...) | ompt_parallel_team/ompt_parallel_league); 'codeptr' is
// the team's master_return_address, handed through to the tool.
// NOTE(review): 'gtid' is unused here; presumably kept for signature symmetry
// with the other join helpers — confirm before removing.
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif

// Join a parallel region: run the join barrier (unless exiting a teams
// construct), fire OMPT/OMPD/ITT end events as configured, then restore the
// primary thread's per-team state and free or keep the team. Counterpart of
// __kmp_fork_call.
void
__kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                ,
                enum fork_context_e fork_context
#endif
                ,
                int exit_teams) {
  // loc: source location of the join; gtid: global thread id of the primary
  // thread; fork_context (OMPT builds): which entry point forked the region;
  // exit_teams: nonzero when this join ends a teams construct — in that case
  // no join barrier is run for the internal team (see below).
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  // Capture the microtask now; 'team' may be freed before the final
  // parallel-end callback needs to distinguish league from team.
  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  // Fast path: the region was serialized — there is no real team to join;
  // adjust teams-construct nesting counters and unwind the serialized region.
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    // But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
#endif
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by master of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
#endif
  }

  KMP_MB();

#if OMPT_SUPPORT
  // Saved for the final __kmp_join_ompt call, which may run after the team's
  // bookkeeping has been torn down.
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
// AC: We need to leave the team structure intact at the end of parallel
// inside the teams construct, so that at the next parallel same (hot) team
// works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams primary thread in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      // The teams-master microtask ends an initial task; a regular parallel
      // region ends an implicit task (team size 0 per OMPT for initial tasks).
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif
  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally since the old team may be
     reallocated and the hierarchy appears inconsistent. it is actually safe to
     run and won't cause any bugs, but will cause those assertion failures. it's
     only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      // Reset the task state of primary thread if we are not hot team because
      // in this case all the worker threads will be free, and their task state
      // will be reset. If not reset the primary's, the task state will be
      // inconsistent.
      master_th->th.th_task_state = 0;
    }
    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

control->serial_nesting_level = thread->th.th_team->t.t_serialized; 27580b57cec5SDimitry Andric 27590b57cec5SDimitry Andric control->next = thread->th.th_team->t.t_control_stack_top; 27600b57cec5SDimitry Andric thread->th.th_team->t.t_control_stack_top = control; 27610b57cec5SDimitry Andric } 27620b57cec5SDimitry Andric } 27630b57cec5SDimitry Andric } 27640b57cec5SDimitry Andric 27650b57cec5SDimitry Andric /* Changes set_nproc */ 27660b57cec5SDimitry Andric void __kmp_set_num_threads(int new_nth, int gtid) { 27670b57cec5SDimitry Andric kmp_info_t *thread; 27680b57cec5SDimitry Andric kmp_root_t *root; 27690b57cec5SDimitry Andric 27700b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth)); 27710b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 27720b57cec5SDimitry Andric 27730b57cec5SDimitry Andric if (new_nth < 1) 27740b57cec5SDimitry Andric new_nth = 1; 27750b57cec5SDimitry Andric else if (new_nth > __kmp_max_nth) 27760b57cec5SDimitry Andric new_nth = __kmp_max_nth; 27770b57cec5SDimitry Andric 27780b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 27790b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 27800b57cec5SDimitry Andric if (thread->th.th_current_task->td_icvs.nproc == new_nth) 27810b57cec5SDimitry Andric return; // nothing to do 27820b57cec5SDimitry Andric 27830b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 27840b57cec5SDimitry Andric 27850b57cec5SDimitry Andric set__nproc(thread, new_nth); 27860b57cec5SDimitry Andric 27870b57cec5SDimitry Andric // If this omp_set_num_threads() call will cause the hot team size to be 27880b57cec5SDimitry Andric // reduced (in the absence of a num_threads clause), then reduce it now, 27890b57cec5SDimitry Andric // rather than waiting for the next parallel region. 
27900b57cec5SDimitry Andric root = thread->th.th_root; 27910b57cec5SDimitry Andric if (__kmp_init_parallel && (!root->r.r_active) && 27920b57cec5SDimitry Andric (root->r.r_hot_team->t.t_nproc > new_nth) 27930b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 27940b57cec5SDimitry Andric && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 27950b57cec5SDimitry Andric #endif 27960b57cec5SDimitry Andric ) { 27970b57cec5SDimitry Andric kmp_team_t *hot_team = root->r.r_hot_team; 27980b57cec5SDimitry Andric int f; 27990b57cec5SDimitry Andric 28000b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 28010b57cec5SDimitry Andric 2802349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 2803349cc55cSDimitry Andric __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth); 2804349cc55cSDimitry Andric } 28050b57cec5SDimitry Andric // Release the extra threads we don't need any more. 28060b57cec5SDimitry Andric for (f = new_nth; f < hot_team->t.t_nproc; f++) { 28070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 28080b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 28090b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should unref 28100b57cec5SDimitry Andric // task team. 
28110b57cec5SDimitry Andric hot_team->t.t_threads[f]->th.th_task_team = NULL; 28120b57cec5SDimitry Andric } 28130b57cec5SDimitry Andric __kmp_free_thread(hot_team->t.t_threads[f]); 28140b57cec5SDimitry Andric hot_team->t.t_threads[f] = NULL; 28150b57cec5SDimitry Andric } 28160b57cec5SDimitry Andric hot_team->t.t_nproc = new_nth; 28170b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 28180b57cec5SDimitry Andric if (thread->th.th_hot_teams) { 28190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); 28200b57cec5SDimitry Andric thread->th.th_hot_teams[0].hot_team_nth = new_nth; 28210b57cec5SDimitry Andric } 28220b57cec5SDimitry Andric #endif 28230b57cec5SDimitry Andric 2824349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 2825349cc55cSDimitry Andric hot_team->t.b->update_num_threads(new_nth); 2826349cc55cSDimitry Andric __kmp_add_threads_to_team(hot_team, new_nth); 2827349cc55cSDimitry Andric } 2828349cc55cSDimitry Andric 28290b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 28300b57cec5SDimitry Andric 28310b57cec5SDimitry Andric // Update the t_nproc field in the threads that are still active. 
28320b57cec5SDimitry Andric for (f = 0; f < new_nth; f++) { 28330b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 28340b57cec5SDimitry Andric hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; 28350b57cec5SDimitry Andric } 28360b57cec5SDimitry Andric // Special flag in case omp_set_num_threads() call 28370b57cec5SDimitry Andric hot_team->t.t_size_changed = -1; 28380b57cec5SDimitry Andric } 28390b57cec5SDimitry Andric } 28400b57cec5SDimitry Andric 28410b57cec5SDimitry Andric /* Changes max_active_levels */ 28420b57cec5SDimitry Andric void __kmp_set_max_active_levels(int gtid, int max_active_levels) { 28430b57cec5SDimitry Andric kmp_info_t *thread; 28440b57cec5SDimitry Andric 28450b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread " 28460b57cec5SDimitry Andric "%d = (%d)\n", 28470b57cec5SDimitry Andric gtid, max_active_levels)); 28480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28490b57cec5SDimitry Andric 28500b57cec5SDimitry Andric // validate max_active_levels 28510b57cec5SDimitry Andric if (max_active_levels < 0) { 28520b57cec5SDimitry Andric KMP_WARNING(ActiveLevelsNegative, max_active_levels); 28530b57cec5SDimitry Andric // We ignore this call if the user has specified a negative value. 28540b57cec5SDimitry Andric // The current setting won't be changed. The last valid setting will be 28550b57cec5SDimitry Andric // used. A warning will be issued (if warnings are allowed as controlled by 28560b57cec5SDimitry Andric // the KMP_WARNINGS env var). 
28570b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " 28580b57cec5SDimitry Andric "max_active_levels for thread %d = (%d)\n", 28590b57cec5SDimitry Andric gtid, max_active_levels)); 28600b57cec5SDimitry Andric return; 28610b57cec5SDimitry Andric } 28620b57cec5SDimitry Andric if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { 28630b57cec5SDimitry Andric // it's OK, the max_active_levels is within the valid range: [ 0; 28640b57cec5SDimitry Andric // KMP_MAX_ACTIVE_LEVELS_LIMIT ] 28650b57cec5SDimitry Andric // We allow a zero value. (implementation defined behavior) 28660b57cec5SDimitry Andric } else { 28670b57cec5SDimitry Andric KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, 28680b57cec5SDimitry Andric KMP_MAX_ACTIVE_LEVELS_LIMIT); 28690b57cec5SDimitry Andric max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 28700b57cec5SDimitry Andric // Current upper limit is MAX_INT. (implementation defined behavior) 28710b57cec5SDimitry Andric // If the input exceeds the upper limit, we correct the input to be the 28720b57cec5SDimitry Andric // upper limit. (implementation defined behavior) 28730b57cec5SDimitry Andric // Actually, the flow should never get here until we use MAX_INT limit. 
28740b57cec5SDimitry Andric } 28750b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " 28760b57cec5SDimitry Andric "max_active_levels for thread %d = (%d)\n", 28770b57cec5SDimitry Andric gtid, max_active_levels)); 28780b57cec5SDimitry Andric 28790b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 28800b57cec5SDimitry Andric 28810b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 28820b57cec5SDimitry Andric 28830b57cec5SDimitry Andric set__max_active_levels(thread, max_active_levels); 28840b57cec5SDimitry Andric } 28850b57cec5SDimitry Andric 28860b57cec5SDimitry Andric /* Gets max_active_levels */ 28870b57cec5SDimitry Andric int __kmp_get_max_active_levels(int gtid) { 28880b57cec5SDimitry Andric kmp_info_t *thread; 28890b57cec5SDimitry Andric 28900b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); 28910b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28920b57cec5SDimitry Andric 28930b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 28940b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thread->th.th_current_task); 28950b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " 28960b57cec5SDimitry Andric "curtask_maxaclevel=%d\n", 28970b57cec5SDimitry Andric gtid, thread->th.th_current_task, 28980b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.max_active_levels)); 28990b57cec5SDimitry Andric return thread->th.th_current_task->td_icvs.max_active_levels; 29000b57cec5SDimitry Andric } 29010b57cec5SDimitry Andric 2902fe6060f1SDimitry Andric // nteams-var per-device ICV 2903fe6060f1SDimitry Andric void __kmp_set_num_teams(int num_teams) { 2904fe6060f1SDimitry Andric if (num_teams > 0) 2905fe6060f1SDimitry Andric __kmp_nteams = num_teams; 2906fe6060f1SDimitry Andric } 2907fe6060f1SDimitry Andric int __kmp_get_max_teams(void) { return __kmp_nteams; } 2908fe6060f1SDimitry Andric // teams-thread-limit-var per-device ICV 2909fe6060f1SDimitry 
Andric void __kmp_set_teams_thread_limit(int limit) { 2910fe6060f1SDimitry Andric if (limit > 0) 2911fe6060f1SDimitry Andric __kmp_teams_thread_limit = limit; 2912fe6060f1SDimitry Andric } 2913fe6060f1SDimitry Andric int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; } 2914fe6060f1SDimitry Andric 29150b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int)); 29160b57cec5SDimitry Andric KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int)); 29170b57cec5SDimitry Andric 29180b57cec5SDimitry Andric /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ 29190b57cec5SDimitry Andric void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) { 29200b57cec5SDimitry Andric kmp_info_t *thread; 29210b57cec5SDimitry Andric kmp_sched_t orig_kind; 29220b57cec5SDimitry Andric // kmp_team_t *team; 29230b57cec5SDimitry Andric 29240b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", 29250b57cec5SDimitry Andric gtid, (int)kind, chunk)); 29260b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 29270b57cec5SDimitry Andric 29280b57cec5SDimitry Andric // Check if the kind parameter is valid, correct if needed. 29290b57cec5SDimitry Andric // Valid parameters should fit in one of two intervals - standard or extended: 29300b57cec5SDimitry Andric // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> 29310b57cec5SDimitry Andric // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 29320b57cec5SDimitry Andric orig_kind = kind; 29330b57cec5SDimitry Andric kind = __kmp_sched_without_mods(kind); 29340b57cec5SDimitry Andric 29350b57cec5SDimitry Andric if (kind <= kmp_sched_lower || kind >= kmp_sched_upper || 29360b57cec5SDimitry Andric (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) { 29370b57cec5SDimitry Andric // TODO: Hint needs attention in case we change the default schedule. 
29380b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind), 29390b57cec5SDimitry Andric KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), 29400b57cec5SDimitry Andric __kmp_msg_null); 29410b57cec5SDimitry Andric kind = kmp_sched_default; 29420b57cec5SDimitry Andric chunk = 0; // ignore chunk value in case of bad kind 29430b57cec5SDimitry Andric } 29440b57cec5SDimitry Andric 29450b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 29460b57cec5SDimitry Andric 29470b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 29480b57cec5SDimitry Andric 29490b57cec5SDimitry Andric if (kind < kmp_sched_upper_std) { 29500b57cec5SDimitry Andric if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) { 29510b57cec5SDimitry Andric // differ static chunked vs. unchunked: chunk should be invalid to 29520b57cec5SDimitry Andric // indicate unchunked schedule (which is the default) 29530b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; 29540b57cec5SDimitry Andric } else { 29550b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.sched.r_sched_type = 29560b57cec5SDimitry Andric __kmp_sch_map[kind - kmp_sched_lower - 1]; 29570b57cec5SDimitry Andric } 29580b57cec5SDimitry Andric } else { 29590b57cec5SDimitry Andric // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - 29600b57cec5SDimitry Andric // kmp_sched_lower - 2 ]; 29610b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.sched.r_sched_type = 29620b57cec5SDimitry Andric __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std - 29630b57cec5SDimitry Andric kmp_sched_lower - 2]; 29640b57cec5SDimitry Andric } 29650b57cec5SDimitry Andric __kmp_sched_apply_mods_intkind( 29660b57cec5SDimitry Andric orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type)); 29670b57cec5SDimitry Andric if (kind == kmp_sched_auto || chunk < 1) { 29680b57cec5SDimitry Andric // ignore parameter chunk for schedule auto 
29690b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; 29700b57cec5SDimitry Andric } else { 29710b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.sched.chunk = chunk; 29720b57cec5SDimitry Andric } 29730b57cec5SDimitry Andric } 29740b57cec5SDimitry Andric 29750b57cec5SDimitry Andric /* Gets def_sched_var ICV values */ 29760b57cec5SDimitry Andric void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) { 29770b57cec5SDimitry Andric kmp_info_t *thread; 29780b57cec5SDimitry Andric enum sched_type th_type; 29790b57cec5SDimitry Andric 29800b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid)); 29810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 29820b57cec5SDimitry Andric 29830b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 29840b57cec5SDimitry Andric 29850b57cec5SDimitry Andric th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 29860b57cec5SDimitry Andric switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) { 29870b57cec5SDimitry Andric case kmp_sch_static: 29880b57cec5SDimitry Andric case kmp_sch_static_greedy: 29890b57cec5SDimitry Andric case kmp_sch_static_balanced: 29900b57cec5SDimitry Andric *kind = kmp_sched_static; 29910b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kind, th_type); 29920b57cec5SDimitry Andric *chunk = 0; // chunk was not set, try to show this fact via zero value 29930b57cec5SDimitry Andric return; 29940b57cec5SDimitry Andric case kmp_sch_static_chunked: 29950b57cec5SDimitry Andric *kind = kmp_sched_static; 29960b57cec5SDimitry Andric break; 29970b57cec5SDimitry Andric case kmp_sch_dynamic_chunked: 29980b57cec5SDimitry Andric *kind = kmp_sched_dynamic; 29990b57cec5SDimitry Andric break; 30000b57cec5SDimitry Andric case kmp_sch_guided_chunked: 30010b57cec5SDimitry Andric case kmp_sch_guided_iterative_chunked: 30020b57cec5SDimitry Andric case kmp_sch_guided_analytical_chunked: 30030b57cec5SDimitry Andric *kind = kmp_sched_guided; 
30040b57cec5SDimitry Andric break; 30050b57cec5SDimitry Andric case kmp_sch_auto: 30060b57cec5SDimitry Andric *kind = kmp_sched_auto; 30070b57cec5SDimitry Andric break; 30080b57cec5SDimitry Andric case kmp_sch_trapezoidal: 30090b57cec5SDimitry Andric *kind = kmp_sched_trapezoidal; 30100b57cec5SDimitry Andric break; 30110b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 30120b57cec5SDimitry Andric case kmp_sch_static_steal: 30130b57cec5SDimitry Andric *kind = kmp_sched_static_steal; 30140b57cec5SDimitry Andric break; 30150b57cec5SDimitry Andric #endif 30160b57cec5SDimitry Andric default: 30170b57cec5SDimitry Andric KMP_FATAL(UnknownSchedulingType, th_type); 30180b57cec5SDimitry Andric } 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kind, th_type); 30210b57cec5SDimitry Andric *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 30220b57cec5SDimitry Andric } 30230b57cec5SDimitry Andric 30240b57cec5SDimitry Andric int __kmp_get_ancestor_thread_num(int gtid, int level) { 30250b57cec5SDimitry Andric 30260b57cec5SDimitry Andric int ii, dd; 30270b57cec5SDimitry Andric kmp_team_t *team; 30280b57cec5SDimitry Andric kmp_info_t *thr; 30290b57cec5SDimitry Andric 30300b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); 30310b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 30320b57cec5SDimitry Andric 30330b57cec5SDimitry Andric // validate level 30340b57cec5SDimitry Andric if (level == 0) 30350b57cec5SDimitry Andric return 0; 30360b57cec5SDimitry Andric if (level < 0) 30370b57cec5SDimitry Andric return -1; 30380b57cec5SDimitry Andric thr = __kmp_threads[gtid]; 30390b57cec5SDimitry Andric team = thr->th.th_team; 30400b57cec5SDimitry Andric ii = team->t.t_level; 30410b57cec5SDimitry Andric if (level > ii) 30420b57cec5SDimitry Andric return -1; 30430b57cec5SDimitry Andric 30440b57cec5SDimitry Andric if (thr->th.th_teams_microtask) { 30450b57cec5SDimitry Andric // AC: we are in 
teams region where multiple nested teams have same level 30460b57cec5SDimitry Andric int tlevel = thr->th.th_teams_level; // the level of the teams construct 30470b57cec5SDimitry Andric if (level <= 30480b57cec5SDimitry Andric tlevel) { // otherwise usual algorithm works (will not touch the teams) 30490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ii >= tlevel); 30500b57cec5SDimitry Andric // AC: As we need to pass by the teams league, we need to artificially 30510b57cec5SDimitry Andric // increase ii 30520b57cec5SDimitry Andric if (ii == tlevel) { 30530b57cec5SDimitry Andric ii += 2; // three teams have same level 30540b57cec5SDimitry Andric } else { 30550b57cec5SDimitry Andric ii++; // two teams have same level 30560b57cec5SDimitry Andric } 30570b57cec5SDimitry Andric } 30580b57cec5SDimitry Andric } 30590b57cec5SDimitry Andric 30600b57cec5SDimitry Andric if (ii == level) 30610b57cec5SDimitry Andric return __kmp_tid_from_gtid(gtid); 30620b57cec5SDimitry Andric 30630b57cec5SDimitry Andric dd = team->t.t_serialized; 30640b57cec5SDimitry Andric level++; 30650b57cec5SDimitry Andric while (ii > level) { 30660b57cec5SDimitry Andric for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 30670b57cec5SDimitry Andric } 30680b57cec5SDimitry Andric if ((team->t.t_serialized) && (!dd)) { 30690b57cec5SDimitry Andric team = team->t.t_parent; 30700b57cec5SDimitry Andric continue; 30710b57cec5SDimitry Andric } 30720b57cec5SDimitry Andric if (ii > level) { 30730b57cec5SDimitry Andric team = team->t.t_parent; 30740b57cec5SDimitry Andric dd = team->t.t_serialized; 30750b57cec5SDimitry Andric ii--; 30760b57cec5SDimitry Andric } 30770b57cec5SDimitry Andric } 30780b57cec5SDimitry Andric 30790b57cec5SDimitry Andric return (dd > 1) ? 
(0) : (team->t.t_master_tid); 30800b57cec5SDimitry Andric } 30810b57cec5SDimitry Andric 30820b57cec5SDimitry Andric int __kmp_get_team_size(int gtid, int level) { 30830b57cec5SDimitry Andric 30840b57cec5SDimitry Andric int ii, dd; 30850b57cec5SDimitry Andric kmp_team_t *team; 30860b57cec5SDimitry Andric kmp_info_t *thr; 30870b57cec5SDimitry Andric 30880b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); 30890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 30900b57cec5SDimitry Andric 30910b57cec5SDimitry Andric // validate level 30920b57cec5SDimitry Andric if (level == 0) 30930b57cec5SDimitry Andric return 1; 30940b57cec5SDimitry Andric if (level < 0) 30950b57cec5SDimitry Andric return -1; 30960b57cec5SDimitry Andric thr = __kmp_threads[gtid]; 30970b57cec5SDimitry Andric team = thr->th.th_team; 30980b57cec5SDimitry Andric ii = team->t.t_level; 30990b57cec5SDimitry Andric if (level > ii) 31000b57cec5SDimitry Andric return -1; 31010b57cec5SDimitry Andric 31020b57cec5SDimitry Andric if (thr->th.th_teams_microtask) { 31030b57cec5SDimitry Andric // AC: we are in teams region where multiple nested teams have same level 31040b57cec5SDimitry Andric int tlevel = thr->th.th_teams_level; // the level of the teams construct 31050b57cec5SDimitry Andric if (level <= 31060b57cec5SDimitry Andric tlevel) { // otherwise usual algorithm works (will not touch the teams) 31070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ii >= tlevel); 31080b57cec5SDimitry Andric // AC: As we need to pass by the teams league, we need to artificially 31090b57cec5SDimitry Andric // increase ii 31100b57cec5SDimitry Andric if (ii == tlevel) { 31110b57cec5SDimitry Andric ii += 2; // three teams have same level 31120b57cec5SDimitry Andric } else { 31130b57cec5SDimitry Andric ii++; // two teams have same level 31140b57cec5SDimitry Andric } 31150b57cec5SDimitry Andric } 31160b57cec5SDimitry Andric } 31170b57cec5SDimitry Andric 31180b57cec5SDimitry Andric while (ii > 
level) { 31190b57cec5SDimitry Andric for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 31200b57cec5SDimitry Andric } 31210b57cec5SDimitry Andric if (team->t.t_serialized && (!dd)) { 31220b57cec5SDimitry Andric team = team->t.t_parent; 31230b57cec5SDimitry Andric continue; 31240b57cec5SDimitry Andric } 31250b57cec5SDimitry Andric if (ii > level) { 31260b57cec5SDimitry Andric team = team->t.t_parent; 31270b57cec5SDimitry Andric ii--; 31280b57cec5SDimitry Andric } 31290b57cec5SDimitry Andric } 31300b57cec5SDimitry Andric 31310b57cec5SDimitry Andric return team->t.t_nproc; 31320b57cec5SDimitry Andric } 31330b57cec5SDimitry Andric 31340b57cec5SDimitry Andric kmp_r_sched_t __kmp_get_schedule_global() { 31350b57cec5SDimitry Andric // This routine created because pairs (__kmp_sched, __kmp_chunk) and 31360b57cec5SDimitry Andric // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults 31370b57cec5SDimitry Andric // independently. So one can get the updated schedule here. 31380b57cec5SDimitry Andric 31390b57cec5SDimitry Andric kmp_r_sched_t r_sched; 31400b57cec5SDimitry Andric 31410b57cec5SDimitry Andric // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, 31420b57cec5SDimitry Andric // __kmp_guided. 
__kmp_sched should keep original value, so that user can set 31430b57cec5SDimitry Andric // KMP_SCHEDULE multiple times, and thus have different run-time schedules in 31440b57cec5SDimitry Andric // different roots (even in OMP 2.5) 31450b57cec5SDimitry Andric enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched); 31460b57cec5SDimitry Andric enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched); 31470b57cec5SDimitry Andric if (s == kmp_sch_static) { 31480b57cec5SDimitry Andric // replace STATIC with more detailed schedule (balanced or greedy) 31490b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_static; 31500b57cec5SDimitry Andric } else if (s == kmp_sch_guided_chunked) { 31510b57cec5SDimitry Andric // replace GUIDED with more detailed schedule (iterative or analytical) 31520b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_guided; 31530b57cec5SDimitry Andric } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 31540b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_sched; 31550b57cec5SDimitry Andric } 31560b57cec5SDimitry Andric SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers); 31570b57cec5SDimitry Andric 31580b57cec5SDimitry Andric if (__kmp_chunk < KMP_DEFAULT_CHUNK) { 31590b57cec5SDimitry Andric // __kmp_chunk may be wrong here (if it was not ever set) 31600b57cec5SDimitry Andric r_sched.chunk = KMP_DEFAULT_CHUNK; 31610b57cec5SDimitry Andric } else { 31620b57cec5SDimitry Andric r_sched.chunk = __kmp_chunk; 31630b57cec5SDimitry Andric } 31640b57cec5SDimitry Andric 31650b57cec5SDimitry Andric return r_sched; 31660b57cec5SDimitry Andric } 31670b57cec5SDimitry Andric 31680b57cec5SDimitry Andric /* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) 31690b57cec5SDimitry Andric at least argc number of *t_argv entries for the requested team. 
*/ 31700b57cec5SDimitry Andric static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { 31710b57cec5SDimitry Andric 31720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 31730b57cec5SDimitry Andric if (!realloc || argc > team->t.t_max_argc) { 31740b57cec5SDimitry Andric 31750b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " 31760b57cec5SDimitry Andric "current entries=%d\n", 31770b57cec5SDimitry Andric team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0)); 31780b57cec5SDimitry Andric /* if previously allocated heap space for args, free them */ 31790b57cec5SDimitry Andric if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) 31800b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 31810b57cec5SDimitry Andric 31820b57cec5SDimitry Andric if (argc <= KMP_INLINE_ARGV_ENTRIES) { 31830b57cec5SDimitry Andric /* use unused space in the cache line for arguments */ 31840b57cec5SDimitry Andric team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 31850b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " 31860b57cec5SDimitry Andric "argv entries\n", 31870b57cec5SDimitry Andric team->t.t_id, team->t.t_max_argc)); 31880b57cec5SDimitry Andric team->t.t_argv = &team->t.t_inline_argv[0]; 31890b57cec5SDimitry Andric if (__kmp_storage_map) { 31900b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 31910b57cec5SDimitry Andric -1, &team->t.t_inline_argv[0], 31920b57cec5SDimitry Andric &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 31930b57cec5SDimitry Andric (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", 31940b57cec5SDimitry Andric team->t.t_id); 31950b57cec5SDimitry Andric } 31960b57cec5SDimitry Andric } else { 31970b57cec5SDimitry Andric /* allocate space for arguments in the heap */ 31980b57cec5SDimitry Andric team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) 31990b57cec5SDimitry Andric ? 
                               KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}

// Allocate the per-team bookkeeping arrays (thread pointers, dispatch
// buffers, per-thread dispatch state, implicit task descriptors) sized for
// max_nth threads, and record max_nth in t_max_nproc.
// Single-thread teams get only 2 dispatch buffers; larger teams get
// __kmp_dispatch_num_buffers.
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

// Release all arrays allocated by __kmp_allocate_team_arrays, including any
// per-thread dispatch scratch buffers that were lazily attached, then null
// the pointers so a stale team cannot be used by mistake.
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

// Grow the team arrays to hold max_nth threads. Only the t_threads pointer
// array contents (t_nproc entries) are preserved across the reallocation;
// dispatch buffers and implicit task data are freed and recreated empty —
// presumably the caller reinitializes them (TODO confirm at call sites).
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}

// Build an internal-control-variables (ICV) snapshot from the current global
// settings; used to seed new root/hot teams.
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level; //corresponds to value of th_team_serialized
      (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
      // adjustment of threads (per thread)
      (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
      // whether blocktime is explicitly set
      __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
// intervals
#endif
      __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
      // next parallel region (per thread)
      // (use a max ub on value if __kmp_parallel_initialize not called yet)
      __kmp_cg_max_nth, // int thread_limit;
      __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
      // for max_active_levels
      r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
      // {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

// Snapshot the ICVs of an existing team (taken from its primary thread's
// current implicit task) rather than from the globals.
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}

// One-time initialization of a root descriptor: reset its state fields, then
// allocate both the (serialized, single-slot) root team and the hot team that
// will service parallel regions started by this root. Caller must not have
// initialized this root before (asserted via r_begin).
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  // NOTE(review): root_team->t.t_control_stack_top was already cleared above;
  // this second assignment looks like it may have been intended for hot_team —
  // confirm against upstream before changing, behavior kept as-is here.
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}

#ifdef KMP_DEBUG

// Singly-linked list node used only by the debug structure dumper below.
// The list always ends with a sentinel node whose entry and next are NULL.
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  // Recursively accumulate ancestors and pool successors first.
  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    // Insert before *l by copying *l into a fresh node and overwriting *l;
    // this keeps the NULL/NULL sentinel at the tail.
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}

// Print "<title><team id> <team ptr>" or a nil marker for a NULL team.
static void __kmp_print_structure_team(char const *title, kmp_team_p const *team

) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

// Print "<title><gtid> <thread ptr>" or a nil marker for a NULL thread.
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

// Debug-only dump of the runtime's global structures: the gtid table, every
// registered thread, every root (uber) descriptor, all reachable teams
// (gathered via __kmp_print_structure_team_accum), and the thread/team pools.
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list =
      (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:        %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:      ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}

#endif

//---------------------------------------------------------------------------
// Stuff for per-thread fast random number generator
// Table of primes
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

//---------------------------------------------------------------------------
// __kmp_get_random: Get a random number using a linear congruential method.
// Return the next pseudo-random value for this thread and advance its
// per-thread LCG state (x' = x * a + 1, with a drawn from __kmp_primes).
// The high 16 bits of the *current* state are returned, so successive calls
// are decorrelated from the low-bit LCG artifacts.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
// Seeds the per-thread LCG from the thread's tid: the multiplier is picked
// from the prime table (tid mod table size) and the state is advanced once so
// distinct tids start at distinct states.
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}

#if KMP_OS_WINDOWS
/* reclaim array entries for root threads that are already dead, returns number
 * reclaimed */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    // A slot is reclaimable only if it belongs to an uber (root) thread whose
    // OS thread has exited and whose root was not active at the time.
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and
   __kmp_root, with appropriate update to __kmp_threads_capacity. Array
   capacity is increased by doubling with clipping to __kmp_tp_capacity, if
   threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nNeed free slots, the function does
   that expansion. If not, nothing is done beyond the possible initial root
   thread reclamation.

   If any argument is negative, the behavior is undefined.
*/
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
  // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
  // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
  // > __kmp_max_nth in one of two ways:
  //
  // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
  //    may not be reused by another thread, so we may need to increase
  //    __kmp_threads_capacity to __kmp_max_nth + 1.
  //
  // 2) New foreign root(s) are encountered. We always register new foreign
  //    roots. This may cause a smaller # of threads to be allocated at
  //    subsequent parallel regions, but the worker threads hang around (and
  //    eventually go to sleep) and need slots in the __kmp_threads[] array.
  //
  // Anyway, that is the reason for moving the check to see if
  // __kmp_max_nth was exceeded into __kmp_reserve_threads()
  // instead of having it performed here. -BB

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  // Double capacity until it suffices, clipping at __kmp_sys_max_nth.
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  // __kmp_threads and __kmp_root live in one allocation: threads first,
  // roots immediately after, plus a cache-line pad.
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put old __kmp_threads array on a list. Any ongoing references to the old
  // list will be valid. This list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  // Publish the new arrays through volatile-qualified stores so concurrent
  // readers observe a consistent pointer swap.
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}

/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if are the
   thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* 2007-03-02:
     If initial thread did not invoke OpenMP RTL yet, and this thread is not an
     initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
     work as expected -- it may return false (that means there is at least one
     empty slot in __kmp_threads array), but it is possible the only free slot
     is #0, which is reserved for initial thread and so cannot be used for this
     one. Following code workarounds this bug.
37900b57cec5SDimitry Andric 37910b57cec5SDimitry Andric However, right solution seems to be not reserving slot #0 for initial 37920b57cec5SDimitry Andric thread because: 37930b57cec5SDimitry Andric (1) there is no magic in slot #0, 37940b57cec5SDimitry Andric (2) we cannot detect initial thread reliably (the first thread which does 37950b57cec5SDimitry Andric serial initialization may be not a real initial thread). 37960b57cec5SDimitry Andric */ 37970b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 37980b57cec5SDimitry Andric if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 37990b57cec5SDimitry Andric --capacity; 38000b57cec5SDimitry Andric } 38010b57cec5SDimitry Andric 3802d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 3803d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 3804d409305fSDimitry Andric // in __kmp_threads_capacity. 3805d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 3806d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 3807d409305fSDimitry Andric } 3808d409305fSDimitry Andric 38090b57cec5SDimitry Andric /* see if there are too many threads */ 38100b57cec5SDimitry Andric if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { 38110b57cec5SDimitry Andric if (__kmp_tp_cached) { 38120b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), 38130b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 38140b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 38150b57cec5SDimitry Andric } else { 38160b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), 38170b57cec5SDimitry Andric __kmp_msg_null); 38180b57cec5SDimitry Andric } 38190b57cec5SDimitry Andric } 38200b57cec5SDimitry Andric 3821e8d8bef9SDimitry Andric // When hidden helper task is enabled, __kmp_threads 
is organized as follows: 3822e8d8bef9SDimitry Andric // 0: initial thread, also a regular OpenMP thread. 3823e8d8bef9SDimitry Andric // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads. 3824e8d8bef9SDimitry Andric // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for 3825e8d8bef9SDimitry Andric // regular OpenMP threads. 3826e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper_threads)) { 3827e8d8bef9SDimitry Andric // Find an available thread slot for hidden helper thread. Slots for hidden 3828e8d8bef9SDimitry Andric // helper threads start from 1 to __kmp_hidden_helper_threads_num. 3829e8d8bef9SDimitry Andric for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL && 3830e8d8bef9SDimitry Andric gtid <= __kmp_hidden_helper_threads_num; 38310b57cec5SDimitry Andric gtid++) 38320b57cec5SDimitry Andric ; 3833e8d8bef9SDimitry Andric KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num); 3834e8d8bef9SDimitry Andric KA_TRACE(1, ("__kmp_register_root: found slot in threads array for " 3835e8d8bef9SDimitry Andric "hidden helper thread: T#%d\n", 3836e8d8bef9SDimitry Andric gtid)); 3837e8d8bef9SDimitry Andric } else { 3838e8d8bef9SDimitry Andric /* find an available thread slot */ 3839e8d8bef9SDimitry Andric // Don't reassign the zero slot since we need that to only be used by 3840e8d8bef9SDimitry Andric // initial thread. Slots for hidden helper threads should also be skipped. 
3841d409305fSDimitry Andric if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 3842e8d8bef9SDimitry Andric gtid = 0; 3843e8d8bef9SDimitry Andric } else { 3844e8d8bef9SDimitry Andric for (gtid = __kmp_hidden_helper_threads_num + 1; 3845e8d8bef9SDimitry Andric TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++) 3846e8d8bef9SDimitry Andric ; 3847e8d8bef9SDimitry Andric } 3848e8d8bef9SDimitry Andric KA_TRACE( 3849e8d8bef9SDimitry Andric 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); 38500b57cec5SDimitry Andric KMP_ASSERT(gtid < __kmp_threads_capacity); 3851e8d8bef9SDimitry Andric } 38520b57cec5SDimitry Andric 38530b57cec5SDimitry Andric /* update global accounting */ 38540b57cec5SDimitry Andric __kmp_all_nth++; 38550b57cec5SDimitry Andric TCW_4(__kmp_nth, __kmp_nth + 1); 38560b57cec5SDimitry Andric 38570b57cec5SDimitry Andric // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 38580b57cec5SDimitry Andric // numbers of procs, and method #2 (keyed API call) for higher numbers. 
38590b57cec5SDimitry Andric if (__kmp_adjust_gtid_mode) { 38600b57cec5SDimitry Andric if (__kmp_all_nth >= __kmp_tls_gtid_min) { 38610b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 2) { 38620b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 2); 38630b57cec5SDimitry Andric } 38640b57cec5SDimitry Andric } else { 38650b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 1) { 38660b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 1); 38670b57cec5SDimitry Andric } 38680b57cec5SDimitry Andric } 38690b57cec5SDimitry Andric } 38700b57cec5SDimitry Andric 38710b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 38720b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary */ 38730b57cec5SDimitry Andric /* Middle initialization might not have occurred yet */ 38740b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 38750b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 38760b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 38770b57cec5SDimitry Andric } 38780b57cec5SDimitry Andric } 38790b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 38800b57cec5SDimitry Andric 38810b57cec5SDimitry Andric /* setup this new hierarchy */ 38820b57cec5SDimitry Andric if (!(root = __kmp_root[gtid])) { 38830b57cec5SDimitry Andric root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); 38840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!root->r.r_root_team); 38850b57cec5SDimitry Andric } 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric #if KMP_STATS_ENABLED 38880b57cec5SDimitry Andric // Initialize stats as soon as possible (right after gtid assignment). 
38890b57cec5SDimitry Andric __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 38900b57cec5SDimitry Andric __kmp_stats_thread_ptr->startLife(); 38910b57cec5SDimitry Andric KMP_SET_THREAD_STATE(SERIAL_REGION); 38920b57cec5SDimitry Andric KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 38930b57cec5SDimitry Andric #endif 38940b57cec5SDimitry Andric __kmp_initialize_root(root); 38950b57cec5SDimitry Andric 38960b57cec5SDimitry Andric /* setup new root thread structure */ 38970b57cec5SDimitry Andric if (root->r.r_uber_thread) { 38980b57cec5SDimitry Andric root_thread = root->r.r_uber_thread; 38990b57cec5SDimitry Andric } else { 39000b57cec5SDimitry Andric root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 39010b57cec5SDimitry Andric if (__kmp_storage_map) { 39020b57cec5SDimitry Andric __kmp_print_thread_storage_map(root_thread, gtid); 39030b57cec5SDimitry Andric } 39040b57cec5SDimitry Andric root_thread->th.th_info.ds.ds_gtid = gtid; 39050b57cec5SDimitry Andric #if OMPT_SUPPORT 39060b57cec5SDimitry Andric root_thread->th.ompt_thread_info.thread_data = ompt_data_none; 39070b57cec5SDimitry Andric #endif 39080b57cec5SDimitry Andric root_thread->th.th_root = root; 39090b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 39100b57cec5SDimitry Andric root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 39110b57cec5SDimitry Andric } 39120b57cec5SDimitry Andric #if USE_FAST_MEMORY 39130b57cec5SDimitry Andric __kmp_initialize_fast_memory(root_thread); 39140b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 39150b57cec5SDimitry Andric 39160b57cec5SDimitry Andric #if KMP_USE_BGET 39170b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 39180b57cec5SDimitry Andric __kmp_initialize_bget(root_thread); 39190b57cec5SDimitry Andric #endif 39200b57cec5SDimitry Andric __kmp_init_random(root_thread); // Initialize random number generator 39210b57cec5SDimitry Andric } 39220b57cec5SDimitry Andric 39230b57cec5SDimitry Andric /* setup 
the serial team held in reserve by the root thread */ 39240b57cec5SDimitry Andric if (!root_thread->th.th_serial_team) { 39250b57cec5SDimitry Andric kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 39260b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 39270b57cec5SDimitry Andric root_thread->th.th_serial_team = __kmp_allocate_team( 39280b57cec5SDimitry Andric root, 1, 1, 39290b57cec5SDimitry Andric #if OMPT_SUPPORT 39300b57cec5SDimitry Andric ompt_data_none, // root parallel id 39310b57cec5SDimitry Andric #endif 39320b57cec5SDimitry Andric proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 39330b57cec5SDimitry Andric } 39340b57cec5SDimitry Andric KMP_ASSERT(root_thread->th.th_serial_team); 39350b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 39360b57cec5SDimitry Andric root_thread->th.th_serial_team)); 39370b57cec5SDimitry Andric 39380b57cec5SDimitry Andric /* drop root_thread into place */ 39390b57cec5SDimitry Andric TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 39400b57cec5SDimitry Andric 39410b57cec5SDimitry Andric root->r.r_root_team->t.t_threads[0] = root_thread; 39420b57cec5SDimitry Andric root->r.r_hot_team->t.t_threads[0] = root_thread; 39430b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 39440b57cec5SDimitry Andric // AC: the team created in reserve, not for execution (it is unused for now). 
39450b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_serialized = 0; 39460b57cec5SDimitry Andric root->r.r_uber_thread = root_thread; 39470b57cec5SDimitry Andric 39480b57cec5SDimitry Andric /* initialize the thread, get it ready to go */ 39490b57cec5SDimitry Andric __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 39500b57cec5SDimitry Andric TCW_4(__kmp_init_gtid, TRUE); 39510b57cec5SDimitry Andric 3952fe6060f1SDimitry Andric /* prepare the primary thread for get_gtid() */ 39530b57cec5SDimitry Andric __kmp_gtid_set_specific(gtid); 39540b57cec5SDimitry Andric 39550b57cec5SDimitry Andric #if USE_ITT_BUILD 39560b57cec5SDimitry Andric __kmp_itt_thread_name(gtid); 39570b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 39580b57cec5SDimitry Andric 39590b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 39600b57cec5SDimitry Andric __kmp_gtid = gtid; 39610b57cec5SDimitry Andric #endif 39620b57cec5SDimitry Andric __kmp_create_worker(gtid, root_thread, __kmp_stksize); 39630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 39640b57cec5SDimitry Andric 39650b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 39660b57cec5SDimitry Andric "plain=%u\n", 39670b57cec5SDimitry Andric gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 39680b57cec5SDimitry Andric root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 39690b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE)); 39700b57cec5SDimitry Andric { // Initialize barrier data. 
39710b57cec5SDimitry Andric int b; 39720b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 39730b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 39740b57cec5SDimitry Andric #if USE_DEBUGGER 39750b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 39760b57cec5SDimitry Andric #endif 39770b57cec5SDimitry Andric } 39780b57cec5SDimitry Andric } 39790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 39800b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE); 39810b57cec5SDimitry Andric 39820b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 39830b57cec5SDimitry Andric root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 39840b57cec5SDimitry Andric root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 39850b57cec5SDimitry Andric root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 39860b57cec5SDimitry Andric root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 39870b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 39880b57cec5SDimitry Andric root_thread->th.th_def_allocator = __kmp_def_allocator; 39890b57cec5SDimitry Andric root_thread->th.th_prev_level = 0; 39900b57cec5SDimitry Andric root_thread->th.th_prev_num_threads = 1; 39910b57cec5SDimitry Andric 39920b57cec5SDimitry Andric kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t)); 39930b57cec5SDimitry Andric tmp->cg_root = root_thread; 39940b57cec5SDimitry Andric tmp->cg_thread_limit = __kmp_cg_max_nth; 39950b57cec5SDimitry Andric tmp->cg_nthreads = 1; 39960b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with" 39970b57cec5SDimitry Andric " cg_nthreads init to 1\n", 39980b57cec5SDimitry Andric root_thread, tmp)); 39990b57cec5SDimitry Andric tmp->up = NULL; 40000b57cec5SDimitry Andric root_thread->th.th_cg_roots = tmp; 40010b57cec5SDimitry Andric 40020b57cec5SDimitry Andric __kmp_root_counter++; 40030b57cec5SDimitry Andric 
40040b57cec5SDimitry Andric #if OMPT_SUPPORT 40050b57cec5SDimitry Andric if (!initial_thread && ompt_enabled.enabled) { 40060b57cec5SDimitry Andric 40070b57cec5SDimitry Andric kmp_info_t *root_thread = ompt_get_thread(); 40080b57cec5SDimitry Andric 40090b57cec5SDimitry Andric ompt_set_thread_state(root_thread, ompt_state_overhead); 40100b57cec5SDimitry Andric 40110b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_thread_begin) { 40120b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( 40130b57cec5SDimitry Andric ompt_thread_initial, __ompt_get_thread_data_internal()); 40140b57cec5SDimitry Andric } 40150b57cec5SDimitry Andric ompt_data_t *task_data; 40160b57cec5SDimitry Andric ompt_data_t *parallel_data; 4017fe6060f1SDimitry Andric __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, 4018fe6060f1SDimitry Andric NULL); 40190b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 40200b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 40210b57cec5SDimitry Andric ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial); 40220b57cec5SDimitry Andric } 40230b57cec5SDimitry Andric 40240b57cec5SDimitry Andric ompt_set_thread_state(root_thread, ompt_state_work_serial); 40250b57cec5SDimitry Andric } 40260b57cec5SDimitry Andric #endif 4027fe6060f1SDimitry Andric #if OMPD_SUPPORT 4028fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 4029fe6060f1SDimitry Andric ompd_bp_thread_begin(); 4030fe6060f1SDimitry Andric #endif 40310b57cec5SDimitry Andric 40320b57cec5SDimitry Andric KMP_MB(); 40330b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 40340b57cec5SDimitry Andric 40350b57cec5SDimitry Andric return gtid; 40360b57cec5SDimitry Andric } 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 40390b57cec5SDimitry Andric static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, 40400b57cec5SDimitry 
Andric const int max_level) { 40410b57cec5SDimitry Andric int i, n, nth; 40420b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 40430b57cec5SDimitry Andric if (!hot_teams || !hot_teams[level].hot_team) { 40440b57cec5SDimitry Andric return 0; 40450b57cec5SDimitry Andric } 40460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(level < max_level); 40470b57cec5SDimitry Andric kmp_team_t *team = hot_teams[level].hot_team; 40480b57cec5SDimitry Andric nth = hot_teams[level].hot_team_nth; 4049fe6060f1SDimitry Andric n = nth - 1; // primary thread is not freed 40500b57cec5SDimitry Andric if (level < max_level - 1) { 40510b57cec5SDimitry Andric for (i = 0; i < nth; ++i) { 40520b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[i]; 40530b57cec5SDimitry Andric n += __kmp_free_hot_teams(root, th, level + 1, max_level); 40540b57cec5SDimitry Andric if (i > 0 && th->th.th_hot_teams) { 40550b57cec5SDimitry Andric __kmp_free(th->th.th_hot_teams); 40560b57cec5SDimitry Andric th->th.th_hot_teams = NULL; 40570b57cec5SDimitry Andric } 40580b57cec5SDimitry Andric } 40590b57cec5SDimitry Andric } 40600b57cec5SDimitry Andric __kmp_free_team(root, team, NULL); 40610b57cec5SDimitry Andric return n; 40620b57cec5SDimitry Andric } 40630b57cec5SDimitry Andric #endif 40640b57cec5SDimitry Andric 40650b57cec5SDimitry Andric // Resets a root thread and clear its root and hot teams. 40660b57cec5SDimitry Andric // Returns the number of __kmp_threads entries directly and indirectly freed. 
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  // n starts as the hot team size; nested hot team frees add to it below.
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  // Report the end of the implicit (initial) task and of the thread itself
  // to any registered OMPT tool before tearing the thread down.
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  // Drop this root's reference on its contention group node; free the node
  // when we were the last member.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put root thread to __kmp_thread_pool, so we have to reap it
  // instead of freeing.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}

// Unregister the calling root thread: wait out pending proxy / hidden helper
// tasks, then reset the root. Takes and releases __kmp_forkjoin_lock.
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    // Library already shut down (or never serially initialized): nothing to do.
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif

#if KMP_DEBUG
// Debug helper: print the calling thread's identity and current/parent task
// pointers to the runtime's output stream.
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG

/* TODO optimize with one big
memclr, take out what isn't needed, split 42440b57cec5SDimitry Andric responsibility to workers as much as possible, and delay initialization of 42450b57cec5SDimitry Andric features as much as possible */ 42460b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, 42470b57cec5SDimitry Andric int tid, int gtid) { 42480b57cec5SDimitry Andric /* this_thr->th.th_info.ds.ds_gtid is setup in 42490b57cec5SDimitry Andric kmp_allocate_thread/create_worker. 42500b57cec5SDimitry Andric this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 42510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr != NULL); 42520b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); 42530b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 42540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 42550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 4256fe6060f1SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 42570b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master); 42580b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master->th.th_root); 42590b57cec5SDimitry Andric 42600b57cec5SDimitry Andric KMP_MB(); 42610b57cec5SDimitry Andric 42620b57cec5SDimitry Andric TCW_SYNC_PTR(this_thr->th.th_team, team); 42630b57cec5SDimitry Andric 42640b57cec5SDimitry Andric this_thr->th.th_info.ds.ds_tid = tid; 42650b57cec5SDimitry Andric this_thr->th.th_set_nproc = 0; 42660b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) 42670b57cec5SDimitry Andric // When tasking is possible, threads are not safe to reap until they are 42680b57cec5SDimitry Andric // done tasking; this will be set when tasking code is exited in wait 42690b57cec5SDimitry Andric this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 42700b57cec5SDimitry Andric else // no tasking --> always safe to reap 42710b57cec5SDimitry Andric this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 42720b57cec5SDimitry Andric this_thr->th.th_set_proc_bind = proc_bind_default; 
42730b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 42740b57cec5SDimitry Andric this_thr->th.th_new_place = this_thr->th.th_current_place; 42750b57cec5SDimitry Andric #endif 42760b57cec5SDimitry Andric this_thr->th.th_root = master->th.th_root; 42770b57cec5SDimitry Andric 42780b57cec5SDimitry Andric /* setup the thread's cache of the team structure */ 42790b57cec5SDimitry Andric this_thr->th.th_team_nproc = team->t.t_nproc; 42800b57cec5SDimitry Andric this_thr->th.th_team_master = master; 42810b57cec5SDimitry Andric this_thr->th.th_team_serialized = team->t.t_serialized; 42820b57cec5SDimitry Andric 42830b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); 42840b57cec5SDimitry Andric 42850b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 42860b57cec5SDimitry Andric tid, gtid, this_thr, this_thr->th.th_current_task)); 42870b57cec5SDimitry Andric 42880b57cec5SDimitry Andric __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, 42890b57cec5SDimitry Andric team, tid, TRUE); 42900b57cec5SDimitry Andric 42910b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 42920b57cec5SDimitry Andric tid, gtid, this_thr, this_thr->th.th_current_task)); 42930b57cec5SDimitry Andric // TODO: Initialize ICVs from parent; GEH - isn't that already done in 42940b57cec5SDimitry Andric // __kmp_initialize_team()? 
42950b57cec5SDimitry Andric 42960b57cec5SDimitry Andric /* TODO no worksharing in speculative threads */ 42970b57cec5SDimitry Andric this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; 42980b57cec5SDimitry Andric 42990b57cec5SDimitry Andric this_thr->th.th_local.this_construct = 0; 43000b57cec5SDimitry Andric 43010b57cec5SDimitry Andric if (!this_thr->th.th_pri_common) { 43020b57cec5SDimitry Andric this_thr->th.th_pri_common = 43030b57cec5SDimitry Andric (struct common_table *)__kmp_allocate(sizeof(struct common_table)); 43040b57cec5SDimitry Andric if (__kmp_storage_map) { 43050b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 43060b57cec5SDimitry Andric gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 43070b57cec5SDimitry Andric sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); 43080b57cec5SDimitry Andric } 43090b57cec5SDimitry Andric this_thr->th.th_pri_head = NULL; 43100b57cec5SDimitry Andric } 43110b57cec5SDimitry Andric 4312fe6060f1SDimitry Andric if (this_thr != master && // Primary thread's CG root is initialized elsewhere 43130b57cec5SDimitry Andric this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set 4314fe6060f1SDimitry Andric // Make new thread's CG root same as primary thread's 43150b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master->th.th_cg_roots); 43160b57cec5SDimitry Andric kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 43170b57cec5SDimitry Andric if (tmp) { 43180b57cec5SDimitry Andric // worker changes CG, need to check if old CG should be freed 43190b57cec5SDimitry Andric int i = tmp->cg_nthreads--; 43200b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads" 43210b57cec5SDimitry Andric " on node %p of thread %p to %d\n", 43220b57cec5SDimitry Andric this_thr, tmp, tmp->cg_root, tmp->cg_nthreads)); 43230b57cec5SDimitry Andric if (i == 1) { 43240b57cec5SDimitry Andric __kmp_free(tmp); // last thread left CG --> free it 43250b57cec5SDimitry Andric } 
43260b57cec5SDimitry Andric } 43270b57cec5SDimitry Andric this_thr->th.th_cg_roots = master->th.th_cg_roots; 43280b57cec5SDimitry Andric // Increment new thread's CG root's counter to add the new thread 43290b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_nthreads++; 43300b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on" 43310b57cec5SDimitry Andric " node %p of thread %p to %d\n", 43320b57cec5SDimitry Andric this_thr, this_thr->th.th_cg_roots, 43330b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_root, 43340b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_nthreads)); 43350b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.thread_limit = 43360b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_thread_limit; 43370b57cec5SDimitry Andric } 43380b57cec5SDimitry Andric 43390b57cec5SDimitry Andric /* Initialize dynamic dispatch */ 43400b57cec5SDimitry Andric { 43410b57cec5SDimitry Andric volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 43420b57cec5SDimitry Andric // Use team max_nproc since this will never change for the team. 43430b57cec5SDimitry Andric size_t disp_size = 43440b57cec5SDimitry Andric sizeof(dispatch_private_info_t) * 43450b57cec5SDimitry Andric (team->t.t_max_nproc == 1 ? 
1 : __kmp_dispatch_num_buffers); 43460b57cec5SDimitry Andric KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, 43470b57cec5SDimitry Andric team->t.t_max_nproc)); 43480b57cec5SDimitry Andric KMP_ASSERT(dispatch); 43490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 43500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); 43510b57cec5SDimitry Andric 43520b57cec5SDimitry Andric dispatch->th_disp_index = 0; 43530b57cec5SDimitry Andric dispatch->th_doacross_buf_idx = 0; 43540b57cec5SDimitry Andric if (!dispatch->th_disp_buffer) { 43550b57cec5SDimitry Andric dispatch->th_disp_buffer = 43560b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate(disp_size); 43570b57cec5SDimitry Andric 43580b57cec5SDimitry Andric if (__kmp_storage_map) { 43590b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 43600b57cec5SDimitry Andric gtid, &dispatch->th_disp_buffer[0], 43610b57cec5SDimitry Andric &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 43620b57cec5SDimitry Andric ? 
1 43630b57cec5SDimitry Andric : __kmp_dispatch_num_buffers], 4364fe6060f1SDimitry Andric disp_size, 4365fe6060f1SDimitry Andric "th_%d.th_dispatch.th_disp_buffer " 43660b57cec5SDimitry Andric "(team_%d.t_dispatch[%d].th_disp_buffer)", 43670b57cec5SDimitry Andric gtid, team->t.t_id, gtid); 43680b57cec5SDimitry Andric } 43690b57cec5SDimitry Andric } else { 43700b57cec5SDimitry Andric memset(&dispatch->th_disp_buffer[0], '\0', disp_size); 43710b57cec5SDimitry Andric } 43720b57cec5SDimitry Andric 43730b57cec5SDimitry Andric dispatch->th_dispatch_pr_current = 0; 43740b57cec5SDimitry Andric dispatch->th_dispatch_sh_current = 0; 43750b57cec5SDimitry Andric 43760b57cec5SDimitry Andric dispatch->th_deo_fcn = 0; /* ORDERED */ 43770b57cec5SDimitry Andric dispatch->th_dxo_fcn = 0; /* END ORDERED */ 43780b57cec5SDimitry Andric } 43790b57cec5SDimitry Andric 43800b57cec5SDimitry Andric this_thr->th.th_next_pool = NULL; 43810b57cec5SDimitry Andric 43820b57cec5SDimitry Andric if (!this_thr->th.th_task_state_memo_stack) { 43830b57cec5SDimitry Andric size_t i; 43840b57cec5SDimitry Andric this_thr->th.th_task_state_memo_stack = 43850b57cec5SDimitry Andric (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8)); 43860b57cec5SDimitry Andric this_thr->th.th_task_state_top = 0; 43870b57cec5SDimitry Andric this_thr->th.th_task_state_stack_sz = 4; 43880b57cec5SDimitry Andric for (i = 0; i < this_thr->th.th_task_state_stack_sz; 43890b57cec5SDimitry Andric ++i) // zero init the stack 43900b57cec5SDimitry Andric this_thr->th.th_task_state_memo_stack[i] = 0; 43910b57cec5SDimitry Andric } 43920b57cec5SDimitry Andric 43930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 43940b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 43950b57cec5SDimitry Andric 43960b57cec5SDimitry Andric KMP_MB(); 43970b57cec5SDimitry Andric } 43980b57cec5SDimitry Andric 43990b57cec5SDimitry Andric /* allocate a new thread for the requesting team. 
   this is only called from
   within a forkjoin critical section. we will first try to get an available
   thread from the thread pool. if none is available, we will fork a new one
   assuming we are able to create a new one. this should be assured, as the
   caller should check on this first. */
// Returns a fully initialized kmp_info_t bound to `team` at position
// `new_tid`, either recycled from __kmp_thread_pool or freshly created.
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  // Fast path: reuse a parked thread. Plain (non-atomic) pool-list updates
  // here are presumably serialized by the forkjoin critical section noted in
  // the header comment — confirm against callers before changing.
  if (__kmp_thread_pool) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    // Keep the insertion hint valid: it must never point at a thread that
    // has left the pool.
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    // Under the suspend mutex, retire the thread from the pool's active
    // count so the pool bookkeeping stays consistent with th_active_in_pool.
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    // Recycled thread starts with a clean task-state stack.
    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        // Oversubscribed: force spinning threads to block immediately.
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  // Slow path: pool is empty. Caller guarantees capacity (asserted here).
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread. We try to do this as early as possible.
  // Classic double-checked pattern guarded by the monitor bootstrap lock.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // The reason is that if the library is loaded/unloaded in a loop with
      // small (parallel) work in between, then there is high probability that
      // monitor thread started after the library shutdown. At shutdown it is
      // too late to cope with the problem, because when the primary thread is
      // in DllMain (process detach) the monitor has no chances to start (it is
      // blocked), and primary thread has no means to inform the monitor that
      // the library has gone, because all the memory which the monitor can
      // access is going to be released/reset.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();

  {
    // Find the lowest free gtid slot. While hidden-helper threads are being
    // initialized the scan starts at 1 so helpers claim gtids
    // 1..__kmp_hidden_helper_threads_num; afterwards normal workers start
    // just past that reserved range.
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  // Publish the new descriptor before any further initialization that other
  // threads might observe through __kmp_threads[].
  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race conditions detection on synchronization flags in debug mode
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  // TODO: check if we need to also suppress b_arrived flags
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  // Reset every barrier slot to its "never arrived" state for the new thread.
  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
   The hot team code calls this case at every fork barrier, so EPCC barrier
   test are extremely sensitive to changes in it, esp. writes to the team
   struct, which cause a cache invalidation in all threads.
   IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
// Refreshes a reused team's identity (source location, team id) and pushes
// the new ICVs into the primary thread's implicit task. KMP_CHECK_UPDATE is
// used so fields are presumably written only when their value changes,
// avoiding needless cache invalidations per the warning above — verify
// against the macro definition before altering.
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  // Once parallel init has run, a zero nproc ICV would be invalid.
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments */
// Resets per-fork team state (serialization flag, thread count, dispatch
// bookkeeping, copyin/ordered counters) and then delegates ICV propagation
// to __kmp_reinitialize_team. Bracketed by KMP_MB() fences; preserve the
// store order when modifying.
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  // A one-thread team is by definition serialized.
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;
  /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
   * up hot team */

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // x86 FP-environment snapshot fields start out cleared.
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if KMP_AFFINITY_SUPPORTED

// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
47720b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4773fe6060f1SDimitry Andric // Do not partition places for the hidden helper team 4774fe6060f1SDimitry Andric if (KMP_HIDDEN_HELPER_TEAM(team)) 4775fe6060f1SDimitry Andric return; 4776fe6060f1SDimitry Andric // Copy the primary thread's place partition to the team struct 47770b57cec5SDimitry Andric kmp_info_t *master_th = team->t.t_threads[0]; 47780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th != NULL); 47790b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 47800b57cec5SDimitry Andric int first_place = master_th->th.th_first_place; 47810b57cec5SDimitry Andric int last_place = master_th->th.th_last_place; 47820b57cec5SDimitry Andric int masters_place = master_th->th.th_current_place; 4783bdd1243dSDimitry Andric int num_masks = __kmp_affinity.num_masks; 47840b57cec5SDimitry Andric team->t.t_first_place = first_place; 47850b57cec5SDimitry Andric team->t.t_last_place = last_place; 47860b57cec5SDimitry Andric 47870b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 47880b57cec5SDimitry Andric "bound to place %d partition = [%d,%d]\n", 47890b57cec5SDimitry Andric proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 47900b57cec5SDimitry Andric team->t.t_id, masters_place, first_place, last_place)); 47910b57cec5SDimitry Andric 47920b57cec5SDimitry Andric switch (proc_bind) { 47930b57cec5SDimitry Andric 47940b57cec5SDimitry Andric case proc_bind_default: 4795fe6060f1SDimitry Andric // Serial teams might have the proc_bind policy set to proc_bind_default. 4796fe6060f1SDimitry Andric // Not an issue -- we don't rebind primary thread for any proc_bind policy. 
47970b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 47980b57cec5SDimitry Andric break; 47990b57cec5SDimitry Andric 4800fe6060f1SDimitry Andric case proc_bind_primary: { 48010b57cec5SDimitry Andric int f; 48020b57cec5SDimitry Andric int n_th = team->t.t_nproc; 48030b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 48040b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48050b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48060b57cec5SDimitry Andric th->th.th_first_place = first_place; 48070b57cec5SDimitry Andric th->th.th_last_place = last_place; 48080b57cec5SDimitry Andric th->th.th_new_place = masters_place; 48090b57cec5SDimitry Andric if (__kmp_display_affinity && masters_place != th->th.th_current_place && 48100b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48110b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48120b57cec5SDimitry Andric } 48130b57cec5SDimitry Andric 4814fe6060f1SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d " 48150b57cec5SDimitry Andric "partition = [%d,%d]\n", 48160b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 48170b57cec5SDimitry Andric f, masters_place, first_place, last_place)); 48180b57cec5SDimitry Andric } 48190b57cec5SDimitry Andric } break; 48200b57cec5SDimitry Andric 48210b57cec5SDimitry Andric case proc_bind_close: { 48220b57cec5SDimitry Andric int f; 48230b57cec5SDimitry Andric int n_th = team->t.t_nproc; 48240b57cec5SDimitry Andric int n_places; 48250b57cec5SDimitry Andric if (first_place <= last_place) { 48260b57cec5SDimitry Andric n_places = last_place - first_place + 1; 48270b57cec5SDimitry Andric } else { 4828bdd1243dSDimitry Andric n_places = num_masks - first_place + last_place + 1; 48290b57cec5SDimitry Andric } 48300b57cec5SDimitry Andric if (n_th <= n_places) { 48310b57cec5SDimitry Andric int place = masters_place; 48320b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 48330b57cec5SDimitry Andric 
kmp_info_t *th = team->t.t_threads[f]; 48340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48350b57cec5SDimitry Andric 48360b57cec5SDimitry Andric if (place == last_place) { 48370b57cec5SDimitry Andric place = first_place; 4838bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 48390b57cec5SDimitry Andric place = 0; 48400b57cec5SDimitry Andric } else { 48410b57cec5SDimitry Andric place++; 48420b57cec5SDimitry Andric } 48430b57cec5SDimitry Andric th->th.th_first_place = first_place; 48440b57cec5SDimitry Andric th->th.th_last_place = last_place; 48450b57cec5SDimitry Andric th->th.th_new_place = place; 48460b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48470b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48480b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48490b57cec5SDimitry Andric } 48500b57cec5SDimitry Andric 48510b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 48520b57cec5SDimitry Andric "partition = [%d,%d]\n", 48530b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 48540b57cec5SDimitry Andric team->t.t_id, f, place, first_place, last_place)); 48550b57cec5SDimitry Andric } 48560b57cec5SDimitry Andric } else { 48570b57cec5SDimitry Andric int S, rem, gap, s_count; 48580b57cec5SDimitry Andric S = n_th / n_places; 48590b57cec5SDimitry Andric s_count = 0; 48600b57cec5SDimitry Andric rem = n_th - (S * n_places); 48610b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 48620b57cec5SDimitry Andric int place = masters_place; 48630b57cec5SDimitry Andric int gap_ct = gap; 48640b57cec5SDimitry Andric for (f = 0; f < n_th; f++) { 48650b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48660b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48670b57cec5SDimitry Andric 48680b57cec5SDimitry Andric th->th.th_first_place = first_place; 48690b57cec5SDimitry Andric th->th.th_last_place = last_place; 48700b57cec5SDimitry Andric th->th.th_new_place = place; 48710b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48720b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48730b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48740b57cec5SDimitry Andric } 48750b57cec5SDimitry Andric s_count++; 48760b57cec5SDimitry Andric 48770b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 48780b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 48790b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 48800b57cec5SDimitry Andric // we added an extra thread to this place; move to next place 48810b57cec5SDimitry Andric if (place == last_place) { 48820b57cec5SDimitry Andric place = first_place; 4883bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 48840b57cec5SDimitry Andric place = 0; 48850b57cec5SDimitry Andric } else { 48860b57cec5SDimitry Andric place++; 48870b57cec5SDimitry Andric } 48880b57cec5SDimitry Andric s_count = 0; 48890b57cec5SDimitry Andric gap_ct = 1; 48900b57cec5SDimitry Andric rem--; 48910b57cec5SDimitry Andric } else if (s_count == S) { // place full; don't add extra 48920b57cec5SDimitry Andric if (place == last_place) { 48930b57cec5SDimitry Andric place = first_place; 4894bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 48950b57cec5SDimitry Andric place = 0; 48960b57cec5SDimitry Andric } else { 48970b57cec5SDimitry Andric place++; 
48980b57cec5SDimitry Andric } 48990b57cec5SDimitry Andric gap_ct++; 49000b57cec5SDimitry Andric s_count = 0; 49010b57cec5SDimitry Andric } 49020b57cec5SDimitry Andric 49030b57cec5SDimitry Andric KA_TRACE(100, 49040b57cec5SDimitry Andric ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 49050b57cec5SDimitry Andric "partition = [%d,%d]\n", 49060b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 49070b57cec5SDimitry Andric th->th.th_new_place, first_place, last_place)); 49080b57cec5SDimitry Andric } 49090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(place == masters_place); 49100b57cec5SDimitry Andric } 49110b57cec5SDimitry Andric } break; 49120b57cec5SDimitry Andric 49130b57cec5SDimitry Andric case proc_bind_spread: { 49140b57cec5SDimitry Andric int f; 49150b57cec5SDimitry Andric int n_th = team->t.t_nproc; 49160b57cec5SDimitry Andric int n_places; 49170b57cec5SDimitry Andric int thidx; 49180b57cec5SDimitry Andric if (first_place <= last_place) { 49190b57cec5SDimitry Andric n_places = last_place - first_place + 1; 49200b57cec5SDimitry Andric } else { 4921bdd1243dSDimitry Andric n_places = num_masks - first_place + last_place + 1; 49220b57cec5SDimitry Andric } 49230b57cec5SDimitry Andric if (n_th <= n_places) { 49240b57cec5SDimitry Andric int place = -1; 49250b57cec5SDimitry Andric 4926bdd1243dSDimitry Andric if (n_places != num_masks) { 49270b57cec5SDimitry Andric int S = n_places / n_th; 49280b57cec5SDimitry Andric int s_count, rem, gap, gap_ct; 49290b57cec5SDimitry Andric 49300b57cec5SDimitry Andric place = masters_place; 49310b57cec5SDimitry Andric rem = n_places - n_th * S; 49320b57cec5SDimitry Andric gap = rem ? 
n_th / rem : 1; 49330b57cec5SDimitry Andric gap_ct = gap; 49340b57cec5SDimitry Andric thidx = n_th; 49350b57cec5SDimitry Andric if (update_master_only == 1) 49360b57cec5SDimitry Andric thidx = 1; 49370b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 49380b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 49390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 49400b57cec5SDimitry Andric 49410b57cec5SDimitry Andric th->th.th_first_place = place; 49420b57cec5SDimitry Andric th->th.th_new_place = place; 49430b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 49440b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 49450b57cec5SDimitry Andric team->t.t_display_affinity = 1; 49460b57cec5SDimitry Andric } 49470b57cec5SDimitry Andric s_count = 1; 49480b57cec5SDimitry Andric while (s_count < S) { 49490b57cec5SDimitry Andric if (place == last_place) { 49500b57cec5SDimitry Andric place = first_place; 4951bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 49520b57cec5SDimitry Andric place = 0; 49530b57cec5SDimitry Andric } else { 49540b57cec5SDimitry Andric place++; 49550b57cec5SDimitry Andric } 49560b57cec5SDimitry Andric s_count++; 49570b57cec5SDimitry Andric } 49580b57cec5SDimitry Andric if (rem && (gap_ct == gap)) { 49590b57cec5SDimitry Andric if (place == last_place) { 49600b57cec5SDimitry Andric place = first_place; 4961bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 49620b57cec5SDimitry Andric place = 0; 49630b57cec5SDimitry Andric } else { 49640b57cec5SDimitry Andric place++; 49650b57cec5SDimitry Andric } 49660b57cec5SDimitry Andric rem--; 49670b57cec5SDimitry Andric gap_ct = 0; 49680b57cec5SDimitry Andric } 49690b57cec5SDimitry Andric th->th.th_last_place = place; 49700b57cec5SDimitry Andric gap_ct++; 49710b57cec5SDimitry Andric 49720b57cec5SDimitry Andric if (place == last_place) { 49730b57cec5SDimitry Andric place = first_place; 4974bdd1243dSDimitry Andric } else if (place == 
(num_masks - 1)) { 49750b57cec5SDimitry Andric place = 0; 49760b57cec5SDimitry Andric } else { 49770b57cec5SDimitry Andric place++; 49780b57cec5SDimitry Andric } 49790b57cec5SDimitry Andric 49800b57cec5SDimitry Andric KA_TRACE(100, 49810b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4982bdd1243dSDimitry Andric "partition = [%d,%d], num_masks: %u\n", 49830b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 49840b57cec5SDimitry Andric f, th->th.th_new_place, th->th.th_first_place, 4985bdd1243dSDimitry Andric th->th.th_last_place, num_masks)); 49860b57cec5SDimitry Andric } 49870b57cec5SDimitry Andric } else { 49880b57cec5SDimitry Andric /* Having uniform space of available computation places I can create 49890b57cec5SDimitry Andric T partitions of round(P/T) size and put threads into the first 49900b57cec5SDimitry Andric place of each partition. */ 49910b57cec5SDimitry Andric double current = static_cast<double>(masters_place); 49920b57cec5SDimitry Andric double spacing = 49930b57cec5SDimitry Andric (static_cast<double>(n_places + 1) / static_cast<double>(n_th)); 49940b57cec5SDimitry Andric int first, last; 49950b57cec5SDimitry Andric kmp_info_t *th; 49960b57cec5SDimitry Andric 49970b57cec5SDimitry Andric thidx = n_th + 1; 49980b57cec5SDimitry Andric if (update_master_only == 1) 49990b57cec5SDimitry Andric thidx = 1; 50000b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 50010b57cec5SDimitry Andric first = static_cast<int>(current); 50020b57cec5SDimitry Andric last = static_cast<int>(current + spacing) - 1; 50030b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last >= first); 50040b57cec5SDimitry Andric if (first >= n_places) { 50050b57cec5SDimitry Andric if (masters_place) { 50060b57cec5SDimitry Andric first -= n_places; 50070b57cec5SDimitry Andric last -= n_places; 50080b57cec5SDimitry Andric if (first == (masters_place + 1)) { 50090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 50100b57cec5SDimitry 
Andric first--; 50110b57cec5SDimitry Andric } 50120b57cec5SDimitry Andric if (last == masters_place) { 50130b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == (n_th - 1)); 50140b57cec5SDimitry Andric last--; 50150b57cec5SDimitry Andric } 50160b57cec5SDimitry Andric } else { 50170b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 50180b57cec5SDimitry Andric first = 0; 50190b57cec5SDimitry Andric last = 0; 50200b57cec5SDimitry Andric } 50210b57cec5SDimitry Andric } 50220b57cec5SDimitry Andric if (last >= n_places) { 50230b57cec5SDimitry Andric last = (n_places - 1); 50240b57cec5SDimitry Andric } 50250b57cec5SDimitry Andric place = first; 50260b57cec5SDimitry Andric current += spacing; 50270b57cec5SDimitry Andric if (f < n_th) { 50280b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= first); 50290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > first); 50300b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= last); 50310b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > last); 50320b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last_place >= first_place); 50330b57cec5SDimitry Andric th = team->t.t_threads[f]; 50340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th); 50350b57cec5SDimitry Andric th->th.th_first_place = first; 50360b57cec5SDimitry Andric th->th.th_new_place = place; 50370b57cec5SDimitry Andric th->th.th_last_place = last; 50380b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 50390b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 50400b57cec5SDimitry Andric team->t.t_display_affinity = 1; 50410b57cec5SDimitry Andric } 50420b57cec5SDimitry Andric KA_TRACE(100, 50430b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 50440b57cec5SDimitry Andric "partition = [%d,%d], spacing = %.4f\n", 50450b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 50460b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 50470b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place, spacing)); 
50480b57cec5SDimitry Andric } 50490b57cec5SDimitry Andric } 50500b57cec5SDimitry Andric } 50510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 50520b57cec5SDimitry Andric } else { 50530b57cec5SDimitry Andric int S, rem, gap, s_count; 50540b57cec5SDimitry Andric S = n_th / n_places; 50550b57cec5SDimitry Andric s_count = 0; 50560b57cec5SDimitry Andric rem = n_th - (S * n_places); 50570b57cec5SDimitry Andric gap = rem > 0 ? n_places / rem : n_places; 50580b57cec5SDimitry Andric int place = masters_place; 50590b57cec5SDimitry Andric int gap_ct = gap; 50600b57cec5SDimitry Andric thidx = n_th; 50610b57cec5SDimitry Andric if (update_master_only == 1) 50620b57cec5SDimitry Andric thidx = 1; 50630b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 50640b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 50650b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 50660b57cec5SDimitry Andric 50670b57cec5SDimitry Andric th->th.th_first_place = place; 50680b57cec5SDimitry Andric th->th.th_last_place = place; 50690b57cec5SDimitry Andric th->th.th_new_place = place; 50700b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 50710b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 50720b57cec5SDimitry Andric team->t.t_display_affinity = 1; 50730b57cec5SDimitry Andric } 50740b57cec5SDimitry Andric s_count++; 50750b57cec5SDimitry Andric 50760b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 50770b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 50780b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 50790b57cec5SDimitry Andric // we added an extra thread to this place; move on to next place 50800b57cec5SDimitry Andric if (place == last_place) { 50810b57cec5SDimitry Andric place = first_place; 5082bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 50830b57cec5SDimitry Andric place = 0; 
50840b57cec5SDimitry Andric } else { 50850b57cec5SDimitry Andric place++; 50860b57cec5SDimitry Andric } 50870b57cec5SDimitry Andric s_count = 0; 50880b57cec5SDimitry Andric gap_ct = 1; 50890b57cec5SDimitry Andric rem--; 50900b57cec5SDimitry Andric } else if (s_count == S) { // place is full; don't add extra thread 50910b57cec5SDimitry Andric if (place == last_place) { 50920b57cec5SDimitry Andric place = first_place; 5093bdd1243dSDimitry Andric } else if (place == (num_masks - 1)) { 50940b57cec5SDimitry Andric place = 0; 50950b57cec5SDimitry Andric } else { 50960b57cec5SDimitry Andric place++; 50970b57cec5SDimitry Andric } 50980b57cec5SDimitry Andric gap_ct++; 50990b57cec5SDimitry Andric s_count = 0; 51000b57cec5SDimitry Andric } 51010b57cec5SDimitry Andric 51020b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 51030b57cec5SDimitry Andric "partition = [%d,%d]\n", 51040b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 51050b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 51060b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place)); 51070b57cec5SDimitry Andric } 51080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 51090b57cec5SDimitry Andric } 51100b57cec5SDimitry Andric } break; 51110b57cec5SDimitry Andric 51120b57cec5SDimitry Andric default: 51130b57cec5SDimitry Andric break; 51140b57cec5SDimitry Andric } 51150b57cec5SDimitry Andric 51160b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 51170b57cec5SDimitry Andric } 51180b57cec5SDimitry Andric 51190b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 51200b57cec5SDimitry Andric 51210b57cec5SDimitry Andric /* allocate a new team data structure to use. 
take one off of the free pool if 51220b57cec5SDimitry Andric available */ 51230b57cec5SDimitry Andric kmp_team_t * 51240b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 51250b57cec5SDimitry Andric #if OMPT_SUPPORT 51260b57cec5SDimitry Andric ompt_data_t ompt_parallel_data, 51270b57cec5SDimitry Andric #endif 51280b57cec5SDimitry Andric kmp_proc_bind_t new_proc_bind, 51290b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 51300b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 51310b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 51320b57cec5SDimitry Andric int f; 51330b57cec5SDimitry Andric kmp_team_t *team; 51340b57cec5SDimitry Andric int use_hot_team = !root->r.r_active; 51350b57cec5SDimitry Andric int level = 0; 5136349cc55cSDimitry Andric int do_place_partition = 1; 51370b57cec5SDimitry Andric 51380b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: called\n")); 51390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 51400b57cec5SDimitry Andric KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 51410b57cec5SDimitry Andric KMP_MB(); 51420b57cec5SDimitry Andric 51430b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51440b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams; 51450b57cec5SDimitry Andric if (master) { 51460b57cec5SDimitry Andric team = master->th.th_team; 51470b57cec5SDimitry Andric level = team->t.t_active_level; 51480b57cec5SDimitry Andric if (master->th.th_teams_microtask) { // in teams construct? 
51490b57cec5SDimitry Andric if (master->th.th_teams_size.nteams > 1 && 51500b57cec5SDimitry Andric ( // #teams > 1 51510b57cec5SDimitry Andric team->t.t_pkfn == 51520b57cec5SDimitry Andric (microtask_t)__kmp_teams_master || // inner fork of the teams 51530b57cec5SDimitry Andric master->th.th_teams_level < 51540b57cec5SDimitry Andric team->t.t_level)) { // or nested parallel inside the teams 51550b57cec5SDimitry Andric ++level; // not increment if #teams==1, or for outer fork of the teams; 51560b57cec5SDimitry Andric // increment otherwise 51570b57cec5SDimitry Andric } 5158349cc55cSDimitry Andric // Do not perform the place partition if inner fork of the teams 5159349cc55cSDimitry Andric // Wait until nested parallel region encountered inside teams construct 5160349cc55cSDimitry Andric if ((master->th.th_teams_size.nteams == 1 && 5161349cc55cSDimitry Andric master->th.th_teams_level >= team->t.t_level) || 5162349cc55cSDimitry Andric (team->t.t_pkfn == (microtask_t)__kmp_teams_master)) 5163349cc55cSDimitry Andric do_place_partition = 0; 51640b57cec5SDimitry Andric } 51650b57cec5SDimitry Andric hot_teams = master->th.th_hot_teams; 51660b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level && hot_teams && 5167e8d8bef9SDimitry Andric hot_teams[level].hot_team) { 5168e8d8bef9SDimitry Andric // hot team has already been allocated for given level 51690b57cec5SDimitry Andric use_hot_team = 1; 51700b57cec5SDimitry Andric } else { 51710b57cec5SDimitry Andric use_hot_team = 0; 51720b57cec5SDimitry Andric } 5173e8d8bef9SDimitry Andric } else { 5174e8d8bef9SDimitry Andric // check we won't access uninitialized hot_teams, just in case 5175e8d8bef9SDimitry Andric KMP_DEBUG_ASSERT(new_nproc == 1); 51760b57cec5SDimitry Andric } 51770b57cec5SDimitry Andric #endif 51780b57cec5SDimitry Andric // Optimization to use a "hot" team 51790b57cec5SDimitry Andric if (use_hot_team && new_nproc > 1) { 51800b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc <= max_nproc); 
51810b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51820b57cec5SDimitry Andric team = hot_teams[level].hot_team; 51830b57cec5SDimitry Andric #else 51840b57cec5SDimitry Andric team = root->r.r_hot_team; 51850b57cec5SDimitry Andric #endif 51860b57cec5SDimitry Andric #if KMP_DEBUG 51870b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 51880b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 51890b57cec5SDimitry Andric "task_team[1] = %p before reinit\n", 51900b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 51910b57cec5SDimitry Andric } 51920b57cec5SDimitry Andric #endif 51930b57cec5SDimitry Andric 5194349cc55cSDimitry Andric if (team->t.t_nproc != new_nproc && 5195349cc55cSDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5196349cc55cSDimitry Andric // Distributed barrier may need a resize 5197349cc55cSDimitry Andric int old_nthr = team->t.t_nproc; 5198349cc55cSDimitry Andric __kmp_resize_dist_barrier(team, old_nthr, new_nproc); 5199349cc55cSDimitry Andric } 5200349cc55cSDimitry Andric 5201349cc55cSDimitry Andric // If not doing the place partition, then reset the team's proc bind 5202349cc55cSDimitry Andric // to indicate that partitioning of all threads still needs to take place 5203349cc55cSDimitry Andric if (do_place_partition == 0) 5204349cc55cSDimitry Andric team->t.t_proc_bind = proc_bind_default; 52050b57cec5SDimitry Andric // Has the number of threads changed? 52060b57cec5SDimitry Andric /* Let's assume the most common case is that the number of threads is 52070b57cec5SDimitry Andric unchanged, and put that case first. 
*/ 52080b57cec5SDimitry Andric if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 52090b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 52100b57cec5SDimitry Andric // This case can mean that omp_set_num_threads() was called and the hot 52110b57cec5SDimitry Andric // team size was already reduced, so we check the special flag 52120b57cec5SDimitry Andric if (team->t.t_size_changed == -1) { 52130b57cec5SDimitry Andric team->t.t_size_changed = 1; 52140b57cec5SDimitry Andric } else { 52150b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 52160b57cec5SDimitry Andric } 52170b57cec5SDimitry Andric 52180b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 52190b57cec5SDimitry Andric kmp_r_sched_t new_sched = new_icvs->sched; 5220fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 52210b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 52220b57cec5SDimitry Andric 52230b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, 52240b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 52250b57cec5SDimitry Andric 52260b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 52270b57cec5SDimitry Andric team->t.t_threads[0], team)); 52280b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 52290b57cec5SDimitry Andric 52300b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 52310b57cec5SDimitry Andric if ((team->t.t_size_changed == 0) && 52320b57cec5SDimitry Andric (team->t.t_proc_bind == new_proc_bind)) { 52330b57cec5SDimitry Andric if (new_proc_bind == proc_bind_spread) { 5234349cc55cSDimitry Andric if (do_place_partition) { 5235349cc55cSDimitry Andric // add flag to update only master for spread 5236349cc55cSDimitry Andric __kmp_partition_places(team, 1); 5237349cc55cSDimitry Andric } 52380b57cec5SDimitry Andric } 
52390b57cec5SDimitry Andric KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 52400b57cec5SDimitry Andric "proc_bind = %d, partition = [%d,%d]\n", 52410b57cec5SDimitry Andric team->t.t_id, new_proc_bind, team->t.t_first_place, 52420b57cec5SDimitry Andric team->t.t_last_place)); 52430b57cec5SDimitry Andric } else { 5244349cc55cSDimitry Andric if (do_place_partition) { 52450b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 52460b57cec5SDimitry Andric __kmp_partition_places(team); 52470b57cec5SDimitry Andric } 5248349cc55cSDimitry Andric } 52490b57cec5SDimitry Andric #else 52500b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 52510b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 52520b57cec5SDimitry Andric } else if (team->t.t_nproc > new_nproc) { 52530b57cec5SDimitry Andric KA_TRACE(20, 52540b57cec5SDimitry Andric ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 52550b57cec5SDimitry Andric new_nproc)); 52560b57cec5SDimitry Andric 52570b57cec5SDimitry Andric team->t.t_size_changed = 1; 5258349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5259349cc55cSDimitry Andric // Barrier size already reduced earlier in this function 5260349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5261349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5262349cc55cSDimitry Andric } 52630b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52640b57cec5SDimitry Andric if (__kmp_hot_teams_mode == 0) { 52650b57cec5SDimitry Andric // AC: saved number of threads should correspond to team's value in this 52660b57cec5SDimitry Andric // mode, can be bigger in mode 1, when hot team has threads in reserve 52670b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 52680b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; 52690b57cec5SDimitry Andric #endif // 
KMP_NESTED_HOT_TEAMS 52700b57cec5SDimitry Andric /* release the extra threads we don't need any more */ 52710b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; f++) { 52720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52730b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 52740b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should 52750b57cec5SDimitry Andric // unref task team. 52760b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_team = NULL; 52770b57cec5SDimitry Andric } 52780b57cec5SDimitry Andric __kmp_free_thread(team->t.t_threads[f]); 52790b57cec5SDimitry Andric team->t.t_threads[f] = NULL; 52800b57cec5SDimitry Andric } 52810b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52820b57cec5SDimitry Andric } // (__kmp_hot_teams_mode == 0) 52830b57cec5SDimitry Andric else { 52840b57cec5SDimitry Andric // When keeping extra threads in team, switch threads to wait on own 52850b57cec5SDimitry Andric // b_go flag 52860b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; ++f) { 52870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52880b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 52890b57cec5SDimitry Andric for (int b = 0; b < bs_last_barrier; ++b) { 52900b57cec5SDimitry Andric if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 52910b57cec5SDimitry Andric balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 52920b57cec5SDimitry Andric } 52930b57cec5SDimitry Andric KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 52940b57cec5SDimitry Andric } 52950b57cec5SDimitry Andric } 52960b57cec5SDimitry Andric } 52970b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 52980b57cec5SDimitry Andric team->t.t_nproc = new_nproc; 52990b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 53000b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); 53010b57cec5SDimitry 
Andric __kmp_reinitialize_team(team, new_icvs, 53020b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 53030b57cec5SDimitry Andric 53040b57cec5SDimitry Andric // Update remaining threads 53050b57cec5SDimitry Andric for (f = 0; f < new_nproc; ++f) { 53060b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc = new_nproc; 53070b57cec5SDimitry Andric } 53080b57cec5SDimitry Andric 5309fe6060f1SDimitry Andric // restore the current task state of the primary thread: should be the 53100b57cec5SDimitry Andric // implicit task 53110b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 53120b57cec5SDimitry Andric team->t.t_threads[0], team)); 53130b57cec5SDimitry Andric 53140b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 53150b57cec5SDimitry Andric 53160b57cec5SDimitry Andric #ifdef KMP_DEBUG 53170b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 53180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 53190b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 53200b57cec5SDimitry Andric team->t.t_nproc); 53210b57cec5SDimitry Andric } 53220b57cec5SDimitry Andric #endif 53230b57cec5SDimitry Andric 5324349cc55cSDimitry Andric if (do_place_partition) { 53250b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 53260b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 53270b57cec5SDimitry Andric __kmp_partition_places(team); 53280b57cec5SDimitry Andric #endif 5329349cc55cSDimitry Andric } 53300b57cec5SDimitry Andric } else { // team->t.t_nproc < new_nproc 53310b57cec5SDimitry Andric 53320b57cec5SDimitry Andric KA_TRACE(20, 53330b57cec5SDimitry Andric ("__kmp_allocate_team: increasing hot team thread count to %d\n", 53340b57cec5SDimitry Andric new_nproc)); 5335349cc55cSDimitry Andric int old_nproc = team->t.t_nproc; // save old value and use to update only 53360b57cec5SDimitry Andric team->t.t_size_changed = 1; 
53370b57cec5SDimitry Andric 53380b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 53390b57cec5SDimitry Andric int avail_threads = hot_teams[level].hot_team_nth; 53400b57cec5SDimitry Andric if (new_nproc < avail_threads) 53410b57cec5SDimitry Andric avail_threads = new_nproc; 53420b57cec5SDimitry Andric kmp_info_t **other_threads = team->t.t_threads; 53430b57cec5SDimitry Andric for (f = team->t.t_nproc; f < avail_threads; ++f) { 53440b57cec5SDimitry Andric // Adjust barrier data of reserved threads (if any) of the team 53450b57cec5SDimitry Andric // Other data will be set in __kmp_initialize_info() below. 53460b57cec5SDimitry Andric int b; 53470b57cec5SDimitry Andric kmp_balign_t *balign = other_threads[f]->th.th_bar; 53480b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53490b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 53500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 53510b57cec5SDimitry Andric #if USE_DEBUGGER 53520b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 53530b57cec5SDimitry Andric #endif 53540b57cec5SDimitry Andric } 53550b57cec5SDimitry Andric } 53560b57cec5SDimitry Andric if (hot_teams[level].hot_team_nth >= new_nproc) { 53570b57cec5SDimitry Andric // we have all needed threads in reserve, no need to allocate any 53580b57cec5SDimitry Andric // this only possible in mode 1, cannot have reserved threads in mode 0 53590b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 53600b57cec5SDimitry Andric team->t.t_nproc = new_nproc; // just get reserved threads involved 53610b57cec5SDimitry Andric } else { 5362349cc55cSDimitry Andric // We may have some threads in reserve, but not enough; 5363349cc55cSDimitry Andric // get reserved threads involved if any. 
5364349cc55cSDimitry Andric team->t.t_nproc = hot_teams[level].hot_team_nth; 53650b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 53660b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 53670b57cec5SDimitry Andric if (team->t.t_max_nproc < new_nproc) { 53680b57cec5SDimitry Andric /* reallocate larger arrays */ 53690b57cec5SDimitry Andric __kmp_reallocate_team_arrays(team, new_nproc); 53700b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, NULL); 53710b57cec5SDimitry Andric } 53720b57cec5SDimitry Andric 5373489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5374fe6060f1SDimitry Andric /* Temporarily set full mask for primary thread before creation of 5375fe6060f1SDimitry Andric workers. The reason is that workers inherit the affinity from the 5376fe6060f1SDimitry Andric primary thread, so if a lot of workers are created on the single 5377fe6060f1SDimitry Andric core quickly, they don't get a chance to set their own affinity for 5378fe6060f1SDimitry Andric a long time. 
*/ 537906c3fb27SDimitry Andric kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask}; 53800b57cec5SDimitry Andric #endif 53810b57cec5SDimitry Andric 53820b57cec5SDimitry Andric /* allocate new threads for the hot team */ 53830b57cec5SDimitry Andric for (f = team->t.t_nproc; f < new_nproc; f++) { 53840b57cec5SDimitry Andric kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 53850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_worker); 53860b57cec5SDimitry Andric team->t.t_threads[f] = new_worker; 53870b57cec5SDimitry Andric 53880b57cec5SDimitry Andric KA_TRACE(20, 53890b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init T#%d arrived: " 53900b57cec5SDimitry Andric "join=%llu, plain=%llu\n", 53910b57cec5SDimitry Andric team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 53920b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 53930b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 53940b57cec5SDimitry Andric 53950b57cec5SDimitry Andric { // Initialize barrier data for new threads. 
53960b57cec5SDimitry Andric int b; 53970b57cec5SDimitry Andric kmp_balign_t *balign = new_worker->th.th_bar; 53980b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53990b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 54000b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 54010b57cec5SDimitry Andric KMP_BARRIER_PARENT_FLAG); 54020b57cec5SDimitry Andric #if USE_DEBUGGER 54030b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 54040b57cec5SDimitry Andric #endif 54050b57cec5SDimitry Andric } 54060b57cec5SDimitry Andric } 54070b57cec5SDimitry Andric } 54080b57cec5SDimitry Andric 5409489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5410fe6060f1SDimitry Andric /* Restore initial primary thread's affinity mask */ 541106c3fb27SDimitry Andric new_temp_affinity.restore(); 54120b57cec5SDimitry Andric #endif 54130b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 54140b57cec5SDimitry Andric } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 54150b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 5416349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5417349cc55cSDimitry Andric // Barrier size already increased earlier in this function 5418349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5419349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5420349cc55cSDimitry Andric } 54210b57cec5SDimitry Andric /* make sure everyone is syncronized */ 54220b57cec5SDimitry Andric // new threads below 54230b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, 54240b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 54250b57cec5SDimitry Andric 54260b57cec5SDimitry Andric /* reinitialize the threads */ 54270b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 54280b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) 54290b57cec5SDimitry Andric __kmp_initialize_info(team->t.t_threads[f], team, f, 54300b57cec5SDimitry Andric __kmp_gtid_from_tid(f, team)); 54310b57cec5SDimitry Andric 543206c3fb27SDimitry Andric // set th_task_state for new threads in hot team with older thread's state 543306c3fb27SDimitry Andric kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state; 54340b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 54350b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = old_state; 54360b57cec5SDimitry Andric 54370b57cec5SDimitry Andric #ifdef KMP_DEBUG 54380b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) { 54390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 54400b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 54410b57cec5SDimitry Andric team->t.t_nproc); 54420b57cec5SDimitry Andric } 54430b57cec5SDimitry Andric #endif 54440b57cec5SDimitry Andric 5445349cc55cSDimitry Andric if (do_place_partition) { 54460b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, 
new_proc_bind); 54470b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 54480b57cec5SDimitry Andric __kmp_partition_places(team); 54490b57cec5SDimitry Andric #endif 5450349cc55cSDimitry Andric } 54510b57cec5SDimitry Andric } // Check changes in number of threads 54520b57cec5SDimitry Andric 54530b57cec5SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 54540b57cec5SDimitry Andric if (master->th.th_teams_microtask) { 54550b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54560b57cec5SDimitry Andric // propagate teams construct specific info to workers 54570b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54580b57cec5SDimitry Andric thr->th.th_teams_microtask = master->th.th_teams_microtask; 54590b57cec5SDimitry Andric thr->th.th_teams_level = master->th.th_teams_level; 54600b57cec5SDimitry Andric thr->th.th_teams_size = master->th.th_teams_size; 54610b57cec5SDimitry Andric } 54620b57cec5SDimitry Andric } 54630b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 54640b57cec5SDimitry Andric if (level) { 54650b57cec5SDimitry Andric // Sync barrier state for nested hot teams, not needed for outermost hot 54660b57cec5SDimitry Andric // team. 
54670b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54680b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54690b57cec5SDimitry Andric int b; 54700b57cec5SDimitry Andric kmp_balign_t *balign = thr->th.th_bar; 54710b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 54720b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 54730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 54740b57cec5SDimitry Andric #if USE_DEBUGGER 54750b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 54760b57cec5SDimitry Andric #endif 54770b57cec5SDimitry Andric } 54780b57cec5SDimitry Andric } 54790b57cec5SDimitry Andric } 54800b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 54810b57cec5SDimitry Andric 54820b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 54830b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 54840b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 54850b57cec5SDimitry Andric // The hot team re-uses the previous task team, 54860b57cec5SDimitry Andric // if untouched during the previous release->gather phase. 
54870b57cec5SDimitry Andric 54880b57cec5SDimitry Andric KF_TRACE(10, (" hot_team = %p\n", team)); 54890b57cec5SDimitry Andric 54900b57cec5SDimitry Andric #if KMP_DEBUG 54910b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 54920b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 54930b57cec5SDimitry Andric "task_team[1] = %p after reinit\n", 54940b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 54950b57cec5SDimitry Andric } 54960b57cec5SDimitry Andric #endif 54970b57cec5SDimitry Andric 54980b57cec5SDimitry Andric #if OMPT_SUPPORT 54990b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55000b57cec5SDimitry Andric #endif 55010b57cec5SDimitry Andric 55020b57cec5SDimitry Andric KMP_MB(); 55030b57cec5SDimitry Andric 55040b57cec5SDimitry Andric return team; 55050b57cec5SDimitry Andric } 55060b57cec5SDimitry Andric 55070b57cec5SDimitry Andric /* next, let's try to take one from the team pool */ 55080b57cec5SDimitry Andric KMP_MB(); 55090b57cec5SDimitry Andric for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) { 55100b57cec5SDimitry Andric /* TODO: consider resizing undersized teams instead of reaping them, now 55110b57cec5SDimitry Andric that we have a resizing mechanism */ 55120b57cec5SDimitry Andric if (team->t.t_max_nproc >= max_nproc) { 55130b57cec5SDimitry Andric /* take this team from the team pool */ 55140b57cec5SDimitry Andric __kmp_team_pool = team->t.t_next_pool; 55150b57cec5SDimitry Andric 5516349cc55cSDimitry Andric if (max_nproc > 1 && 5517349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5518349cc55cSDimitry Andric if (!team->t.b) { // Allocate barrier structure 5519349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5520349cc55cSDimitry Andric } 5521349cc55cSDimitry Andric } 5522349cc55cSDimitry Andric 55230b57cec5SDimitry Andric /* setup the team for fresh use */ 
55240b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 55250b57cec5SDimitry Andric 55260b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 55270b57cec5SDimitry Andric "task_team[1] %p to NULL\n", 55280b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 55290b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; 55300b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; 55310b57cec5SDimitry Andric 55320b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 55330b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 55340b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 55350b57cec5SDimitry Andric 55360b57cec5SDimitry Andric KA_TRACE( 55370b57cec5SDimitry Andric 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 55380b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 55390b57cec5SDimitry Andric { // Initialize barrier data. 
55400b57cec5SDimitry Andric int b; 55410b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55420b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 55430b57cec5SDimitry Andric #if USE_DEBUGGER 55440b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 55450b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 55460b57cec5SDimitry Andric #endif 55470b57cec5SDimitry Andric } 55480b57cec5SDimitry Andric } 55490b57cec5SDimitry Andric 55500b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 55510b57cec5SDimitry Andric 55520b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 55530b57cec5SDimitry Andric team->t.t_id)); 55540b57cec5SDimitry Andric 55550b57cec5SDimitry Andric #if OMPT_SUPPORT 55560b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55570b57cec5SDimitry Andric #endif 55580b57cec5SDimitry Andric 55590b57cec5SDimitry Andric KMP_MB(); 55600b57cec5SDimitry Andric 55610b57cec5SDimitry Andric return team; 55620b57cec5SDimitry Andric } 55630b57cec5SDimitry Andric 55640b57cec5SDimitry Andric /* reap team if it is too small, then loop back and check the next one */ 55650b57cec5SDimitry Andric // not sure if this is wise, but, will be redone during the hot-teams 55660b57cec5SDimitry Andric // rewrite. 55670b57cec5SDimitry Andric /* TODO: Use technique to find the right size hot-team, don't reap them */ 55680b57cec5SDimitry Andric team = __kmp_reap_team(team); 55690b57cec5SDimitry Andric __kmp_team_pool = team; 55700b57cec5SDimitry Andric } 55710b57cec5SDimitry Andric 55720b57cec5SDimitry Andric /* nothing available in the pool, no matter, make a new team! 
*/ 55730b57cec5SDimitry Andric KMP_MB(); 55740b57cec5SDimitry Andric team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 55750b57cec5SDimitry Andric 55760b57cec5SDimitry Andric /* and set it up */ 55770b57cec5SDimitry Andric team->t.t_max_nproc = max_nproc; 5578349cc55cSDimitry Andric if (max_nproc > 1 && 5579349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5580349cc55cSDimitry Andric // Allocate barrier structure 5581349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5582349cc55cSDimitry Andric } 5583349cc55cSDimitry Andric 55840b57cec5SDimitry Andric /* NOTE well, for some reason allocating one big buffer and dividing it up 55850b57cec5SDimitry Andric seems to really hurt performance a lot on the P4, so, let's not use this */ 55860b57cec5SDimitry Andric __kmp_allocate_team_arrays(team, max_nproc); 55870b57cec5SDimitry Andric 55880b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 55890b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 55900b57cec5SDimitry Andric 55910b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 55920b57cec5SDimitry Andric "%p to NULL\n", 55930b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 55940b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 55950b57cec5SDimitry Andric // memory, no need to duplicate 55960b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 55970b57cec5SDimitry Andric // memory, no need to duplicate 55980b57cec5SDimitry Andric 55990b57cec5SDimitry Andric if (__kmp_storage_map) { 56000b57cec5SDimitry Andric __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 56010b57cec5SDimitry Andric } 56020b57cec5SDimitry Andric 56030b57cec5SDimitry Andric /* allocate space for arguments */ 
56040b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, FALSE); 56050b57cec5SDimitry Andric team->t.t_argc = argc; 56060b57cec5SDimitry Andric 56070b57cec5SDimitry Andric KA_TRACE(20, 56080b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 56090b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 56100b57cec5SDimitry Andric { // Initialize barrier data. 56110b57cec5SDimitry Andric int b; 56120b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 56130b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 56140b57cec5SDimitry Andric #if USE_DEBUGGER 56150b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 56160b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 56170b57cec5SDimitry Andric #endif 56180b57cec5SDimitry Andric } 56190b57cec5SDimitry Andric } 56200b57cec5SDimitry Andric 56210b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 56220b57cec5SDimitry Andric 56230b57cec5SDimitry Andric #if OMPT_SUPPORT 56240b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 56250b57cec5SDimitry Andric team->t.ompt_serialized_team_info = NULL; 56260b57cec5SDimitry Andric #endif 56270b57cec5SDimitry Andric 56280b57cec5SDimitry Andric KMP_MB(); 56290b57cec5SDimitry Andric 56300b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 56310b57cec5SDimitry Andric team->t.t_id)); 56320b57cec5SDimitry Andric 56330b57cec5SDimitry Andric return team; 56340b57cec5SDimitry Andric } 56350b57cec5SDimitry Andric 56360b57cec5SDimitry Andric /* TODO implement hot-teams at all levels */ 56370b57cec5SDimitry Andric /* TODO implement lazy thread release on demand (disband request) */ 56380b57cec5SDimitry Andric 56390b57cec5SDimitry Andric /* free the team. return it to the team pool. 
   release all the
 * threads associated with it */
// Releases a team back to the team pool (non-hot teams) or performs the
// reduced cleanup required for hot / nested-hot teams.
//
// root   - the root this team belongs to (asserted non-NULL).
// team   - the team to free (asserted non-NULL, nproc <= max_nproc).
// master - (only when KMP_NESTED_HOT_TEAMS) the primary thread, used to
//          decide whether this team is a nested hot team that must be kept.
//
// For non-hot teams this waits for workers to become reapable, tears down
// task teams, returns worker threads to the thread pool, deallocates the
// distributed barrier if one was in use, and pushes the team onto
// __kmp_team_pool. For hot teams it only pops per-worker CG root nodes so
// the team can be re-used.
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  // Recompute the effective nesting level to decide whether this team is a
  // nested hot team (kept alive) rather than a disposable team.
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        // level was not increased in teams construct for team_of_masters
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        // level was not increased in teams construct for team_of_workers
        // before the parallel
        ++level;
      } // team->t.t_level will be increased inside parallel
    }
#if KMP_DEBUG
    // Only needed for the assertion below; KMP_DEBUG_ASSERT compiles away
    // in release builds, so the declaration is guarded too.
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
#endif
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library.
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state; a thread still executing
      // tasks must not be returned to the pool yet.
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping; a sleeping thread cannot
          // advance to KMP_SAFE_TO_REAP, so wake it before spinning again
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // NOTE(review): the 1->2 transition on th_used_in_team appears to
        // mark the thread as being detached under the distributed barrier
        // protocol; the wait loop below then expects it to reach 0 — confirm
        // against the dist-barrier implementation.
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          // With a finite blocktime, workers may have gone to sleep on the
          // dist-barrier sleep flag; resume each one that did.
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    // Clear the (now-released) worker slots.
    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}
destroy it, reclaim all its resources and free its memory */ 58140b57cec5SDimitry Andric kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 58150b57cec5SDimitry Andric kmp_team_t *next_pool = team->t.t_next_pool; 58160b57cec5SDimitry Andric 58170b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 58180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 58190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 58200b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 58210b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_argv); 58220b57cec5SDimitry Andric 58230b57cec5SDimitry Andric /* TODO clean the threads that are a part of this? */ 58240b57cec5SDimitry Andric 58250b57cec5SDimitry Andric /* free stuff */ 58260b57cec5SDimitry Andric __kmp_free_team_arrays(team); 58270b57cec5SDimitry Andric if (team->t.t_argv != &team->t.t_inline_argv[0]) 58280b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 58290b57cec5SDimitry Andric __kmp_free(team); 58300b57cec5SDimitry Andric 58310b57cec5SDimitry Andric KMP_MB(); 58320b57cec5SDimitry Andric return next_pool; 58330b57cec5SDimitry Andric } 58340b57cec5SDimitry Andric 58350b57cec5SDimitry Andric // Free the thread. Don't reap it, just place it on the pool of available 58360b57cec5SDimitry Andric // threads. 58370b57cec5SDimitry Andric // 58380b57cec5SDimitry Andric // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 58390b57cec5SDimitry Andric // binding for the affinity mechanism to be useful. 58400b57cec5SDimitry Andric // 58410b57cec5SDimitry Andric // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 58420b57cec5SDimitry Andric // However, we want to avoid a potential performance problem by always 58430b57cec5SDimitry Andric // scanning through the list to find the correct point at which to insert 58440b57cec5SDimitry Andric // the thread (potential N**2 behavior). 
 To do this we keep track of the
// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
// With single-level parallelism, threads will always be added to the tail
// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
// parallelism, all bets are off and we may need to scan through the entire
// free list.
//
// This change also has a potentially large performance benefit, for some
// applications. Previously, as threads were freed from the hot team, they
// would be placed back on the free list in inverse order. If the hot team
// grew back to it's original size, then the freed thread would be placed
// back on the hot team in reverse order. This could cause bad cache
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
// Now, for single-level parallelism, the OMP tid is always == gtid.
//
// this_th - the thread to return to __kmp_thread_pool. On return it has no
//           team/root/dispatch, its barrier state is reset, its CG-root
//           chain is popped, and it sits in the pool sorted by gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  // TCW_PTR publishes the stores; ordering here matters for threads that
  // may still observe this kmp_info_t concurrently.
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  // Walk up the contention-group root chain, dropping this thread's
  // reference at each node and freeing nodes that become empty.
  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break; // a worker only ever drops one node
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads
   * -> multiple threads can share the data and try to free the task at
   * __kmp_reap_thread at exit. This duplicate use of the task data can happen
   * with higher probability when hot team is disabled but can occurs even when
   * the hot team is enabled */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.
  //
  // In the absence of nested parallelism, the for loop will have 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  // Post-condition: pool remains sorted by gtid.
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  // Pool-active accounting is done under the thread's suspend mutex.
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting
or default if necessary */ 59670b57cec5SDimitry Andric /* Middle initialization might never have occurred */ 59680b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 59690b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 59700b57cec5SDimitry Andric if (__kmp_nth <= __kmp_avail_proc) { 59710b57cec5SDimitry Andric __kmp_zero_bt = FALSE; 59720b57cec5SDimitry Andric } 59730b57cec5SDimitry Andric } 59740b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 59750b57cec5SDimitry Andric 59760b57cec5SDimitry Andric KMP_MB(); 59770b57cec5SDimitry Andric } 59780b57cec5SDimitry Andric 59790b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 59800b57cec5SDimitry Andric 59810b57cec5SDimitry Andric void *__kmp_launch_thread(kmp_info_t *this_thr) { 5982d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 5983e8d8bef9SDimitry Andric ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); 5984e8d8bef9SDimitry Andric // TODO: add a configuration option for time granularity 5985e8d8bef9SDimitry Andric if (ProfileTraceFile) 5986e8d8bef9SDimitry Andric llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget"); 5987e8d8bef9SDimitry Andric #endif 5988e8d8bef9SDimitry Andric 59890b57cec5SDimitry Andric int gtid = this_thr->th.th_info.ds.ds_gtid; 59900b57cec5SDimitry Andric /* void *stack_data;*/ 5991489b1cf2SDimitry Andric kmp_team_t **volatile pteam; 59920b57cec5SDimitry Andric 59930b57cec5SDimitry Andric KMP_MB(); 59940b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); 59950b57cec5SDimitry Andric 59960b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 59970b57cec5SDimitry Andric this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? 
59980b57cec5SDimitry Andric } 59990b57cec5SDimitry Andric 6000fe6060f1SDimitry Andric #if OMPD_SUPPORT 6001fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 6002fe6060f1SDimitry Andric ompd_bp_thread_begin(); 6003fe6060f1SDimitry Andric #endif 6004fe6060f1SDimitry Andric 60050b57cec5SDimitry Andric #if OMPT_SUPPORT 6006fe6060f1SDimitry Andric ompt_data_t *thread_data = nullptr; 60070b57cec5SDimitry Andric if (ompt_enabled.enabled) { 60080b57cec5SDimitry Andric thread_data = &(this_thr->th.ompt_thread_info.thread_data); 60090b57cec5SDimitry Andric *thread_data = ompt_data_none; 60100b57cec5SDimitry Andric 60110b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_overhead; 60120b57cec5SDimitry Andric this_thr->th.ompt_thread_info.wait_id = 0; 60130b57cec5SDimitry Andric this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); 6014489b1cf2SDimitry Andric this_thr->th.ompt_thread_info.parallel_flags = 0; 60150b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_thread_begin) { 60160b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( 60170b57cec5SDimitry Andric ompt_thread_worker, thread_data); 60180b57cec5SDimitry Andric } 60190b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_idle; 60200b57cec5SDimitry Andric } 60210b57cec5SDimitry Andric #endif 6022489b1cf2SDimitry Andric 60230b57cec5SDimitry Andric /* This is the place where threads wait for work */ 60240b57cec5SDimitry Andric while (!TCR_4(__kmp_global.g.g_done)) { 60250b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); 60260b57cec5SDimitry Andric KMP_MB(); 60270b57cec5SDimitry Andric 60280b57cec5SDimitry Andric /* wait for work to do */ 60290b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); 60300b57cec5SDimitry Andric 60310b57cec5SDimitry Andric /* No tid yet since not part of a team */ 60320b57cec5SDimitry Andric __kmp_fork_barrier(gtid, KMP_GTID_DNE); 
60330b57cec5SDimitry Andric 60340b57cec5SDimitry Andric #if OMPT_SUPPORT 60350b57cec5SDimitry Andric if (ompt_enabled.enabled) { 60360b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_overhead; 60370b57cec5SDimitry Andric } 60380b57cec5SDimitry Andric #endif 60390b57cec5SDimitry Andric 6040489b1cf2SDimitry Andric pteam = &this_thr->th.th_team; 60410b57cec5SDimitry Andric 60420b57cec5SDimitry Andric /* have we been allocated? */ 60430b57cec5SDimitry Andric if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { 60440b57cec5SDimitry Andric /* we were just woken up, so run our new task */ 60450b57cec5SDimitry Andric if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { 60460b57cec5SDimitry Andric int rc; 60470b57cec5SDimitry Andric KA_TRACE(20, 60480b57cec5SDimitry Andric ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", 60490b57cec5SDimitry Andric gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 60500b57cec5SDimitry Andric (*pteam)->t.t_pkfn)); 60510b57cec5SDimitry Andric 60520b57cec5SDimitry Andric updateHWFPControl(*pteam); 60530b57cec5SDimitry Andric 60540b57cec5SDimitry Andric #if OMPT_SUPPORT 60550b57cec5SDimitry Andric if (ompt_enabled.enabled) { 60560b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 60570b57cec5SDimitry Andric } 60580b57cec5SDimitry Andric #endif 60590b57cec5SDimitry Andric 60600b57cec5SDimitry Andric rc = (*pteam)->t.t_invoke(gtid); 60610b57cec5SDimitry Andric KMP_ASSERT(rc); 60620b57cec5SDimitry Andric 60630b57cec5SDimitry Andric KMP_MB(); 60640b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", 60650b57cec5SDimitry Andric gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 60660b57cec5SDimitry Andric (*pteam)->t.t_pkfn)); 60670b57cec5SDimitry Andric } 60680b57cec5SDimitry Andric #if OMPT_SUPPORT 60690b57cec5SDimitry Andric if (ompt_enabled.enabled) { 60700b57cec5SDimitry Andric /* no frame set while outside task */ 
60710b57cec5SDimitry Andric __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none; 60720b57cec5SDimitry Andric 60730b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_overhead; 60740b57cec5SDimitry Andric } 60750b57cec5SDimitry Andric #endif 60760b57cec5SDimitry Andric /* join barrier after parallel region */ 60770b57cec5SDimitry Andric __kmp_join_barrier(gtid); 60780b57cec5SDimitry Andric } 60790b57cec5SDimitry Andric } 60800b57cec5SDimitry Andric 6081fe6060f1SDimitry Andric #if OMPD_SUPPORT 6082fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 6083fe6060f1SDimitry Andric ompd_bp_thread_end(); 6084fe6060f1SDimitry Andric #endif 6085fe6060f1SDimitry Andric 60860b57cec5SDimitry Andric #if OMPT_SUPPORT 60870b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_thread_end) { 60880b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data); 60890b57cec5SDimitry Andric } 60900b57cec5SDimitry Andric #endif 60910b57cec5SDimitry Andric 60920b57cec5SDimitry Andric this_thr->th.th_task_team = NULL; 60930b57cec5SDimitry Andric /* run the destructors for the threadprivate data for this thread */ 60940b57cec5SDimitry Andric __kmp_common_destroy_gtid(gtid); 60950b57cec5SDimitry Andric 60960b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); 60970b57cec5SDimitry Andric KMP_MB(); 6098e8d8bef9SDimitry Andric 6099d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 6100e8d8bef9SDimitry Andric llvm::timeTraceProfilerFinishThread(); 6101e8d8bef9SDimitry Andric #endif 61020b57cec5SDimitry Andric return this_thr; 61030b57cec5SDimitry Andric } 61040b57cec5SDimitry Andric 61050b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 61060b57cec5SDimitry Andric 61070b57cec5SDimitry Andric void __kmp_internal_end_dest(void *specific_gtid) { 61080b57cec5SDimitry Andric // Make sure no significant bits are lost 6109e8d8bef9SDimitry Andric int gtid; 
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

/* Shared-library unload hook on Unix: run the same shutdown path as the
   atexit handler. */
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  /* [Windows]
     josh: ideally, we want to completely shutdown the library in this atexit
     handler, but stat code that depends on thread specific data for gtid fails
     because that data becomes unavailable at some point during the shutdown, so
     we call __kmp_internal_end_thread instead. We should eventually remove the
     dependency on __kmp_get_specific_gtid in the stat code and use
     __kmp_internal_end_library to cleanly shutdown the library.

     // TODO: Can some of this comment about GVS be removed?
     I suspect that the offending stat code is executed when the calling thread
     tries to clean up a dead root thread's data structures, resulting in GVS
     code trying to close the GVS structures for that thread, but since the stat
     code uses __kmp_get_specific_gtid to get the gtid with the assumption that
     the calling thread is cleaning up itself instead of another thread, it get
     confused. This happens because allowing a thread to unregister and cleanup
     another thread is a recent modification for addressing an issue.
     Based on the current design (20050722), a thread may end up
     trying to unregister another thread only if thread death does not trigger
     the calling of __kmp_internal_end_thread. For Linux* OS, there is the
     thread specific data destructor function to detect thread death. For
     Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
     is nothing. Thus, the workaround is applicable only for Windows static
     stat library. */
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}

/* Release every runtime resource owned by `thread`; for non-root threads the
   underlying OS thread is first woken from the fork barrier and joined.
   Precondition: caller holds __kmp_forkjoin_lock (see note below). */
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Distributed barrier: flip th_used_in_team 0 -> 3 before waking the
        // thread.  NOTE(review): the meaning of the value 3 is defined by the
        // dist-barrier protocol elsewhere in the runtime — confirm there.
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously.  If it was actively
    // spinning in the thread pool, decrement the global count.
    //
    // There is a small timing hole here - if the worker thread was just waking
    // up after sleeping in the pool, had reset it's th_active_in_pool flag but
    // not decremented the global counter __kmp_thread_pool_active_nth yet, then
    // the global counter might not get updated.
    //
    // Currently, this can only happen as the library is unloaded,
    // so there are no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  // Unpublish the thread from the global roster before freeing it.
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();

} // __kmp_reap_thread

/* Free every entry of the ITT region-frame and barrier-frame domain hash
   tables, returning the memory via thread `th`'s allocator.  Compiled away
   unless USE_ITT_NOTIFY is enabled. */
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        // Save the successor before freeing the current entry.
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next =
            bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}

/* Core library shutdown.  Unregisters the library and marks global
   termination (g_done); if no root thread is still active, reaps every
   pooled worker thread, pooled team, and the task teams, then runs
   __kmp_cleanup().  Serialized by __kmp_initz_lock / __kmp_forkjoin_lock
   acquired in __kmp_internal_end_library (below). */
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies.  */
  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
  // dead roots
#endif

  // Scan for any root that is still active; on exit, i < capacity iff one
  // was found (i is reused for that test below).
  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates. */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
    KMP_MB(); /* Flush all pending memory write invalidates. */

    // Need to check that monitor was initialized before reaping it. If we are
    // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
    // __kmp_monitor will appear to contain valid data, but it is only valid in
    // the parent process, not the child.
    // New behavior (201008): instead of keying off of the flag
    // __kmp_init_parallel, the monitor thread creation is keyed off
    // of the new flag __kmp_init_monitor.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
/* TODO move this to cleanup code */
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
        // there can be uber threads alive here
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads.
    // This is valid for now, but be careful if threads are reaped sooner.
    while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    // See note above: One of the possible fixes for CQ138434 / CQ140126
    //
    // FIXME: push both code fragments down and CSE them?
    //        push them into __kmp_cleanup() ?
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  } /* else !__kmp_global.t_active */
  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates. */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}

/* Library-level shutdown entry point.  gtid_req is the caller's gtid, or a
   negative value to have it looked up from thread-specific storage.  After
   the fast-path "already done" checks and hidden-helper-team teardown, it
   classifies the caller and either unregisters a root thread or performs the
   full termination under __kmp_initz_lock and __kmp_forkjoin_lock. */
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort?
    */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // A root inside a parallel region cannot shut down cleanly: flag
        // abort, mark done, and leave termination to happen later.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* worker threads may call this function through the atexit handler, if they
       * call exit() */
      /* For now, skip the usual subsequent processing and just dump the debug
         buffer.  TODO: do a thorough shutdown instead */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // added unregister library call here when we switch to shm linux
      // if we don't, it will leave lots of files in /dev/shm
      // cleanup shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();

} // __kmp_internal_end_library

/* Per-thread counterpart of __kmp_internal_end_library (above); runs the
   same preliminary "already finished" and hidden-helper-team checks before
   its own per-thread teardown. */
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
   * only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
65660b57cec5SDimitry Andric if (__kmp_global.g.g_abort) { 65670b57cec5SDimitry Andric KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n")); 65680b57cec5SDimitry Andric /* TODO abort? */ 65690b57cec5SDimitry Andric return; 65700b57cec5SDimitry Andric } 65710b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 65720b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n")); 65730b57cec5SDimitry Andric return; 65740b57cec5SDimitry Andric } 65750b57cec5SDimitry Andric 6576e8d8bef9SDimitry Andric // If hidden helper team has been initialized, we need to deinit it 6577fe6060f1SDimitry Andric if (TCR_4(__kmp_init_hidden_helper) && 6578fe6060f1SDimitry Andric !TCR_4(__kmp_hidden_helper_team_done)) { 6579e8d8bef9SDimitry Andric TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE); 6580e8d8bef9SDimitry Andric // First release the main thread to let it continue its work 6581e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread_release(); 6582e8d8bef9SDimitry Andric // Wait until the hidden helper team has been destroyed 6583e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_deinitz_wait(); 6584e8d8bef9SDimitry Andric } 6585e8d8bef9SDimitry Andric 65860b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 65870b57cec5SDimitry Andric 65880b57cec5SDimitry Andric /* find out who we are and what we should do */ 65890b57cec5SDimitry Andric { 65900b57cec5SDimitry Andric int gtid = (gtid_req >= 0) ? 
gtid_req : __kmp_gtid_get_specific(); 65910b57cec5SDimitry Andric KA_TRACE(10, 65920b57cec5SDimitry Andric ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req)); 65930b57cec5SDimitry Andric if (gtid == KMP_GTID_SHUTDOWN) { 65940b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system " 65950b57cec5SDimitry Andric "already shutdown\n")); 65960b57cec5SDimitry Andric return; 65970b57cec5SDimitry Andric } else if (gtid == KMP_GTID_MONITOR) { 65980b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not " 65990b57cec5SDimitry Andric "registered, or system shutdown\n")); 66000b57cec5SDimitry Andric return; 66010b57cec5SDimitry Andric } else if (gtid == KMP_GTID_DNE) { 66020b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system " 66030b57cec5SDimitry Andric "shutdown\n")); 66040b57cec5SDimitry Andric return; 66050b57cec5SDimitry Andric /* we don't know who we are */ 66060b57cec5SDimitry Andric } else if (KMP_UBER_GTID(gtid)) { 66070b57cec5SDimitry Andric /* unregister ourselves as an uber thread. 
gtid is no longer valid */ 66080b57cec5SDimitry Andric if (__kmp_root[gtid]->r.r_active) { 66090b57cec5SDimitry Andric __kmp_global.g.g_abort = -1; 66100b57cec5SDimitry Andric TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 66110b57cec5SDimitry Andric KA_TRACE(10, 66120b57cec5SDimitry Andric ("__kmp_internal_end_thread: root still active, abort T#%d\n", 66130b57cec5SDimitry Andric gtid)); 66140b57cec5SDimitry Andric return; 66150b57cec5SDimitry Andric } else { 66160b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", 66170b57cec5SDimitry Andric gtid)); 66180b57cec5SDimitry Andric __kmp_unregister_root_current_thread(gtid); 66190b57cec5SDimitry Andric } 66200b57cec5SDimitry Andric } else { 66210b57cec5SDimitry Andric /* just a worker thread, let's leave */ 66220b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid)); 66230b57cec5SDimitry Andric 66240b57cec5SDimitry Andric if (gtid >= 0) { 66250b57cec5SDimitry Andric __kmp_threads[gtid]->th.th_task_team = NULL; 66260b57cec5SDimitry Andric } 66270b57cec5SDimitry Andric 66280b57cec5SDimitry Andric KA_TRACE(10, 66290b57cec5SDimitry Andric ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", 66300b57cec5SDimitry Andric gtid)); 66310b57cec5SDimitry Andric return; 66320b57cec5SDimitry Andric } 66330b57cec5SDimitry Andric } 66340b57cec5SDimitry Andric #if KMP_DYNAMIC_LIB 66350b57cec5SDimitry Andric if (__kmp_pause_status != kmp_hard_paused) 66360b57cec5SDimitry Andric // AC: lets not shutdown the dynamic library at the exit of uber thread, 66370b57cec5SDimitry Andric // because we will better shutdown later in the library destructor. 
66380b57cec5SDimitry Andric { 66390b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req)); 66400b57cec5SDimitry Andric return; 66410b57cec5SDimitry Andric } 66420b57cec5SDimitry Andric #endif 66430b57cec5SDimitry Andric /* synchronize the termination process */ 66440b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 66450b57cec5SDimitry Andric 66460b57cec5SDimitry Andric /* have we already finished */ 66470b57cec5SDimitry Andric if (__kmp_global.g.g_abort) { 66480b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n")); 66490b57cec5SDimitry Andric /* TODO abort? */ 66500b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 66510b57cec5SDimitry Andric return; 66520b57cec5SDimitry Andric } 66530b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 66540b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 66550b57cec5SDimitry Andric return; 66560b57cec5SDimitry Andric } 66570b57cec5SDimitry Andric 66580b57cec5SDimitry Andric /* We need this lock to enforce mutex between this reading of 66590b57cec5SDimitry Andric __kmp_threads_capacity and the writing by __kmp_register_root. 66600b57cec5SDimitry Andric Alternatively, we can use a counter of roots that is atomically updated by 66610b57cec5SDimitry Andric __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and 66620b57cec5SDimitry Andric __kmp_internal_end_*. */ 66630b57cec5SDimitry Andric 66640b57cec5SDimitry Andric /* should we finish the run-time? are all siblings done? 
*/ 66650b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 66660b57cec5SDimitry Andric 66670b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; ++i) { 66680b57cec5SDimitry Andric if (KMP_UBER_GTID(i)) { 66690b57cec5SDimitry Andric KA_TRACE( 66700b57cec5SDimitry Andric 10, 66710b57cec5SDimitry Andric ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i)); 66720b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 66730b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 66740b57cec5SDimitry Andric return; 66750b57cec5SDimitry Andric } 66760b57cec5SDimitry Andric } 66770b57cec5SDimitry Andric 66780b57cec5SDimitry Andric /* now we can safely conduct the actual termination */ 66790b57cec5SDimitry Andric 66800b57cec5SDimitry Andric __kmp_internal_end(); 66810b57cec5SDimitry Andric 66820b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 66830b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 66840b57cec5SDimitry Andric 66850b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req)); 66860b57cec5SDimitry Andric 66870b57cec5SDimitry Andric #ifdef DUMP_DEBUG_ON_EXIT 66880b57cec5SDimitry Andric if (__kmp_debug_buf) 66890b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 66900b57cec5SDimitry Andric #endif 66910b57cec5SDimitry Andric } // __kmp_internal_end_thread 66920b57cec5SDimitry Andric 66930b57cec5SDimitry Andric // ----------------------------------------------------------------------------- 66940b57cec5SDimitry Andric // Library registration stuff. 66950b57cec5SDimitry Andric 66960b57cec5SDimitry Andric static long __kmp_registration_flag = 0; 66970b57cec5SDimitry Andric // Random value used to indicate library initialization. 66980b57cec5SDimitry Andric static char *__kmp_registration_str = NULL; 66990b57cec5SDimitry Andric // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. 
67000b57cec5SDimitry Andric 67010b57cec5SDimitry Andric static inline char *__kmp_reg_status_name() { 67020b57cec5SDimitry Andric /* On RHEL 3u5 if linked statically, getpid() returns different values in 67030b57cec5SDimitry Andric each thread. If registration and unregistration go in different threads 67040b57cec5SDimitry Andric (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env 67050b57cec5SDimitry Andric env var can not be found, because the name will contain different pid. */ 6706e8d8bef9SDimitry Andric // macOS* complains about name being too long with additional getuid() 6707e8d8bef9SDimitry Andric #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB 6708e8d8bef9SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(), 6709e8d8bef9SDimitry Andric (int)getuid()); 6710e8d8bef9SDimitry Andric #else 67110b57cec5SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); 6712e8d8bef9SDimitry Andric #endif 67130b57cec5SDimitry Andric } // __kmp_reg_status_get 67140b57cec5SDimitry Andric 6715bdd1243dSDimitry Andric #if defined(KMP_USE_SHM) 6716bdd1243dSDimitry Andric // If /dev/shm is not accessible, we will create a temporary file under /tmp. 6717bdd1243dSDimitry Andric char *temp_reg_status_file_name = nullptr; 6718bdd1243dSDimitry Andric #endif 6719bdd1243dSDimitry Andric 67200b57cec5SDimitry Andric void __kmp_register_library_startup(void) { 67210b57cec5SDimitry Andric 67220b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); // Name of the environment variable. 
67230b57cec5SDimitry Andric int done = 0; 67240b57cec5SDimitry Andric union { 67250b57cec5SDimitry Andric double dtime; 67260b57cec5SDimitry Andric long ltime; 67270b57cec5SDimitry Andric } time; 67280b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 67290b57cec5SDimitry Andric __kmp_initialize_system_tick(); 67300b57cec5SDimitry Andric #endif 67310b57cec5SDimitry Andric __kmp_read_system_time(&time.dtime); 67320b57cec5SDimitry Andric __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); 67330b57cec5SDimitry Andric __kmp_registration_str = 67340b57cec5SDimitry Andric __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, 67350b57cec5SDimitry Andric __kmp_registration_flag, KMP_LIBRARY_FILE); 67360b57cec5SDimitry Andric 67370b57cec5SDimitry Andric KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, 67380b57cec5SDimitry Andric __kmp_registration_str)); 67390b57cec5SDimitry Andric 67400b57cec5SDimitry Andric while (!done) { 67410b57cec5SDimitry Andric 67420b57cec5SDimitry Andric char *value = NULL; // Actual value of the environment variable. 67430b57cec5SDimitry Andric 6744349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6745e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6746e8d8bef9SDimitry Andric int shm_preexist = 0; 6747e8d8bef9SDimitry Andric char *data1; 6748e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666); 6749e8d8bef9SDimitry Andric if ((fd1 == -1) && (errno == EEXIST)) { 6750e8d8bef9SDimitry Andric // file didn't open because it already exists. 
6751e8d8bef9SDimitry Andric // try opening existing file 6752e8d8bef9SDimitry Andric fd1 = shm_open(shm_name, O_RDWR, 0666); 6753e8d8bef9SDimitry Andric if (fd1 == -1) { // file didn't open 6754e8d8bef9SDimitry Andric // error out here 6755e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0), 6756e8d8bef9SDimitry Andric __kmp_msg_null); 6757e8d8bef9SDimitry Andric } else { 6758e8d8bef9SDimitry Andric // able to open existing file 6759e8d8bef9SDimitry Andric shm_preexist = 1; 6760e8d8bef9SDimitry Andric } 6761bdd1243dSDimitry Andric } else if (fd1 == -1) { 6762bdd1243dSDimitry Andric // SHM didn't open; it was due to error other than already exists. Try to 6763bdd1243dSDimitry Andric // create a temp file under /tmp. 6764bdd1243dSDimitry Andric // TODO: /tmp might not always be the temporary directory. For now we will 6765bdd1243dSDimitry Andric // not consider TMPDIR. If /tmp is not accessible, we simply error out. 6766bdd1243dSDimitry Andric char *temp_file_name = __kmp_str_format("/tmp/%sXXXXXX", name); 6767bdd1243dSDimitry Andric fd1 = mkstemp(temp_file_name); 6768bdd1243dSDimitry Andric if (fd1 == -1) { 6769e8d8bef9SDimitry Andric // error out here. 
6770bdd1243dSDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP"), KMP_ERR(errno), 6771e8d8bef9SDimitry Andric __kmp_msg_null); 6772e8d8bef9SDimitry Andric } 6773bdd1243dSDimitry Andric temp_reg_status_file_name = temp_file_name; 6774bdd1243dSDimitry Andric } 6775e8d8bef9SDimitry Andric if (shm_preexist == 0) { 6776e8d8bef9SDimitry Andric // we created SHM now set size 6777e8d8bef9SDimitry Andric if (ftruncate(fd1, SHM_SIZE) == -1) { 6778e8d8bef9SDimitry Andric // error occured setting size; 6779e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"), 6780e8d8bef9SDimitry Andric KMP_ERR(errno), __kmp_msg_null); 6781e8d8bef9SDimitry Andric } 6782e8d8bef9SDimitry Andric } 6783e8d8bef9SDimitry Andric data1 = 6784e8d8bef9SDimitry Andric (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0); 6785e8d8bef9SDimitry Andric if (data1 == MAP_FAILED) { 6786e8d8bef9SDimitry Andric // failed to map shared memory 6787e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno), 6788e8d8bef9SDimitry Andric __kmp_msg_null); 6789e8d8bef9SDimitry Andric } 6790e8d8bef9SDimitry Andric if (shm_preexist == 0) { // set data to SHM, set value 6791e8d8bef9SDimitry Andric KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str); 6792e8d8bef9SDimitry Andric } 6793e8d8bef9SDimitry Andric // Read value from either what we just wrote or existing file. 6794e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6795e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6796e8d8bef9SDimitry Andric close(fd1); 6797e8d8bef9SDimitry Andric #else // Windows and unix with static library 67980b57cec5SDimitry Andric // Set environment variable, but do not overwrite if it is exist. 
67990b57cec5SDimitry Andric __kmp_env_set(name, __kmp_registration_str, 0); 6800e8d8bef9SDimitry Andric // read value to see if it got set 68010b57cec5SDimitry Andric value = __kmp_env_get(name); 6802e8d8bef9SDimitry Andric #endif 6803e8d8bef9SDimitry Andric 68040b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 68050b57cec5SDimitry Andric done = 1; // Ok, environment variable set successfully, exit the loop. 68060b57cec5SDimitry Andric } else { 68070b57cec5SDimitry Andric // Oops. Write failed. Another copy of OpenMP RTL is in memory. 68080b57cec5SDimitry Andric // Check whether it alive or dead. 68090b57cec5SDimitry Andric int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 68100b57cec5SDimitry Andric char *tail = value; 68110b57cec5SDimitry Andric char *flag_addr_str = NULL; 68120b57cec5SDimitry Andric char *flag_val_str = NULL; 68130b57cec5SDimitry Andric char const *file_name = NULL; 68140b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_addr_str, &tail); 68150b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_val_str, &tail); 68160b57cec5SDimitry Andric file_name = tail; 68170b57cec5SDimitry Andric if (tail != NULL) { 6818fe6060f1SDimitry Andric unsigned long *flag_addr = 0; 6819fe6060f1SDimitry Andric unsigned long flag_val = 0; 68200b57cec5SDimitry Andric KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr)); 68210b57cec5SDimitry Andric KMP_SSCANF(flag_val_str, "%lx", &flag_val); 68220b57cec5SDimitry Andric if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 68230b57cec5SDimitry Andric // First, check whether environment-encoded address is mapped into 68240b57cec5SDimitry Andric // addr space. 68250b57cec5SDimitry Andric // If so, dereference it to see if it still has the right value. 
68260b57cec5SDimitry Andric if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 68270b57cec5SDimitry Andric neighbor = 1; 68280b57cec5SDimitry Andric } else { 68290b57cec5SDimitry Andric // If not, then we know the other copy of the library is no longer 68300b57cec5SDimitry Andric // running. 68310b57cec5SDimitry Andric neighbor = 2; 68320b57cec5SDimitry Andric } 68330b57cec5SDimitry Andric } 68340b57cec5SDimitry Andric } 68350b57cec5SDimitry Andric switch (neighbor) { 68360b57cec5SDimitry Andric case 0: // Cannot parse environment variable -- neighbor status unknown. 68370b57cec5SDimitry Andric // Assume it is the incompatible format of future version of the 68380b57cec5SDimitry Andric // library. Assume the other library is alive. 68390b57cec5SDimitry Andric // WARN( ... ); // TODO: Issue a warning. 68400b57cec5SDimitry Andric file_name = "unknown library"; 68410b57cec5SDimitry Andric KMP_FALLTHROUGH(); 68420b57cec5SDimitry Andric // Attention! Falling to the next case. That's intentional. 68430b57cec5SDimitry Andric case 1: { // Neighbor is alive. 68440b57cec5SDimitry Andric // Check it is allowed. 68450b57cec5SDimitry Andric char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 68460b57cec5SDimitry Andric if (!__kmp_str_match_true(duplicate_ok)) { 68470b57cec5SDimitry Andric // That's not allowed. Issue fatal error. 68480b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 68490b57cec5SDimitry Andric KMP_HNT(DuplicateLibrary), __kmp_msg_null); 68500b57cec5SDimitry Andric } 68510b57cec5SDimitry Andric KMP_INTERNAL_FREE(duplicate_ok); 68520b57cec5SDimitry Andric __kmp_duplicate_library_ok = 1; 68530b57cec5SDimitry Andric done = 1; // Exit the loop. 68540b57cec5SDimitry Andric } break; 68550b57cec5SDimitry Andric case 2: { // Neighbor is dead. 6856e8d8bef9SDimitry Andric 6857349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6858e8d8bef9SDimitry Andric // close shared memory. 
6859e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6860e8d8bef9SDimitry Andric #else 68610b57cec5SDimitry Andric // Clear the variable and try to register library again. 68620b57cec5SDimitry Andric __kmp_env_unset(name); 6863e8d8bef9SDimitry Andric #endif 68640b57cec5SDimitry Andric } break; 6865fe6060f1SDimitry Andric default: { 6866fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(0); 6867fe6060f1SDimitry Andric } break; 68680b57cec5SDimitry Andric } 68690b57cec5SDimitry Andric } 68700b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)value); 6871349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6872e8d8bef9SDimitry Andric KMP_INTERNAL_FREE((void *)shm_name); 6873e8d8bef9SDimitry Andric #endif 6874e8d8bef9SDimitry Andric } // while 68750b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)name); 68760b57cec5SDimitry Andric 68770b57cec5SDimitry Andric } // func __kmp_register_library_startup 68780b57cec5SDimitry Andric 68790b57cec5SDimitry Andric void __kmp_unregister_library(void) { 68800b57cec5SDimitry Andric 68810b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); 6882e8d8bef9SDimitry Andric char *value = NULL; 6883e8d8bef9SDimitry Andric 6884349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6885bdd1243dSDimitry Andric bool use_shm = true; 6886e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6887e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_RDONLY, 0666); 6888e8d8bef9SDimitry Andric if (fd1 == -1) { 6889bdd1243dSDimitry Andric // File did not open. Try the temporary file. 6890bdd1243dSDimitry Andric use_shm = false; 6891bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(temp_reg_status_file_name); 68921ac55f4cSDimitry Andric fd1 = open(temp_reg_status_file_name, O_RDONLY); 68931ac55f4cSDimitry Andric if (fd1 == -1) { 6894bdd1243dSDimitry Andric // give it up now. 
6895e8d8bef9SDimitry Andric return; 6896e8d8bef9SDimitry Andric } 6897bdd1243dSDimitry Andric } 6898e8d8bef9SDimitry Andric char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0); 6899e8d8bef9SDimitry Andric if (data1 != MAP_FAILED) { 6900e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6901e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6902e8d8bef9SDimitry Andric } 6903e8d8bef9SDimitry Andric close(fd1); 6904e8d8bef9SDimitry Andric #else 6905e8d8bef9SDimitry Andric value = __kmp_env_get(name); 6906e8d8bef9SDimitry Andric #endif 69070b57cec5SDimitry Andric 69080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 69090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 69100b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 69110b57cec5SDimitry Andric // Ok, this is our variable. Delete it. 6912349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6913bdd1243dSDimitry Andric if (use_shm) { 6914e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6915bdd1243dSDimitry Andric } else { 6916bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(temp_reg_status_file_name); 6917bdd1243dSDimitry Andric unlink(temp_reg_status_file_name); // this removes the temp file 6918bdd1243dSDimitry Andric } 6919e8d8bef9SDimitry Andric #else 69200b57cec5SDimitry Andric __kmp_env_unset(name); 6921e8d8bef9SDimitry Andric #endif 69220b57cec5SDimitry Andric } 69230b57cec5SDimitry Andric 6924349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6925e8d8bef9SDimitry Andric KMP_INTERNAL_FREE(shm_name); 6926bdd1243dSDimitry Andric if (!use_shm) { 6927bdd1243dSDimitry Andric KMP_DEBUG_ASSERT(temp_reg_status_file_name); 6928bdd1243dSDimitry Andric KMP_INTERNAL_FREE(temp_reg_status_file_name); 6929bdd1243dSDimitry Andric } 6930e8d8bef9SDimitry Andric #endif 6931e8d8bef9SDimitry Andric 69320b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_registration_str); 
69330b57cec5SDimitry Andric KMP_INTERNAL_FREE(value); 69340b57cec5SDimitry Andric KMP_INTERNAL_FREE(name); 69350b57cec5SDimitry Andric 69360b57cec5SDimitry Andric __kmp_registration_flag = 0; 69370b57cec5SDimitry Andric __kmp_registration_str = NULL; 69380b57cec5SDimitry Andric 69390b57cec5SDimitry Andric } // __kmp_unregister_library 69400b57cec5SDimitry Andric 69410b57cec5SDimitry Andric // End of Library registration stuff. 69420b57cec5SDimitry Andric // ----------------------------------------------------------------------------- 69430b57cec5SDimitry Andric 69440b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 69450b57cec5SDimitry Andric 69460b57cec5SDimitry Andric static void __kmp_check_mic_type() { 69470b57cec5SDimitry Andric kmp_cpuid_t cpuid_state = {0}; 69480b57cec5SDimitry Andric kmp_cpuid_t *cs_p = &cpuid_state; 69490b57cec5SDimitry Andric __kmp_x86_cpuid(1, 0, cs_p); 69500b57cec5SDimitry Andric // We don't support mic1 at the moment 69510b57cec5SDimitry Andric if ((cs_p->eax & 0xff0) == 0xB10) { 69520b57cec5SDimitry Andric __kmp_mic_type = mic2; 69530b57cec5SDimitry Andric } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { 69540b57cec5SDimitry Andric __kmp_mic_type = mic3; 69550b57cec5SDimitry Andric } else { 69560b57cec5SDimitry Andric __kmp_mic_type = non_mic; 69570b57cec5SDimitry Andric } 69580b57cec5SDimitry Andric } 69590b57cec5SDimitry Andric 69600b57cec5SDimitry Andric #endif /* KMP_MIC_SUPPORTED */ 69610b57cec5SDimitry Andric 6962e8d8bef9SDimitry Andric #if KMP_HAVE_UMWAIT 6963e8d8bef9SDimitry Andric static void __kmp_user_level_mwait_init() { 6964e8d8bef9SDimitry Andric struct kmp_cpuid buf; 6965e8d8bef9SDimitry Andric __kmp_x86_cpuid(7, 0, &buf); 696604eeddc0SDimitry Andric __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1); 696704eeddc0SDimitry Andric __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait; 696804eeddc0SDimitry Andric __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0); 6969e8d8bef9SDimitry Andric 
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
// Weak fallback: returns 0 (feature absent) when the C library does not
// provide getauxval().
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

/* MWAIT variant of the probe: enable user-level mwait on KNL-class MIC
   (mic3) when the auxv entry reports it or the user forced it via
   KMP_USER_LEVEL_MWAIT. */
static void __kmp_user_level_mwait_init() {
  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
  // KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        // Warn that the forced setting was honored.
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */

/* One-time serial initialization of the runtime: sanity-checks primitive
   type sizes, runs OMPT/OMPD pre-init hooks, and validates the lock layer.
   (Function continues past this chunk.) */
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  // The runtime's fixed-width types must have their advertised sizes.
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

#if OMPT_SUPPORT
  ompt_pre_init();
#endif
#if OMPD_SUPPORT
  __kmp_env_dump();
  ompd_init();
#endif

  __kmp_validate_locks();
702806c3fb27SDimitry Andric #if ENABLE_LIBOMPTARGET 702906c3fb27SDimitry Andric /* Initialize functions from libomptarget */ 703006c3fb27SDimitry Andric __kmp_init_omptarget(); 703106c3fb27SDimitry Andric #endif 703206c3fb27SDimitry Andric 70330b57cec5SDimitry Andric /* Initialize internal memory allocator */ 70340b57cec5SDimitry Andric __kmp_init_allocator(); 70350b57cec5SDimitry Andric 7036fcaf7f86SDimitry Andric /* Register the library startup via an environment variable or via mapped 7037fcaf7f86SDimitry Andric shared memory file and check to see whether another copy of the library is 7038fcaf7f86SDimitry Andric already registered. Since forked child process is often terminated, we 7039fcaf7f86SDimitry Andric postpone the registration till middle initialization in the child */ 7040fcaf7f86SDimitry Andric if (__kmp_need_register_serial) 70410b57cec5SDimitry Andric __kmp_register_library_startup(); 70420b57cec5SDimitry Andric 70430b57cec5SDimitry Andric /* TODO reinitialization of library */ 70440b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 70450b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); 70460b57cec5SDimitry Andric } 70470b57cec5SDimitry Andric 70480b57cec5SDimitry Andric __kmp_global.g.g_abort = 0; 70490b57cec5SDimitry Andric TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 70500b57cec5SDimitry Andric 70510b57cec5SDimitry Andric /* initialize the locks */ 70520b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 70530b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 70540b57cec5SDimitry Andric __kmp_init_speculative_stats(); 70550b57cec5SDimitry Andric #endif 70560b57cec5SDimitry Andric #endif 70570b57cec5SDimitry Andric #if KMP_STATS_ENABLED 70580b57cec5SDimitry Andric __kmp_stats_init(); 70590b57cec5SDimitry Andric #endif 70600b57cec5SDimitry Andric __kmp_init_lock(&__kmp_global_lock); 70610b57cec5SDimitry Andric __kmp_init_queuing_lock(&__kmp_dispatch_lock); 70620b57cec5SDimitry Andric 
__kmp_init_lock(&__kmp_debug_lock); 70630b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock); 70640b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); 70650b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); 70660b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); 70670b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); 70680b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); 70690b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); 70700b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); 70710b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); 70720b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); 70730b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); 70740b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); 70750b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); 70760b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); 70770b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_exit_lock); 70780b57cec5SDimitry Andric #if KMP_USE_MONITOR 70790b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_monitor_lock); 70800b57cec5SDimitry Andric #endif 70810b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); 70820b57cec5SDimitry Andric 70830b57cec5SDimitry Andric /* conduct initialization and initial setup of configuration */ 70840b57cec5SDimitry Andric 70850b57cec5SDimitry Andric __kmp_runtime_initialize(); 70860b57cec5SDimitry Andric 70870b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 70880b57cec5SDimitry Andric __kmp_check_mic_type(); 70890b57cec5SDimitry Andric #endif 70900b57cec5SDimitry Andric 70910b57cec5SDimitry Andric // Some global variable initialization moved here from kmp_env_initialize() 70920b57cec5SDimitry Andric #ifdef KMP_DEBUG 70930b57cec5SDimitry Andric kmp_diag = 0; 
70940b57cec5SDimitry Andric #endif 70950b57cec5SDimitry Andric __kmp_abort_delay = 0; 70960b57cec5SDimitry Andric 70970b57cec5SDimitry Andric // From __kmp_init_dflt_team_nth() 70980b57cec5SDimitry Andric /* assume the entire machine will be used */ 70990b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_xproc; 71000b57cec5SDimitry Andric if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { 71010b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = KMP_MIN_NTH; 71020b57cec5SDimitry Andric } 71030b57cec5SDimitry Andric if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { 71040b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 71050b57cec5SDimitry Andric } 71060b57cec5SDimitry Andric __kmp_max_nth = __kmp_sys_max_nth; 71070b57cec5SDimitry Andric __kmp_cg_max_nth = __kmp_sys_max_nth; 71080b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default 71090b57cec5SDimitry Andric if (__kmp_teams_max_nth > __kmp_sys_max_nth) { 71100b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_sys_max_nth; 71110b57cec5SDimitry Andric } 71120b57cec5SDimitry Andric 71130b57cec5SDimitry Andric // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" 71140b57cec5SDimitry Andric // part 71150b57cec5SDimitry Andric __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 71160b57cec5SDimitry Andric #if KMP_USE_MONITOR 71170b57cec5SDimitry Andric __kmp_monitor_wakeups = 71180b57cec5SDimitry Andric KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 71190b57cec5SDimitry Andric __kmp_bt_intervals = 71200b57cec5SDimitry Andric KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 71210b57cec5SDimitry Andric #endif 71220b57cec5SDimitry Andric // From "KMP_LIBRARY" part of __kmp_env_initialize() 71230b57cec5SDimitry Andric __kmp_library = library_throughput; 71240b57cec5SDimitry Andric // From KMP_SCHEDULE initialization 71250b57cec5SDimitry Andric __kmp_static = kmp_sch_static_balanced; 71260b57cec5SDimitry Andric // 
AC: do not use analytical here, because it is non-monotonous 71270b57cec5SDimitry Andric //__kmp_guided = kmp_sch_guided_iterative_chunked; 71280b57cec5SDimitry Andric //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no 71290b57cec5SDimitry Andric // need to repeat assignment 71300b57cec5SDimitry Andric // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch 71310b57cec5SDimitry Andric // bit control and barrier method control parts 71320b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 71330b57cec5SDimitry Andric #define kmp_reduction_barrier_gather_bb ((int)1) 71340b57cec5SDimitry Andric #define kmp_reduction_barrier_release_bb ((int)1) 7135349cc55cSDimitry Andric #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt 7136349cc55cSDimitry Andric #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt 71370b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 71380b57cec5SDimitry Andric for (i = bs_plain_barrier; i < bs_last_barrier; i++) { 71390b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; 71400b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; 71410b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; 71420b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; 71430b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 71440b57cec5SDimitry Andric if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( 71450b57cec5SDimitry Andric // lin_64 ): hyper,1 71460b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; 71470b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; 71480b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; 71490b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] 
= kmp_reduction_barrier_release_pat; 71500b57cec5SDimitry Andric } 71510b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 71520b57cec5SDimitry Andric } 71530b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 71540b57cec5SDimitry Andric #undef kmp_reduction_barrier_release_pat 71550b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_pat 71560b57cec5SDimitry Andric #undef kmp_reduction_barrier_release_bb 71570b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_bb 71580b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 71590b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 71600b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 71610b57cec5SDimitry Andric // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 71620b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather 71630b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = 71640b57cec5SDimitry Andric 1; // forkjoin release 71650b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 71660b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 71670b57cec5SDimitry Andric } 71680b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 71690b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 71700b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 71710b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 71720b57cec5SDimitry Andric } 71730b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 71740b57cec5SDimitry Andric #endif // KMP_MIC_SUPPORTED 71750b57cec5SDimitry Andric 71760b57cec5SDimitry Andric // From KMP_CHECKS initialization 71770b57cec5SDimitry Andric #ifdef KMP_DEBUG 71780b57cec5SDimitry Andric __kmp_env_checks = TRUE; /* development versions have the extra checks */ 71790b57cec5SDimitry Andric #else 
71800b57cec5SDimitry Andric __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ 71810b57cec5SDimitry Andric #endif 71820b57cec5SDimitry Andric 71830b57cec5SDimitry Andric // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 71840b57cec5SDimitry Andric __kmp_foreign_tp = TRUE; 71850b57cec5SDimitry Andric 71860b57cec5SDimitry Andric __kmp_global.g.g_dynamic = FALSE; 71870b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_default; 71880b57cec5SDimitry Andric 7189fe6060f1SDimitry Andric __kmp_init_nesting_mode(); 7190fe6060f1SDimitry Andric 71910b57cec5SDimitry Andric __kmp_env_initialize(NULL); 71920b57cec5SDimitry Andric 7193e8d8bef9SDimitry Andric #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT 7194e8d8bef9SDimitry Andric __kmp_user_level_mwait_init(); 7195e8d8bef9SDimitry Andric #endif 71960b57cec5SDimitry Andric // Print all messages in message catalog for testing purposes. 71970b57cec5SDimitry Andric #ifdef KMP_DEBUG 71980b57cec5SDimitry Andric char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); 71990b57cec5SDimitry Andric if (__kmp_str_match_true(val)) { 72000b57cec5SDimitry Andric kmp_str_buf_t buffer; 72010b57cec5SDimitry Andric __kmp_str_buf_init(&buffer); 72020b57cec5SDimitry Andric __kmp_i18n_dump_catalog(&buffer); 72030b57cec5SDimitry Andric __kmp_printf("%s", buffer.str); 72040b57cec5SDimitry Andric __kmp_str_buf_free(&buffer); 72050b57cec5SDimitry Andric } 72060b57cec5SDimitry Andric __kmp_env_free(&val); 72070b57cec5SDimitry Andric #endif 72080b57cec5SDimitry Andric 72090b57cec5SDimitry Andric __kmp_threads_capacity = 72100b57cec5SDimitry Andric __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); 72110b57cec5SDimitry Andric // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 72120b57cec5SDimitry Andric __kmp_tp_capacity = __kmp_default_tp_capacity( 72130b57cec5SDimitry Andric __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 72140b57cec5SDimitry Andric 72150b57cec5SDimitry Andric // 
If the library is shut down properly, both pools must be NULL. Just in 72160b57cec5SDimitry Andric // case, set them to NULL -- some memory may leak, but subsequent code will 72170b57cec5SDimitry Andric // work even if pools are not freed. 72180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); 72190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); 72200b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); 72210b57cec5SDimitry Andric __kmp_thread_pool = NULL; 72220b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = NULL; 72230b57cec5SDimitry Andric __kmp_team_pool = NULL; 72240b57cec5SDimitry Andric 72250b57cec5SDimitry Andric /* Allocate all of the variable sized records */ 72260b57cec5SDimitry Andric /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are 72270b57cec5SDimitry Andric * expandable */ 72280b57cec5SDimitry Andric /* Since allocation is cache-aligned, just add extra padding at the end */ 72290b57cec5SDimitry Andric size = 72300b57cec5SDimitry Andric (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + 72310b57cec5SDimitry Andric CACHE_LINE; 72320b57cec5SDimitry Andric __kmp_threads = (kmp_info_t **)__kmp_allocate(size); 72330b57cec5SDimitry Andric __kmp_root = (kmp_root_t **)((char *)__kmp_threads + 72340b57cec5SDimitry Andric sizeof(kmp_info_t *) * __kmp_threads_capacity); 72350b57cec5SDimitry Andric 72360b57cec5SDimitry Andric /* init thread counts */ 72370b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_all_nth == 72380b57cec5SDimitry Andric 0); // Asserts fail if the library is reinitializing and 72390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. 
72400b57cec5SDimitry Andric __kmp_all_nth = 0; 72410b57cec5SDimitry Andric __kmp_nth = 0; 72420b57cec5SDimitry Andric 72430b57cec5SDimitry Andric /* setup the uber master thread and hierarchy */ 72440b57cec5SDimitry Andric gtid = __kmp_register_root(TRUE); 72450b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); 72460b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 72470b57cec5SDimitry Andric KMP_ASSERT(KMP_INITIAL_GTID(gtid)); 72480b57cec5SDimitry Andric 72490b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 72500b57cec5SDimitry Andric 72510b57cec5SDimitry Andric __kmp_common_initialize(); 72520b57cec5SDimitry Andric 72530b57cec5SDimitry Andric #if KMP_OS_UNIX 72540b57cec5SDimitry Andric /* invoke the child fork handler */ 72550b57cec5SDimitry Andric __kmp_register_atfork(); 72560b57cec5SDimitry Andric #endif 72570b57cec5SDimitry Andric 7258bdd1243dSDimitry Andric #if !KMP_DYNAMIC_LIB || \ 7259bdd1243dSDimitry Andric ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN) 72600b57cec5SDimitry Andric { 72610b57cec5SDimitry Andric /* Invoke the exit handler when the program finishes, only for static 7262bdd1243dSDimitry Andric library and macOS* dynamic. For other dynamic libraries, we already 7263bdd1243dSDimitry Andric have _fini and DllMain. */ 72640b57cec5SDimitry Andric int rc = atexit(__kmp_internal_end_atexit); 72650b57cec5SDimitry Andric if (rc != 0) { 72660b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), 72670b57cec5SDimitry Andric __kmp_msg_null); 72680b57cec5SDimitry Andric } 72690b57cec5SDimitry Andric } 72700b57cec5SDimitry Andric #endif 72710b57cec5SDimitry Andric 72720b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 72730b57cec5SDimitry Andric #if KMP_OS_UNIX 72740b57cec5SDimitry Andric /* NOTE: make sure that this is called before the user installs their own 72750b57cec5SDimitry Andric signal handlers so that the user handlers are called first. 
this way they 72760b57cec5SDimitry Andric can return false, not call our handler, avoid terminating the library, and 72770b57cec5SDimitry Andric continue execution where they left off. */ 72780b57cec5SDimitry Andric __kmp_install_signals(FALSE); 72790b57cec5SDimitry Andric #endif /* KMP_OS_UNIX */ 72800b57cec5SDimitry Andric #if KMP_OS_WINDOWS 72810b57cec5SDimitry Andric __kmp_install_signals(TRUE); 72820b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 72830b57cec5SDimitry Andric #endif 72840b57cec5SDimitry Andric 72850b57cec5SDimitry Andric /* we have finished the serial initialization */ 72860b57cec5SDimitry Andric __kmp_init_counter++; 72870b57cec5SDimitry Andric 72880b57cec5SDimitry Andric __kmp_init_serial = TRUE; 72890b57cec5SDimitry Andric 729006c3fb27SDimitry Andric if (__kmp_version) { 729106c3fb27SDimitry Andric __kmp_print_version_1(); 729206c3fb27SDimitry Andric } 729306c3fb27SDimitry Andric 72940b57cec5SDimitry Andric if (__kmp_settings) { 72950b57cec5SDimitry Andric __kmp_env_print(); 72960b57cec5SDimitry Andric } 72970b57cec5SDimitry Andric 72980b57cec5SDimitry Andric if (__kmp_display_env || __kmp_display_env_verbose) { 72990b57cec5SDimitry Andric __kmp_env_print_2(); 73000b57cec5SDimitry Andric } 73010b57cec5SDimitry Andric 73020b57cec5SDimitry Andric #if OMPT_SUPPORT 73030b57cec5SDimitry Andric ompt_post_init(); 73040b57cec5SDimitry Andric #endif 73050b57cec5SDimitry Andric 73060b57cec5SDimitry Andric KMP_MB(); 73070b57cec5SDimitry Andric 73080b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n")); 73090b57cec5SDimitry Andric } 73100b57cec5SDimitry Andric 73110b57cec5SDimitry Andric void __kmp_serial_initialize(void) { 73120b57cec5SDimitry Andric if (__kmp_init_serial) { 73130b57cec5SDimitry Andric return; 73140b57cec5SDimitry Andric } 73150b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 73160b57cec5SDimitry Andric if (__kmp_init_serial) { 73170b57cec5SDimitry Andric 
__kmp_release_bootstrap_lock(&__kmp_initz_lock); 73180b57cec5SDimitry Andric return; 73190b57cec5SDimitry Andric } 73200b57cec5SDimitry Andric __kmp_do_serial_initialize(); 73210b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 73220b57cec5SDimitry Andric } 73230b57cec5SDimitry Andric 73240b57cec5SDimitry Andric static void __kmp_do_middle_initialize(void) { 73250b57cec5SDimitry Andric int i, j; 73260b57cec5SDimitry Andric int prev_dflt_team_nth; 73270b57cec5SDimitry Andric 73280b57cec5SDimitry Andric if (!__kmp_init_serial) { 73290b57cec5SDimitry Andric __kmp_do_serial_initialize(); 73300b57cec5SDimitry Andric } 73310b57cec5SDimitry Andric 73320b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 73330b57cec5SDimitry Andric 7334fcaf7f86SDimitry Andric if (UNLIKELY(!__kmp_need_register_serial)) { 7335fcaf7f86SDimitry Andric // We are in a forked child process. The registration was skipped during 7336fcaf7f86SDimitry Andric // serial initialization in __kmp_atfork_child handler. Do it here. 7337fcaf7f86SDimitry Andric __kmp_register_library_startup(); 7338fcaf7f86SDimitry Andric } 7339fcaf7f86SDimitry Andric 73400b57cec5SDimitry Andric // Save the previous value for the __kmp_dflt_team_nth so that 73410b57cec5SDimitry Andric // we can avoid some reinitialization if it hasn't changed. 73420b57cec5SDimitry Andric prev_dflt_team_nth = __kmp_dflt_team_nth; 73430b57cec5SDimitry Andric 73440b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 73450b57cec5SDimitry Andric // __kmp_affinity_initialize() will try to set __kmp_ncores to the 73460b57cec5SDimitry Andric // number of cores on the machine. 
7347bdd1243dSDimitry Andric __kmp_affinity_initialize(__kmp_affinity); 73480b57cec5SDimitry Andric 73490b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 73500b57cec5SDimitry Andric 73510b57cec5SDimitry Andric KMP_ASSERT(__kmp_xproc > 0); 73520b57cec5SDimitry Andric if (__kmp_avail_proc == 0) { 73530b57cec5SDimitry Andric __kmp_avail_proc = __kmp_xproc; 73540b57cec5SDimitry Andric } 73550b57cec5SDimitry Andric 73560b57cec5SDimitry Andric // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), 73570b57cec5SDimitry Andric // correct them now 73580b57cec5SDimitry Andric j = 0; 73590b57cec5SDimitry Andric while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { 73600b57cec5SDimitry Andric __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = 73610b57cec5SDimitry Andric __kmp_avail_proc; 73620b57cec5SDimitry Andric j++; 73630b57cec5SDimitry Andric } 73640b57cec5SDimitry Andric 73650b57cec5SDimitry Andric if (__kmp_dflt_team_nth == 0) { 73660b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES 73670b57cec5SDimitry Andric // Default #threads = #cores 73680b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_ncores; 73690b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 73700b57cec5SDimitry Andric "__kmp_ncores (%d)\n", 73710b57cec5SDimitry Andric __kmp_dflt_team_nth)); 73720b57cec5SDimitry Andric #else 73730b57cec5SDimitry Andric // Default #threads = #available OS procs 73740b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_avail_proc; 73750b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 73760b57cec5SDimitry Andric "__kmp_avail_proc(%d)\n", 73770b57cec5SDimitry Andric __kmp_dflt_team_nth)); 73780b57cec5SDimitry Andric #endif /* KMP_DFLT_NTH_CORES */ 73790b57cec5SDimitry Andric } 73800b57cec5SDimitry Andric 73810b57cec5SDimitry Andric if (__kmp_dflt_team_nth < KMP_MIN_NTH) { 73820b57cec5SDimitry Andric __kmp_dflt_team_nth = 
KMP_MIN_NTH; 73830b57cec5SDimitry Andric } 73840b57cec5SDimitry Andric if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { 73850b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_sys_max_nth; 73860b57cec5SDimitry Andric } 73870b57cec5SDimitry Andric 7388fe6060f1SDimitry Andric if (__kmp_nesting_mode > 0) 7389fe6060f1SDimitry Andric __kmp_set_nesting_mode_threads(); 7390fe6060f1SDimitry Andric 73910b57cec5SDimitry Andric // There's no harm in continuing if the following check fails, 73920b57cec5SDimitry Andric // but it indicates an error in the previous logic. 73930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); 73940b57cec5SDimitry Andric 73950b57cec5SDimitry Andric if (__kmp_dflt_team_nth != prev_dflt_team_nth) { 73960b57cec5SDimitry Andric // Run through the __kmp_threads array and set the num threads icv for each 73970b57cec5SDimitry Andric // root thread that is currently registered with the RTL (which has not 73980b57cec5SDimitry Andric // already explicitly set its nthreads-var with a call to 73990b57cec5SDimitry Andric // omp_set_num_threads()). 
74000b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 74010b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[i]; 74020b57cec5SDimitry Andric if (thread == NULL) 74030b57cec5SDimitry Andric continue; 74040b57cec5SDimitry Andric if (thread->th.th_current_task->td_icvs.nproc != 0) 74050b57cec5SDimitry Andric continue; 74060b57cec5SDimitry Andric 74070b57cec5SDimitry Andric set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); 74080b57cec5SDimitry Andric } 74090b57cec5SDimitry Andric } 74100b57cec5SDimitry Andric KA_TRACE( 74110b57cec5SDimitry Andric 20, 74120b57cec5SDimitry Andric ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 74130b57cec5SDimitry Andric __kmp_dflt_team_nth)); 74140b57cec5SDimitry Andric 74150b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 74160b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ 74170b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 74180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 74190b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 74200b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 74210b57cec5SDimitry Andric } 74220b57cec5SDimitry Andric } 74230b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 74240b57cec5SDimitry Andric 74250b57cec5SDimitry Andric /* we have finished middle initialization */ 74260b57cec5SDimitry Andric TCW_SYNC_4(__kmp_init_middle, TRUE); 74270b57cec5SDimitry Andric 74280b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); 74290b57cec5SDimitry Andric } 74300b57cec5SDimitry Andric 74310b57cec5SDimitry Andric void __kmp_middle_initialize(void) { 74320b57cec5SDimitry Andric if (__kmp_init_middle) { 74330b57cec5SDimitry Andric return; 74340b57cec5SDimitry Andric } 74350b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 74360b57cec5SDimitry Andric if (__kmp_init_middle) { 74370b57cec5SDimitry Andric 
__kmp_release_bootstrap_lock(&__kmp_initz_lock); 74380b57cec5SDimitry Andric return; 74390b57cec5SDimitry Andric } 74400b57cec5SDimitry Andric __kmp_do_middle_initialize(); 74410b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 74420b57cec5SDimitry Andric } 74430b57cec5SDimitry Andric 74440b57cec5SDimitry Andric void __kmp_parallel_initialize(void) { 74450b57cec5SDimitry Andric int gtid = __kmp_entry_gtid(); // this might be a new root 74460b57cec5SDimitry Andric 74470b57cec5SDimitry Andric /* synchronize parallel initialization (for sibling) */ 74480b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) 74490b57cec5SDimitry Andric return; 74500b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 74510b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) { 74520b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 74530b57cec5SDimitry Andric return; 74540b57cec5SDimitry Andric } 74550b57cec5SDimitry Andric 74560b57cec5SDimitry Andric /* TODO reinitialization after we have already shut down */ 74570b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 74580b57cec5SDimitry Andric KA_TRACE( 74590b57cec5SDimitry Andric 10, 74600b57cec5SDimitry Andric ("__kmp_parallel_initialize: attempt to init while shutting down\n")); 74610b57cec5SDimitry Andric __kmp_infinite_loop(); 74620b57cec5SDimitry Andric } 74630b57cec5SDimitry Andric 74640b57cec5SDimitry Andric /* jc: The lock __kmp_initz_lock is already held, so calling 74650b57cec5SDimitry Andric __kmp_serial_initialize would cause a deadlock. So we call 74660b57cec5SDimitry Andric __kmp_do_serial_initialize directly. 
*/ 74670b57cec5SDimitry Andric if (!__kmp_init_middle) { 74680b57cec5SDimitry Andric __kmp_do_middle_initialize(); 74690b57cec5SDimitry Andric } 7470fe6060f1SDimitry Andric __kmp_assign_root_init_mask(); 74710b57cec5SDimitry Andric __kmp_resume_if_hard_paused(); 74720b57cec5SDimitry Andric 74730b57cec5SDimitry Andric /* begin initialization */ 74740b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); 74750b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 74760b57cec5SDimitry Andric 74770b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 74780b57cec5SDimitry Andric // Save the FP control regs. 74790b57cec5SDimitry Andric // Worker threads will set theirs to these values at thread startup. 74800b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); 74810b57cec5SDimitry Andric __kmp_store_mxcsr(&__kmp_init_mxcsr); 74820b57cec5SDimitry Andric __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 74830b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 74840b57cec5SDimitry Andric 74850b57cec5SDimitry Andric #if KMP_OS_UNIX 74860b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 74870b57cec5SDimitry Andric /* must be after __kmp_serial_initialize */ 74880b57cec5SDimitry Andric __kmp_install_signals(TRUE); 74890b57cec5SDimitry Andric #endif 74900b57cec5SDimitry Andric #endif 74910b57cec5SDimitry Andric 74920b57cec5SDimitry Andric __kmp_suspend_initialize(); 74930b57cec5SDimitry Andric 74940b57cec5SDimitry Andric #if defined(USE_LOAD_BALANCE) 74950b57cec5SDimitry Andric if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 74960b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 74970b57cec5SDimitry Andric } 74980b57cec5SDimitry Andric #else 74990b57cec5SDimitry Andric if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 75000b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 75010b57cec5SDimitry Andric } 75020b57cec5SDimitry Andric #endif 
75030b57cec5SDimitry Andric 75040b57cec5SDimitry Andric if (__kmp_version) { 75050b57cec5SDimitry Andric __kmp_print_version_2(); 75060b57cec5SDimitry Andric } 75070b57cec5SDimitry Andric 75080b57cec5SDimitry Andric /* we have finished parallel initialization */ 75090b57cec5SDimitry Andric TCW_SYNC_4(__kmp_init_parallel, TRUE); 75100b57cec5SDimitry Andric 75110b57cec5SDimitry Andric KMP_MB(); 75120b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); 75130b57cec5SDimitry Andric 75140b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 75150b57cec5SDimitry Andric } 75160b57cec5SDimitry Andric 7517e8d8bef9SDimitry Andric void __kmp_hidden_helper_initialize() { 7518e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper)) 7519e8d8bef9SDimitry Andric return; 7520e8d8bef9SDimitry Andric 7521e8d8bef9SDimitry Andric // __kmp_parallel_initialize is required before we initialize hidden helper 7522e8d8bef9SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 7523e8d8bef9SDimitry Andric __kmp_parallel_initialize(); 7524e8d8bef9SDimitry Andric 7525e8d8bef9SDimitry Andric // Double check. Note that this double check should not be placed before 7526e8d8bef9SDimitry Andric // __kmp_parallel_initialize as it will cause dead lock. 7527e8d8bef9SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 7528e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper)) { 7529e8d8bef9SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 7530e8d8bef9SDimitry Andric return; 7531e8d8bef9SDimitry Andric } 7532e8d8bef9SDimitry Andric 7533bdd1243dSDimitry Andric #if KMP_AFFINITY_SUPPORTED 7534bdd1243dSDimitry Andric // Initialize hidden helper affinity settings. 7535bdd1243dSDimitry Andric // The above __kmp_parallel_initialize() will initialize 7536bdd1243dSDimitry Andric // regular affinity (and topology) if not already done. 
7537bdd1243dSDimitry Andric if (!__kmp_hh_affinity.flags.initialized) 7538bdd1243dSDimitry Andric __kmp_affinity_initialize(__kmp_hh_affinity); 7539bdd1243dSDimitry Andric #endif 7540bdd1243dSDimitry Andric 7541e8d8bef9SDimitry Andric // Set the count of hidden helper tasks to be executed to zero 7542e8d8bef9SDimitry Andric KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0); 7543e8d8bef9SDimitry Andric 7544e8d8bef9SDimitry Andric // Set the global variable indicating that we're initializing hidden helper 7545e8d8bef9SDimitry Andric // team/threads 7546e8d8bef9SDimitry Andric TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE); 7547e8d8bef9SDimitry Andric 7548e8d8bef9SDimitry Andric // Platform independent initialization 7549e8d8bef9SDimitry Andric __kmp_do_initialize_hidden_helper_threads(); 7550e8d8bef9SDimitry Andric 7551e8d8bef9SDimitry Andric // Wait here for the finish of initialization of hidden helper teams 7552e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_initz_wait(); 7553e8d8bef9SDimitry Andric 7554e8d8bef9SDimitry Andric // We have finished hidden helper initialization 7555e8d8bef9SDimitry Andric TCW_SYNC_4(__kmp_init_hidden_helper, TRUE); 7556e8d8bef9SDimitry Andric 7557e8d8bef9SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 7558e8d8bef9SDimitry Andric } 7559e8d8bef9SDimitry Andric 75600b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 75610b57cec5SDimitry Andric 75620b57cec5SDimitry Andric void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 75630b57cec5SDimitry Andric kmp_team_t *team) { 75640b57cec5SDimitry Andric kmp_disp_t *dispatch; 75650b57cec5SDimitry Andric 75660b57cec5SDimitry Andric KMP_MB(); 75670b57cec5SDimitry Andric 75680b57cec5SDimitry Andric /* none of the threads have encountered any constructs, yet. 
*/ 75690b57cec5SDimitry Andric this_thr->th.th_local.this_construct = 0; 75700b57cec5SDimitry Andric #if KMP_CACHE_MANAGE 75710b57cec5SDimitry Andric KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); 75720b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */ 75730b57cec5SDimitry Andric dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 75740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(dispatch); 75750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 75760b57cec5SDimitry Andric // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ 75770b57cec5SDimitry Andric // this_thr->th.th_info.ds.ds_tid ] ); 75780b57cec5SDimitry Andric 75790b57cec5SDimitry Andric dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 75800b57cec5SDimitry Andric dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter 75810b57cec5SDimitry Andric if (__kmp_env_consistency_check) 75820b57cec5SDimitry Andric __kmp_push_parallel(gtid, team->t.t_ident); 75830b57cec5SDimitry Andric 75840b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 75850b57cec5SDimitry Andric } 75860b57cec5SDimitry Andric 75870b57cec5SDimitry Andric void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 75880b57cec5SDimitry Andric kmp_team_t *team) { 75890b57cec5SDimitry Andric if (__kmp_env_consistency_check) 75900b57cec5SDimitry Andric __kmp_pop_parallel(gtid, team->t.t_ident); 75910b57cec5SDimitry Andric 75920b57cec5SDimitry Andric __kmp_finish_implicit_task(this_thr); 75930b57cec5SDimitry Andric } 75940b57cec5SDimitry Andric 75950b57cec5SDimitry Andric int __kmp_invoke_task_func(int gtid) { 75960b57cec5SDimitry Andric int rc; 75970b57cec5SDimitry Andric int tid = __kmp_tid_from_gtid(gtid); 75980b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 75990b57cec5SDimitry Andric kmp_team_t *team = this_thr->th.th_team; 76000b57cec5SDimitry Andric 76010b57cec5SDimitry Andric __kmp_run_before_invoked_task(gtid, tid, this_thr, team); 76020b57cec5SDimitry Andric #if USE_ITT_BUILD 76030b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 7604fe6060f1SDimitry Andric // inform ittnotify about entering user's code 7605fe6060f1SDimitry Andric if (team->t.t_stack_id != NULL) { 7606fe6060f1SDimitry Andric __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id); 7607fe6060f1SDimitry Andric } else { 7608fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL); 76090b57cec5SDimitry Andric __kmp_itt_stack_callee_enter( 7610fe6060f1SDimitry Andric (__itt_caller)team->t.t_parent->t.t_stack_id); 7611fe6060f1SDimitry Andric } 76120b57cec5SDimitry Andric } 76130b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 76140b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 76150b57cec5SDimitry Andric SSC_MARK_INVOKING(); 76160b57cec5SDimitry Andric #endif 76170b57cec5SDimitry Andric 76180b57cec5SDimitry Andric #if OMPT_SUPPORT 76190b57cec5SDimitry Andric void *dummy; 7620489b1cf2SDimitry Andric void **exit_frame_p; 76210b57cec5SDimitry Andric ompt_data_t *my_task_data; 
76220b57cec5SDimitry Andric ompt_data_t *my_parallel_data; 76230b57cec5SDimitry Andric int ompt_team_size; 76240b57cec5SDimitry Andric 76250b57cec5SDimitry Andric if (ompt_enabled.enabled) { 7626fe6060f1SDimitry Andric exit_frame_p = &(team->t.t_implicit_task_taskdata[tid] 7627fe6060f1SDimitry Andric .ompt_task_info.frame.exit_frame.ptr); 76280b57cec5SDimitry Andric } else { 7629489b1cf2SDimitry Andric exit_frame_p = &dummy; 76300b57cec5SDimitry Andric } 76310b57cec5SDimitry Andric 76320b57cec5SDimitry Andric my_task_data = 76330b57cec5SDimitry Andric &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data); 76340b57cec5SDimitry Andric my_parallel_data = &(team->t.ompt_team_info.parallel_data); 76350b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 76360b57cec5SDimitry Andric ompt_team_size = team->t.t_nproc; 76370b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 76380b57cec5SDimitry Andric ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, 7639489b1cf2SDimitry Andric __kmp_tid_from_gtid(gtid), ompt_task_implicit); 76400b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); 76410b57cec5SDimitry Andric } 76420b57cec5SDimitry Andric #endif 76430b57cec5SDimitry Andric 76440b57cec5SDimitry Andric #if KMP_STATS_ENABLED 76450b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 76460b57cec5SDimitry Andric if (previous_state == stats_state_e::TEAMS_REGION) { 76470b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_teams); 76480b57cec5SDimitry Andric } else { 76490b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_parallel); 76500b57cec5SDimitry Andric } 76510b57cec5SDimitry Andric KMP_SET_THREAD_STATE(IMPLICIT_TASK); 76520b57cec5SDimitry Andric #endif 76530b57cec5SDimitry Andric 76540b57cec5SDimitry Andric rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, 76550b57cec5SDimitry Andric tid, 
(int)team->t.t_argc, (void **)team->t.t_argv 76560b57cec5SDimitry Andric #if OMPT_SUPPORT 76570b57cec5SDimitry Andric , 7658489b1cf2SDimitry Andric exit_frame_p 76590b57cec5SDimitry Andric #endif 76600b57cec5SDimitry Andric ); 76610b57cec5SDimitry Andric #if OMPT_SUPPORT 7662489b1cf2SDimitry Andric *exit_frame_p = NULL; 7663489b1cf2SDimitry Andric this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team; 76640b57cec5SDimitry Andric #endif 76650b57cec5SDimitry Andric 76660b57cec5SDimitry Andric #if KMP_STATS_ENABLED 76670b57cec5SDimitry Andric if (previous_state == stats_state_e::TEAMS_REGION) { 76680b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 76690b57cec5SDimitry Andric } 76700b57cec5SDimitry Andric KMP_POP_PARTITIONED_TIMER(); 76710b57cec5SDimitry Andric #endif 76720b57cec5SDimitry Andric 76730b57cec5SDimitry Andric #if USE_ITT_BUILD 76740b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 7675fe6060f1SDimitry Andric // inform ittnotify about leaving user's code 7676fe6060f1SDimitry Andric if (team->t.t_stack_id != NULL) { 7677fe6060f1SDimitry Andric __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id); 7678fe6060f1SDimitry Andric } else { 7679fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL); 76800b57cec5SDimitry Andric __kmp_itt_stack_callee_leave( 7681fe6060f1SDimitry Andric (__itt_caller)team->t.t_parent->t.t_stack_id); 7682fe6060f1SDimitry Andric } 76830b57cec5SDimitry Andric } 76840b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 76850b57cec5SDimitry Andric __kmp_run_after_invoked_task(gtid, tid, this_thr, team); 76860b57cec5SDimitry Andric 76870b57cec5SDimitry Andric return rc; 76880b57cec5SDimitry Andric } 76890b57cec5SDimitry Andric 76900b57cec5SDimitry Andric void __kmp_teams_master(int gtid) { 7691fe6060f1SDimitry Andric // This routine is called by all primary threads in teams construct 76920b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 76930b57cec5SDimitry 
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  // Request the stored teams-size as the thread count of the inner parallel.
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  // Push the new CG root onto this thread's chain of contention-group roots.
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

  // Launch league of teams now, but not let workers execute
  // (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // AC: last parameter "1" eliminates join barrier which won't work because
  // worker threads are in a fork barrier waiting for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

// Run __kmp_teams_master on this league primary thread, wrapped in the
// before/after-invoked-task bookkeeping and OMPT implicit-task callbacks.
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

/* this sets the requested number of threads for the next parallel region
   encountered by this team. since this should be enclosed in the forkjoin
   critical section it should avoid race conditions with asymmetrical nested
   parallelism */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  // Non-positive requests are ignored; the previous setting stays in effect.
  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

// Compute and store the per-team thread count (thr->th.th_teams_size.nth)
// from the requested num_threads, the global limits, and the current ICVs.
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    // No thread_limit clause: derive a default from the environment setting
    // or by dividing the available processors among the teams.
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size to exceed thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      // Negative request is invalid: warn and fall back to one thread.
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads
    // Store new thread limit; old limit is
    // saved in th_cg_roots list
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  // Publish the final per-team thread count.
  thr->th.th_teams_size.nth = num_threads;
}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires requested values to be positive,
    // but
    // people can send us any value, so we'd better check
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    // No num_teams clause: use the OMP_NUM_TEAMS setting if present.
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1.
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered.
   OpenMP 5.1 variant taking a lower and upper bound on the team count. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1.

  // A sole upper bound acts as an exact request.
  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      // Fit as many teams of num_threads as the global cap allows,
      // then clamp the result into [num_teams_lb, num_teams_ub].
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

/* Launch the worker threads into the microtask.
 */

void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    // Serialized team: only the single buffer needs resetting.
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.
  */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

// Gather the team at the join barrier after the microtask completes and
// emit the matching OMPT end-of-region callbacks.
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates.
  */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    // Dump state before asserting so the mismatch is diagnosable.
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  // Not at the outermost level: report no spinning workers.
  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // With infinite blocktime all workers keep spinning.
    return hot_team->t.t_nproc - 1; // Don't count primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
// Returns the suggested team size (capped at set_nproc, floored at
// KMP_MIN_NTH) based on the current system load.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for this
  // particular root (if we are at the outer par level), and the currently
  // executing thread (to become the primary thread) are available to add to the
  // new team, but are currently contributing to the system load, and must be
  // accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
    // = dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs.
  // The real system load at this instant should be at least as
  // large as the #active omp thread that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  // Tear down in reverse order of initialization: parallel, middle, serial.
  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /*
KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as single block, so
  // there is no need in freeing __kmp_root.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_state) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
__kmp_nested_proc_bind.used = 0; 82390b57cec5SDimitry Andric if (__kmp_affinity_format) { 82400b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_affinity_format); 82410b57cec5SDimitry Andric __kmp_affinity_format = NULL; 82420b57cec5SDimitry Andric } 82430b57cec5SDimitry Andric 82440b57cec5SDimitry Andric __kmp_i18n_catclose(); 82450b57cec5SDimitry Andric 82460b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 82470b57cec5SDimitry Andric __kmp_hier_scheds.deallocate(); 82480b57cec5SDimitry Andric #endif 82490b57cec5SDimitry Andric 82500b57cec5SDimitry Andric #if KMP_STATS_ENABLED 82510b57cec5SDimitry Andric __kmp_stats_fini(); 82520b57cec5SDimitry Andric #endif 82530b57cec5SDimitry Andric 82540b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: exit\n")); 82550b57cec5SDimitry Andric } 82560b57cec5SDimitry Andric 82570b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 82580b57cec5SDimitry Andric 82590b57cec5SDimitry Andric int __kmp_ignore_mppbeg(void) { 82600b57cec5SDimitry Andric char *env; 82610b57cec5SDimitry Andric 82620b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 82630b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 82640b57cec5SDimitry Andric return FALSE; 82650b57cec5SDimitry Andric } 82660b57cec5SDimitry Andric // By default __kmpc_begin() is no-op. 82670b57cec5SDimitry Andric return TRUE; 82680b57cec5SDimitry Andric } 82690b57cec5SDimitry Andric 82700b57cec5SDimitry Andric int __kmp_ignore_mppend(void) { 82710b57cec5SDimitry Andric char *env; 82720b57cec5SDimitry Andric 82730b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 82740b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 82750b57cec5SDimitry Andric return FALSE; 82760b57cec5SDimitry Andric } 82770b57cec5SDimitry Andric // By default __kmpc_end() is no-op. 
  return TRUE;
}

// Mark this uber thread's root as "begun". Idempotent: a fast unlocked check
// of r_begin is followed by a re-check under r_begin_lock (double-checked
// pattern), so concurrent/repeated calls do the registration work only once.
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

/* ------------------------------------------------------------------------ */

// User-facing entry for kmp_set_library()/KMP_LIBRARY: validates the calling
// context (must be serial, top-level), adjusts the calling thread's default
// nproc for the chosen mode, then forwards to __kmp_aux_set_library().
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    // NOTE(review): identical handling to library_turnaround here; the modes
    // diverge in __kmp_aux_set_library() below.
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}

// Set the default stack size for worker threads. Effective only before the
// first parallel region; the value is clamped to
// [__kmp_sys_min_stksize, KMP_MAX_STKSIZE].
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  // Round up to a 0x1000 (4K) boundary, guarding against size_t overflow.
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct
// On return, teams_serialized holds the serialization count of the team found
// at the teams-construct level (walks up t_parent while unwinding serialized
// nesting levels above the teams level).
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      // Consume serialized levels of the current team while above 'level'.
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

// Team number of the calling thread within the teams construct
// (0 when not in a teams construct or when the region is serialized).
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

// Number of teams in the enclosing teams construct
// (1 when not in a teams construct or when the region is serialized).
int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * . => right justified when size is specified
 * by default output is left justified
 * size is the *minimum* field length
 * All other characters are printed as is
 *
 * Available field types (must match __kmp_affinity_format_table and the
 * dispatch switch in __kmp_aux_capture_affinity_field):
 * t {team_num} - team number (__kmp_aux_get_team_num())
 * T {num_teams} - number of teams (__kmp_aux_get_num_teams())
 * L {nesting_level} - omp_get_level()
 * n {thread_num} - omp_get_thread_num()
 * N {num_threads} - omp_get_num_threads()
 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
 * H {host} - name of host machine
 * P {process_id} - process id (integer)
 * i {native_thread_id} - native thread identifier (integer)
 * A {thread_affinity} - comma separated list of integers or integer ranges
 * (values of affinity mask)
 *
 * Implementation-specific field types can be added
 * If a type is unknown, print "undefined"
 */

// Structure holding the short name, long name, and corresponding data type
// for snprintf. A table of these will represent the entire valid keyword
// field types.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> thread level
  const char *long_name; // from spec thread_level -> thread level
  char field_format; // data type for snprintf (typically 'd' or 's'
  // for integer or string)
} kmp_affinity_format_field_t;

// Keyword table for the affinity format parser: maps each short name to its
// long name and its printf conversion character.
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};

// Return the number of characters it takes to hold field
// Parses one %-field starting at *ptr (which must point at '%'), prints its
// expansion for thread 'th' (gtid 'gtid') into field_buffer, and advances
// *ptr past the consumed field. Unknown field types print "undefined".
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths.
    // This also prevents overflowing format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short)
  // Canonicalize the name into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      // Long name matched a table prefix but is not brace-terminated here;
      // treat as invalid so the default branch prints "undefined".
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested
  // value using snprintf within __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to spec, If an implementation does not have info for field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  // Worst case: '%' + '-' + '0' + 8 width digits + conversion + NUL = 13 < 20.
  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

/*
 * Return number of characters needed to hold the affinity string
 * (not including null byte character)
 * The resultant string is printed to buffer, which the caller can then
 * handle afterwards
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, then we use
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}

/* ------------------------------------------------------------------------ */

// Set the blocktime (spin-before-sleep interval) for thread 'tid' of the
// calling thread's team and serial team; clamps the value to
// [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME].
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

// Apply a settings string 'str' (KMP_* / OMP_* style) to the runtime,
// initializing the library first if needed.
// NOTE(review): 'len' is not used in this body — presumably kept for ABI
// compatibility of the exported entry point; confirm against callers.
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in current
  // PAROPT )
  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
  // can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to OpenMP RTL to make a decision on which method to select
  // among generated by PAROPT.
88050b57cec5SDimitry Andric 88060b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T retval; 88070b57cec5SDimitry Andric 88080b57cec5SDimitry Andric int team_size; 88090b57cec5SDimitry Andric 88100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 ) 88110b57cec5SDimitry Andric 88120b57cec5SDimitry Andric #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ 881381ad6265SDimitry Andric (loc && \ 881481ad6265SDimitry Andric ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))) 88150b57cec5SDimitry Andric #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) 88160b57cec5SDimitry Andric 88170b57cec5SDimitry Andric retval = critical_reduce_block; 88180b57cec5SDimitry Andric 88190b57cec5SDimitry Andric // another choice of getting a team size (with 1 dynamic deference) is slower 88200b57cec5SDimitry Andric team_size = __kmp_get_team_num_threads(global_tid); 88210b57cec5SDimitry Andric if (team_size == 1) { 88220b57cec5SDimitry Andric 88230b57cec5SDimitry Andric retval = empty_reduce_block; 88240b57cec5SDimitry Andric 88250b57cec5SDimitry Andric } else { 88260b57cec5SDimitry Andric 88270b57cec5SDimitry Andric int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 88280b57cec5SDimitry Andric 8829489b1cf2SDimitry Andric #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ 8830bdd1243dSDimitry Andric KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 88310b57cec5SDimitry Andric 88320b57cec5SDimitry Andric #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ 88330b57cec5SDimitry Andric KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD 88340b57cec5SDimitry Andric 88350b57cec5SDimitry Andric int teamsize_cutoff = 4; 88360b57cec5SDimitry Andric 88370b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 88380b57cec5SDimitry Andric if (__kmp_mic_type != non_mic) { 88390b57cec5SDimitry Andric teamsize_cutoff = 8; 88400b57cec5SDimitry Andric } 
88410b57cec5SDimitry Andric #endif 88420b57cec5SDimitry Andric int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 88430b57cec5SDimitry Andric if (tree_available) { 88440b57cec5SDimitry Andric if (team_size <= teamsize_cutoff) { 88450b57cec5SDimitry Andric if (atomic_available) { 88460b57cec5SDimitry Andric retval = atomic_reduce_block; 88470b57cec5SDimitry Andric } 88480b57cec5SDimitry Andric } else { 88490b57cec5SDimitry Andric retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 88500b57cec5SDimitry Andric } 88510b57cec5SDimitry Andric } else if (atomic_available) { 88520b57cec5SDimitry Andric retval = atomic_reduce_block; 88530b57cec5SDimitry Andric } 88540b57cec5SDimitry Andric #else 88550b57cec5SDimitry Andric #error "Unknown or unsupported OS" 88560b57cec5SDimitry Andric #endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || 88570b57cec5SDimitry Andric // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD 88580b57cec5SDimitry Andric 88590b57cec5SDimitry Andric #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 88600b57cec5SDimitry Andric 8861b121cb00SDimitry Andric #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ 8862b121cb00SDimitry Andric KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD 88630b57cec5SDimitry Andric 88640b57cec5SDimitry Andric // basic tuning 88650b57cec5SDimitry Andric 88660b57cec5SDimitry Andric if (atomic_available) { 88670b57cec5SDimitry Andric if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ??? 
88680b57cec5SDimitry Andric retval = atomic_reduce_block; 88690b57cec5SDimitry Andric } 88700b57cec5SDimitry Andric } // otherwise: use critical section 88710b57cec5SDimitry Andric 88720b57cec5SDimitry Andric #elif KMP_OS_DARWIN 88730b57cec5SDimitry Andric 88740b57cec5SDimitry Andric int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 88750b57cec5SDimitry Andric if (atomic_available && (num_vars <= 3)) { 88760b57cec5SDimitry Andric retval = atomic_reduce_block; 88770b57cec5SDimitry Andric } else if (tree_available) { 88780b57cec5SDimitry Andric if ((reduce_size > (9 * sizeof(kmp_real64))) && 88790b57cec5SDimitry Andric (reduce_size < (2000 * sizeof(kmp_real64)))) { 88800b57cec5SDimitry Andric retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 88810b57cec5SDimitry Andric } 88820b57cec5SDimitry Andric } // otherwise: use critical section 88830b57cec5SDimitry Andric 88840b57cec5SDimitry Andric #else 88850b57cec5SDimitry Andric #error "Unknown or unsupported OS" 88860b57cec5SDimitry Andric #endif 88870b57cec5SDimitry Andric 88880b57cec5SDimitry Andric #else 88890b57cec5SDimitry Andric #error "Unknown or unsupported architecture" 88900b57cec5SDimitry Andric #endif 88910b57cec5SDimitry Andric } 88920b57cec5SDimitry Andric 88930b57cec5SDimitry Andric // KMP_FORCE_REDUCTION 88940b57cec5SDimitry Andric 88950b57cec5SDimitry Andric // If the team is serialized (team_size == 1), ignore the forced reduction 88960b57cec5SDimitry Andric // method and stay with the unsynchronized method (empty_reduce_block) 88970b57cec5SDimitry Andric if (__kmp_force_reduction_method != reduction_method_not_defined && 88980b57cec5SDimitry Andric team_size != 1) { 88990b57cec5SDimitry Andric 89000b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 89010b57cec5SDimitry Andric 89020b57cec5SDimitry Andric int atomic_available, tree_available; 89030b57cec5SDimitry Andric 89040b57cec5SDimitry Andric switch ((forced_retval = __kmp_force_reduction_method)) { 
89050b57cec5SDimitry Andric case critical_reduce_block: 89060b57cec5SDimitry Andric KMP_ASSERT(lck); // lck should be != 0 89070b57cec5SDimitry Andric break; 89080b57cec5SDimitry Andric 89090b57cec5SDimitry Andric case atomic_reduce_block: 89100b57cec5SDimitry Andric atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 89110b57cec5SDimitry Andric if (!atomic_available) { 89120b57cec5SDimitry Andric KMP_WARNING(RedMethodNotSupported, "atomic"); 89130b57cec5SDimitry Andric forced_retval = critical_reduce_block; 89140b57cec5SDimitry Andric } 89150b57cec5SDimitry Andric break; 89160b57cec5SDimitry Andric 89170b57cec5SDimitry Andric case tree_reduce_block: 89180b57cec5SDimitry Andric tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 89190b57cec5SDimitry Andric if (!tree_available) { 89200b57cec5SDimitry Andric KMP_WARNING(RedMethodNotSupported, "tree"); 89210b57cec5SDimitry Andric forced_retval = critical_reduce_block; 89220b57cec5SDimitry Andric } else { 89230b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 89240b57cec5SDimitry Andric forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 89250b57cec5SDimitry Andric #endif 89260b57cec5SDimitry Andric } 89270b57cec5SDimitry Andric break; 89280b57cec5SDimitry Andric 89290b57cec5SDimitry Andric default: 89300b57cec5SDimitry Andric KMP_ASSERT(0); // "unsupported method specified" 89310b57cec5SDimitry Andric } 89320b57cec5SDimitry Andric 89330b57cec5SDimitry Andric retval = forced_retval; 89340b57cec5SDimitry Andric } 89350b57cec5SDimitry Andric 89360b57cec5SDimitry Andric KA_TRACE(10, ("reduction method selected=%08x\n", retval)); 89370b57cec5SDimitry Andric 89380b57cec5SDimitry Andric #undef FAST_REDUCTION_TREE_METHOD_GENERATED 89390b57cec5SDimitry Andric #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 89400b57cec5SDimitry Andric 89410b57cec5SDimitry Andric return (retval); 89420b57cec5SDimitry Andric } 89430b57cec5SDimitry Andric // this function is for testing set/get/determine reduce method 
89440b57cec5SDimitry Andric kmp_int32 __kmp_get_reduce_method(void) { 89450b57cec5SDimitry Andric return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 89460b57cec5SDimitry Andric } 89470b57cec5SDimitry Andric 89480b57cec5SDimitry Andric // Soft pause sets up threads to ignore blocktime and just go to sleep. 89490b57cec5SDimitry Andric // Spin-wait code checks __kmp_pause_status and reacts accordingly. 89500b57cec5SDimitry Andric void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } 89510b57cec5SDimitry Andric 89520b57cec5SDimitry Andric // Hard pause shuts down the runtime completely. Resume happens naturally when 89530b57cec5SDimitry Andric // OpenMP is used subsequently. 89540b57cec5SDimitry Andric void __kmp_hard_pause() { 89550b57cec5SDimitry Andric __kmp_pause_status = kmp_hard_paused; 89560b57cec5SDimitry Andric __kmp_internal_end_thread(-1); 89570b57cec5SDimitry Andric } 89580b57cec5SDimitry Andric 89590b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 
89600b57cec5SDimitry Andric void __kmp_resume_if_soft_paused() { 89610b57cec5SDimitry Andric if (__kmp_pause_status == kmp_soft_paused) { 89620b57cec5SDimitry Andric __kmp_pause_status = kmp_not_paused; 89630b57cec5SDimitry Andric 89640b57cec5SDimitry Andric for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) { 89650b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 89660b57cec5SDimitry Andric if (thread) { // Wake it if sleeping 8967e8d8bef9SDimitry Andric kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, 8968e8d8bef9SDimitry Andric thread); 89690b57cec5SDimitry Andric if (fl.is_sleeping()) 89700b57cec5SDimitry Andric fl.resume(gtid); 89710b57cec5SDimitry Andric else if (__kmp_try_suspend_mx(thread)) { // got suspend lock 89720b57cec5SDimitry Andric __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep 89730b57cec5SDimitry Andric } else { // thread holds the lock and may sleep soon 89740b57cec5SDimitry Andric do { // until either the thread sleeps, or we can get the lock 89750b57cec5SDimitry Andric if (fl.is_sleeping()) { 89760b57cec5SDimitry Andric fl.resume(gtid); 89770b57cec5SDimitry Andric break; 89780b57cec5SDimitry Andric } else if (__kmp_try_suspend_mx(thread)) { 89790b57cec5SDimitry Andric __kmp_unlock_suspend_mx(thread); 89800b57cec5SDimitry Andric break; 89810b57cec5SDimitry Andric } 89820b57cec5SDimitry Andric } while (1); 89830b57cec5SDimitry Andric } 89840b57cec5SDimitry Andric } 89850b57cec5SDimitry Andric } 89860b57cec5SDimitry Andric } 89870b57cec5SDimitry Andric } 89880b57cec5SDimitry Andric 89890b57cec5SDimitry Andric // This function is called via __kmpc_pause_resource. Returns 0 if successful. 
// TODO: add warning messages
// Request a pause-level change for the runtime. Returns 0 on success,
// 1 on an invalid transition (resume while not paused, pause while already
// paused) or an unrecognized level.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // error message about runtime not being paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // error message about invalid level
    return 1;
  }
}

// Print the OpenMP display-env block (OMP_DISPLAY_ENV), initializing the
// serial runtime first if needed. Serialized via the bootstrap init lock.
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so distributed barrier must be modified
//
// th_used_in_team acts as a small state machine (values observed here and in
// __kmp_add_threads_to_team): 0 = not in team, 1 = in team, 2 = transitioning
// out of the team, 3 = transitioning into the team.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  // Only valid when the fork/join barrier uses the distributed pattern.
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If thread is transitioning still to in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Full memory fence so workers observe the state-2 stores above.
  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0; but may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the thread that it should transition itself back into the team. Then, if
  // blocktime isn't infinite, the thread could be sleeping, so we send a resume
  // to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    // CAS 0 -> 3: only flip threads that are currently out of the team.
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces master to wait until
  // all threads have moved into the team and are waiting in the barrier.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
// Hidden helper threads are enabled by default only on Linux.
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
// Count of hidden helper threads that have reached the startup rendezvous.
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

// Outlined body executed by every hidden helper thread of the helper team.
void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // that when a regular thread pushes a hidden helper task to one hidden
  // helper thread, the thread has not been awaken once since they're released
  // by the main thread after creating the team.
  // Rendezvous: spin until every hidden helper thread has arrived here.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

// Entry point for the thread that hosts the hidden helper team: registers a
// new root, forks the helper team running __kmp_hidden_helper_wrapper_fn, and
// releases waiters on teardown.
void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}

/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   Note: we skip duplicate topology levels, and skip levels with only
   one entity.
   KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
   in the topology, and initializes the number of threads at each of those
   levels to the number of entities at each level, respectively, below the
   entity at the parent level.
   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
   but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
   the user to turn nesting on explicitly. This is an even more experimental
   option to this experimental feature, and may change or go away in the
   future.
*/

// Allocate space to store nesting levels
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  // Grow the nested-nth array so it can hold one entry per topology level.
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}

// Set # threads for top levels of nesting; must be called after topology set
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    // Walk the hardware topology top-down; levels with ratio 1 contribute no
    // parallelism, so `loc--` makes the next ratio overwrite this slot.
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guesstimation
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  // Cap the number of levels at the user-requested nesting mode (N>1 case).
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to same
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}

// Empty symbols to export (see exports_so.txt) when feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
}

// end of file