10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_runtime.cpp -- KPTS runtime support library 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "kmp.h" 140b57cec5SDimitry Andric #include "kmp_affinity.h" 150b57cec5SDimitry Andric #include "kmp_atomic.h" 160b57cec5SDimitry Andric #include "kmp_environment.h" 170b57cec5SDimitry Andric #include "kmp_error.h" 180b57cec5SDimitry Andric #include "kmp_i18n.h" 190b57cec5SDimitry Andric #include "kmp_io.h" 200b57cec5SDimitry Andric #include "kmp_itt.h" 210b57cec5SDimitry Andric #include "kmp_settings.h" 220b57cec5SDimitry Andric #include "kmp_stats.h" 230b57cec5SDimitry Andric #include "kmp_str.h" 240b57cec5SDimitry Andric #include "kmp_wait_release.h" 250b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h" 260b57cec5SDimitry Andric #include "kmp_dispatch.h" 270b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 280b57cec5SDimitry Andric #include "kmp_dispatch_hier.h" 290b57cec5SDimitry Andric #endif 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #if OMPT_SUPPORT 320b57cec5SDimitry Andric #include "ompt-specific.h" 330b57cec5SDimitry Andric #endif 34fe6060f1SDimitry Andric #if OMPD_SUPPORT 35fe6060f1SDimitry Andric #include "ompd-specific.h" 36fe6060f1SDimitry Andric #endif 370b57cec5SDimitry Andric 38d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 39e8d8bef9SDimitry Andric #include "llvm/Support/TimeProfiler.h" 
40e8d8bef9SDimitry Andric static char *ProfileTraceFile = nullptr; 41e8d8bef9SDimitry Andric #endif 42e8d8bef9SDimitry Andric 430b57cec5SDimitry Andric /* these are temporary issues to be dealt with */ 440b57cec5SDimitry Andric #define KMP_USE_PRCTL 0 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #if KMP_OS_WINDOWS 470b57cec5SDimitry Andric #include <process.h> 480b57cec5SDimitry Andric #endif 490b57cec5SDimitry Andric 50e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 51e8d8bef9SDimitry Andric // windows does not need include files as it doesn't use shared memory 52e8d8bef9SDimitry Andric #else 53e8d8bef9SDimitry Andric #include <sys/mman.h> 54e8d8bef9SDimitry Andric #include <sys/stat.h> 55e8d8bef9SDimitry Andric #include <fcntl.h> 56e8d8bef9SDimitry Andric #define SHM_SIZE 1024 57e8d8bef9SDimitry Andric #endif 58e8d8bef9SDimitry Andric 590b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 600b57cec5SDimitry Andric char const __kmp_version_alt_comp[] = 610b57cec5SDimitry Andric KMP_VERSION_PREFIX "alternative compiler support: yes"; 620b57cec5SDimitry Andric #endif /* defined(KMP_GOMP_COMPAT) */ 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric char const __kmp_version_omp_api[] = 650b57cec5SDimitry Andric KMP_VERSION_PREFIX "API version: 5.0 (201611)"; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric #ifdef KMP_DEBUG 680b57cec5SDimitry Andric char const __kmp_version_lock[] = 690b57cec5SDimitry Andric KMP_VERSION_PREFIX "lock type: run time selectable"; 700b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric #if KMP_USE_MONITOR 770b57cec5SDimitry Andric kmp_info_t __kmp_monitor; 780b57cec5SDimitry Andric #endif 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /* Forward declarations */ 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric void __kmp_cleanup(void); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, 850b57cec5SDimitry Andric int gtid); 860b57cec5SDimitry Andric static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 870b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 880b57cec5SDimitry Andric ident_t *loc); 890b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 900b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, 910b57cec5SDimitry Andric int update_master_only = 0); 920b57cec5SDimitry Andric #endif 930b57cec5SDimitry Andric static void __kmp_do_serial_initialize(void); 940b57cec5SDimitry Andric void __kmp_fork_barrier(int gtid, int tid); 950b57cec5SDimitry Andric void __kmp_join_barrier(int gtid); 960b57cec5SDimitry Andric void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, 970b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, ident_t *loc); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 1000b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); 1010b57cec5SDimitry Andric #endif 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed); 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid); 1060b57cec5SDimitry Andric #endif 1070b57cec5SDimitry Andric static void __kmp_reap_thread(kmp_info_t *thread, int is_root); 1080b57cec5SDimitry Andric kmp_info_t 
*__kmp_thread_pool_insert_pt = NULL; 1090b57cec5SDimitry Andric 110349cc55cSDimitry Andric void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, 111349cc55cSDimitry Andric int new_nthreads); 112349cc55cSDimitry Andric void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); 113349cc55cSDimitry Andric 1140b57cec5SDimitry Andric /* Calculate the identifier of the current thread */ 1150b57cec5SDimitry Andric /* fast (and somewhat portable) way to get unique identifier of executing 1160b57cec5SDimitry Andric thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ 1170b57cec5SDimitry Andric int __kmp_get_global_thread_id() { 1180b57cec5SDimitry Andric int i; 1190b57cec5SDimitry Andric kmp_info_t **other_threads; 1200b57cec5SDimitry Andric size_t stack_data; 1210b57cec5SDimitry Andric char *stack_addr; 1220b57cec5SDimitry Andric size_t stack_size; 1230b57cec5SDimitry Andric char *stack_base; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric KA_TRACE( 1260b57cec5SDimitry Andric 1000, 1270b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", 1280b57cec5SDimitry Andric __kmp_nth, __kmp_all_nth)); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to 1310b57cec5SDimitry Andric a parallel region, made it return KMP_GTID_DNE to force serial_initialize 1320b57cec5SDimitry Andric by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee 1330b57cec5SDimitry Andric __kmp_init_gtid for this to work. 
*/ 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric if (!TCR_4(__kmp_init_gtid)) 1360b57cec5SDimitry Andric return KMP_GTID_DNE; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 1390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 1400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); 1410b57cec5SDimitry Andric return __kmp_gtid; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric #endif 1440b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 1450b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); 1460b57cec5SDimitry Andric return __kmp_gtid_get_specific(); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric stack_addr = (char *)&stack_data; 1510b57cec5SDimitry Andric other_threads = __kmp_threads; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric /* ATT: The code below is a source of potential bugs due to unsynchronized 1540b57cec5SDimitry Andric access to __kmp_threads array. For example: 1550b57cec5SDimitry Andric 1. Current thread loads other_threads[i] to thr and checks it, it is 1560b57cec5SDimitry Andric non-NULL. 1570b57cec5SDimitry Andric 2. Current thread is suspended by OS. 1580b57cec5SDimitry Andric 3. Another thread unregisters and finishes (debug versions of free() 1590b57cec5SDimitry Andric may fill memory with something like 0xEF). 1600b57cec5SDimitry Andric 4. Current thread is resumed. 1610b57cec5SDimitry Andric 5. Current thread reads junk from *thr. 1620b57cec5SDimitry Andric TODO: Fix it. 
--ln */ 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); 1670b57cec5SDimitry Andric if (!thr) 1680b57cec5SDimitry Andric continue; 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); 1710b57cec5SDimitry Andric stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric /* stack grows down -- search through all of the active threads */ 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (stack_addr <= stack_base) { 1760b57cec5SDimitry Andric size_t stack_diff = stack_base - stack_addr; 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric if (stack_diff <= stack_size) { 1790b57cec5SDimitry Andric /* The only way we can be closer than the allocated */ 1800b57cec5SDimitry Andric /* stack size is if we are running on this thread. */ 1810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); 1820b57cec5SDimitry Andric return i; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* get specific to try and determine our gtid */ 1880b57cec5SDimitry Andric KA_TRACE(1000, 1890b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: internal alg. 
failed to find " 1900b57cec5SDimitry Andric "thread, using TLS\n")); 1910b57cec5SDimitry Andric i = __kmp_gtid_get_specific(); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric /* if we havn't been assigned a gtid, then return code */ 1960b57cec5SDimitry Andric if (i < 0) 1970b57cec5SDimitry Andric return i; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /* dynamically updated stack window for uber threads to avoid get_specific 2000b57cec5SDimitry Andric call */ 2010b57cec5SDimitry Andric if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { 2020b57cec5SDimitry Andric KMP_FATAL(StackOverflow, i); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2060b57cec5SDimitry Andric if (stack_addr > stack_base) { 2070b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 2080b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2090b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - 2100b57cec5SDimitry Andric stack_base); 2110b57cec5SDimitry Andric } else { 2120b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2130b57cec5SDimitry Andric stack_base - stack_addr); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /* Reprint stack bounds for ubermaster since they have been refined */ 2170b57cec5SDimitry Andric if (__kmp_storage_map) { 2180b57cec5SDimitry Andric char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2190b57cec5SDimitry Andric char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 2200b57cec5SDimitry Andric __kmp_print_storage_map_gtid(i, stack_beg, stack_end, 2210b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize, 2220b57cec5SDimitry Andric 
"th_%d stack (refinement)", i); 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric return i; 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric int __kmp_get_global_thread_id_reg() { 2280b57cec5SDimitry Andric int gtid; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric if (!__kmp_init_serial) { 2310b57cec5SDimitry Andric gtid = KMP_GTID_DNE; 2320b57cec5SDimitry Andric } else 2330b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2340b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 2350b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); 2360b57cec5SDimitry Andric gtid = __kmp_gtid; 2370b57cec5SDimitry Andric } else 2380b57cec5SDimitry Andric #endif 2390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 2400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); 2410b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2420b57cec5SDimitry Andric } else { 2430b57cec5SDimitry Andric KA_TRACE(1000, 2440b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); 2450b57cec5SDimitry Andric gtid = __kmp_get_global_thread_id(); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /* we must be a new uber master sibling thread */ 2490b57cec5SDimitry Andric if (gtid == KMP_GTID_DNE) { 2500b57cec5SDimitry Andric KA_TRACE(10, 2510b57cec5SDimitry Andric ("__kmp_get_global_thread_id_reg: Encountered new root thread. 
" 2520b57cec5SDimitry Andric "Registering a new gtid.\n")); 2530b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 2540b57cec5SDimitry Andric if (!__kmp_init_serial) { 2550b57cec5SDimitry Andric __kmp_do_serial_initialize(); 2560b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2570b57cec5SDimitry Andric } else { 2580b57cec5SDimitry Andric gtid = __kmp_register_root(FALSE); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 2610b57cec5SDimitry Andric /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric return gtid; 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric /* caller must hold forkjoin_lock */ 2700b57cec5SDimitry Andric void __kmp_check_stack_overlap(kmp_info_t *th) { 2710b57cec5SDimitry Andric int f; 2720b57cec5SDimitry Andric char *stack_beg = NULL; 2730b57cec5SDimitry Andric char *stack_end = NULL; 2740b57cec5SDimitry Andric int gtid; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); 2770b57cec5SDimitry Andric if (__kmp_storage_map) { 2780b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2790b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric if (gtid == KMP_GTID_MONITOR) { 2840b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2850b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2860b57cec5SDimitry Andric "th_%s stack (%s)", "mon", 2870b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); 2880b57cec5SDimitry Andric } else { 2890b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2900b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2910b57cec5SDimitry Andric "th_%d stack (%s)", gtid, 2920b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric /* No point in checking ubermaster threads since they use refinement and 2970b57cec5SDimitry Andric * cannot overlap */ 2980b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2990b57cec5SDimitry Andric if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 3000b57cec5SDimitry Andric KA_TRACE(10, 3010b57cec5SDimitry Andric ("__kmp_check_stack_overlap: performing extensive checking\n")); 3020b57cec5SDimitry Andric if (stack_beg == NULL) { 3030b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 3040b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 3080b57cec5SDimitry Andric kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric if (f_th && f_th != th) { 3110b57cec5SDimitry Andric char *other_stack_end = 3120b57cec5SDimitry Andric (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 3130b57cec5SDimitry Andric char *other_stack_beg = 3140b57cec5SDimitry Andric other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 3150b57cec5SDimitry Andric if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 3160b57cec5SDimitry Andric (stack_end > other_stack_beg && stack_end < other_stack_end)) { 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric /* Print the other stack values before the abort */ 3190b57cec5SDimitry Andric if (__kmp_storage_map) 
3200b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 3210b57cec5SDimitry Andric -1, other_stack_beg, other_stack_end, 3220b57cec5SDimitry Andric (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 3230b57cec5SDimitry Andric "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), 3260b57cec5SDimitry Andric __kmp_msg_null); 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric void __kmp_infinite_loop(void) { 3370b57cec5SDimitry Andric static int done = FALSE; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric while (!done) { 3400b57cec5SDimitry Andric KMP_YIELD(TRUE); 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric #define MAX_MESSAGE 512 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 3470b57cec5SDimitry Andric char const *format, ...) 
{ 3480b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3490b57cec5SDimitry Andric va_list ap; 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric va_start(ap, format); 3520b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 3530b57cec5SDimitry Andric p2, (unsigned long)size, format); 3540b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 3550b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 3560b57cec5SDimitry Andric #if KMP_PRINT_DATA_PLACEMENT 3570b57cec5SDimitry Andric int node; 3580b57cec5SDimitry Andric if (gtid >= 0) { 3590b57cec5SDimitry Andric if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 3600b57cec5SDimitry Andric if (__kmp_storage_map_verbose) { 3610b57cec5SDimitry Andric node = __kmp_get_host_node(p1); 3620b57cec5SDimitry Andric if (node < 0) /* doesn't work, so don't try this next time */ 3630b57cec5SDimitry Andric __kmp_storage_map_verbose = FALSE; 3640b57cec5SDimitry Andric else { 3650b57cec5SDimitry Andric char *last; 3660b57cec5SDimitry Andric int lastNode; 3670b57cec5SDimitry Andric int localProc = __kmp_get_cpu_from_gtid(gtid); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric const int page_size = KMP_GET_PAGE_SIZE(); 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 3720b57cec5SDimitry Andric p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 3730b57cec5SDimitry Andric if (localProc >= 0) 3740b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 3750b57cec5SDimitry Andric localProc >> 1); 3760b57cec5SDimitry Andric else 3770b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d\n", gtid); 3780b57cec5SDimitry Andric #if KMP_USE_PRCTL 3790b57cec5SDimitry Andric /* The more elaborate format is disabled for now because of the prctl 3800b57cec5SDimitry Andric * hanging bug. 
*/ 3810b57cec5SDimitry Andric do { 3820b57cec5SDimitry Andric last = p1; 3830b57cec5SDimitry Andric lastNode = node; 3840b57cec5SDimitry Andric /* This loop collates adjacent pages with the same host node. */ 3850b57cec5SDimitry Andric do { 3860b57cec5SDimitry Andric (char *)p1 += page_size; 3870b57cec5SDimitry Andric } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 3880b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 3890b57cec5SDimitry Andric lastNode); 3900b57cec5SDimitry Andric } while (p1 <= p2); 3910b57cec5SDimitry Andric #else 3920b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 3930b57cec5SDimitry Andric (char *)p1 + (page_size - 1), 3940b57cec5SDimitry Andric __kmp_get_host_node(p1)); 3950b57cec5SDimitry Andric if (p1 < p2) { 3960b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 3970b57cec5SDimitry Andric (char *)p2 + (page_size - 1), 3980b57cec5SDimitry Andric __kmp_get_host_node(p2)); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric #endif 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric } else 4040b57cec5SDimitry Andric __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric #endif /* KMP_PRINT_DATA_PLACEMENT */ 4070b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric void __kmp_warn(char const *format, ...) 
{ 4110b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 4120b57cec5SDimitry Andric va_list ap; 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric if (__kmp_generate_warnings == kmp_warnings_off) { 4150b57cec5SDimitry Andric return; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric va_start(ap, format); 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 4210b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 4220b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 4230b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric va_end(ap); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric void __kmp_abort_process() { 4290b57cec5SDimitry Andric // Later threads may stall here, but that's ok because abort() will kill them. 4300b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric if (__kmp_debug_buf) { 4330b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric if (KMP_OS_WINDOWS) { 4370b57cec5SDimitry Andric // Let other threads know of abnormal termination and prevent deadlock 4380b57cec5SDimitry Andric // if abort happened during library initialization or shutdown 4390b57cec5SDimitry Andric __kmp_global.g.g_abort = SIGABRT; 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric /* On Windows* OS by default abort() causes pop-up error box, which stalls 4420b57cec5SDimitry Andric nightly testing. Unfortunately, we cannot reliably suppress pop-up error 4430b57cec5SDimitry Andric boxes. _set_abort_behavior() works well, but this function is not 4440b57cec5SDimitry Andric available in VS7 (this is not problem for DLL, but it is a problem for 4450b57cec5SDimitry Andric static OpenMP RTL). 
SetErrorMode (and so, timelimit utility) does not 4460b57cec5SDimitry Andric help, at least in some versions of MS C RTL. 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric It seems following sequence is the only way to simulate abort() and 4490b57cec5SDimitry Andric avoid pop-up error box. */ 4500b57cec5SDimitry Andric raise(SIGABRT); 4510b57cec5SDimitry Andric _exit(3); // Just in case, if signal ignored, exit anyway. 4520b57cec5SDimitry Andric } else { 453e8d8bef9SDimitry Andric __kmp_unregister_library(); 4540b57cec5SDimitry Andric abort(); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric __kmp_infinite_loop(); 4580b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_exit_lock); 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric } // __kmp_abort_process 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric void __kmp_abort_thread(void) { 4630b57cec5SDimitry Andric // TODO: Eliminate g_abort global variable and this function. 4640b57cec5SDimitry Andric // In case of abort just call abort(), it will kill all the threads. 4650b57cec5SDimitry Andric __kmp_infinite_loop(); 4660b57cec5SDimitry Andric } // __kmp_abort_thread 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric /* Print out the storage map for the major kmp_info_t thread data structures 4690b57cec5SDimitry Andric that are allocated together. 
*/ 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 4720b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 4730b57cec5SDimitry Andric gtid); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 4760b57cec5SDimitry Andric sizeof(kmp_desc_t), "th_%d.th_info", gtid); 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 4790b57cec5SDimitry Andric sizeof(kmp_local_t), "th_%d.th_local", gtid); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 4820b57cec5SDimitry Andric gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 4830b57cec5SDimitry Andric sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 4860b57cec5SDimitry Andric &thr->th.th_bar[bs_plain_barrier + 1], 4870b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 4880b57cec5SDimitry Andric gtid); 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 4910b57cec5SDimitry Andric &thr->th.th_bar[bs_forkjoin_barrier + 1], 4920b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 4930b57cec5SDimitry Andric gtid); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 4960b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 4970b57cec5SDimitry Andric &thr->th.th_bar[bs_reduction_barrier + 1], 4980b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 4990b57cec5SDimitry Andric gtid); 5000b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 5010b57cec5SDimitry Andric } 
5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric /* Print out the storage map for the major kmp_team_t team data structures 5040b57cec5SDimitry Andric that are allocated together. */ 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, 5070b57cec5SDimitry Andric int team_id, int num_thr) { 5080b57cec5SDimitry Andric int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 5090b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 5100b57cec5SDimitry Andric header, team_id); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], 5130b57cec5SDimitry Andric &team->t.t_bar[bs_last_barrier], 5140b57cec5SDimitry Andric sizeof(kmp_balign_team_t) * bs_last_barrier, 5150b57cec5SDimitry Andric "%s_%d.t_bar", header, team_id); 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], 5180b57cec5SDimitry Andric &team->t.t_bar[bs_plain_barrier + 1], 5190b57cec5SDimitry Andric sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", 5200b57cec5SDimitry Andric header, team_id); 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], 5230b57cec5SDimitry Andric &team->t.t_bar[bs_forkjoin_barrier + 1], 5240b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5250b57cec5SDimitry Andric "%s_%d.t_bar[forkjoin]", header, team_id); 5260b57cec5SDimitry Andric 5270b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 5280b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], 5290b57cec5SDimitry Andric &team->t.t_bar[bs_reduction_barrier + 1], 5300b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5310b57cec5SDimitry Andric "%s_%d.t_bar[reduction]", header, team_id); 5320b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 
  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}

// Initialize the runtime's memory allocators: the memkind-backed allocator
// support and the target (offload) memory support.
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
// Tear down allocator state set up in __kmp_init_allocator().
// NOTE(review): only memkind is finalized here; __kmp_init_target_mem() has
// no matching finalizer in this function — presumably target memory needs no
// explicit cleanup; confirm.
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

// Windows DLL entry point. Performs runtime shutdown bookkeeping as the OS
// notifies the library of process/thread attach and detach events.
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // According to Windows* documentation for DllMain entry point:
    // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
    //   lpReserved == NULL when FreeLibrary() is called,
    //   lpReserved != NULL when the process is terminated.
    // When FreeLibrary() is called, worker threads remain alive. So the
    // runtime's state is consistent and executing proper shutdown is OK.
    // When the process is terminated, worker threads have exited or been
    // forcefully terminated by the OS and only the shutdown thread remains.
    // This can leave the runtime in an inconsistent state.
    // Hence, only attempt proper cleanup when FreeLibrary() is called.
    // Otherwise, rely on OS to reclaim resources.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // Per-thread runtime teardown for the detaching OS thread.
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn.
 */
// Ordered-section entry callback (dispatch "deo" hook): block the calling
// thread until the team's ordered ticket reaches this thread's tid.
// gtid_ref/cid_ref/loc_ref follow the standard dispatch callback signature;
// cid_ref is not used here.
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    // Spin-wait until the team's ordered value equals this thread's tid,
    // i.e. it is our turn to enter the ordered region.
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task.
 */
// Ordered-section exit callback (dispatch "dxo" hook): pass the ordered
// ticket to the next thread in the team so it may enter the ordered region.
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

// Contend for a SINGLE construct: returns 1 if the calling thread won the
// single region (or the team is serialized), 0 otherwise. push_ws controls
// whether a workshare entry is pushed for consistency checking.
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    // A serialized team has one thread; it trivially owns the single block.
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      // First thread to CAS the team's construct counter wins the single.
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

// Leave a SINGLE construct entered via __kmp_enter_single(): end the ITT
// single frame and pop the consistency-check workshare entry.
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a
   serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
// NOTE(review): enter_teams is not referenced anywhere in this body — it
// appears to be kept for interface compatibility with callers; confirm.
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    // dyn-var false: take the request as-is (limits below still apply).
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    // Cap the request by the processors still available to this process;
    // the root's own thread is accounted via r_active / hot-team size.
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    // Randomized team size in [1, set_nthreads]; only meaningful for > 2.
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0); // unknown dynamic mode
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is included
  // in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team.
   We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
// root: the root this team belongs to; team: the (already allocated) team;
// master_th/master_gtid: the forking (primary) thread and its gtid;
// fork_teams_workers: nonzero when forking workers for a teams construct,
// in which case place partitioning is deferred (see below).
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      // Workers inherit the primary thread's teams-construct context.
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet. This partitioning
    // will take place in the parallel region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif
  }

  // If any thread's previous team size or nesting level differs, request that
  // affinity be (re)displayed for this team.
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the
    // parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so runs only in a team of the
   single primary thread.
*/
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // Serialized "fork": the encountering thread becomes a team of one, reusing
  // (or allocating) its cached serial team.  Performs the same bookkeeping as
  // a real fork -- ICV propagation, dispatch-buffer setup, OMPT events --
  // without involving any worker threads.
  //
  // loc        - source location info for the parallel construct (may be NULL)
  // global_tid - global thread id of the encountering thread
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* Skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Lazily bring up the parallel runtime if this is the first parallel region.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Detach the current task team: a serialized region executes tasks
    // immediately, so the serial team must not carry a task team.
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  // Resolve the effective proc_bind policy for this region from the clause
  // value (th_set_proc_bind) and the proc-bind-var ICV.
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  // OMPT parallel-begin: reported even for a serialized region (team_size 1).
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // First level of serialization for this thread: install the serial team.
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used
         TODO increase performance by making these locks more specific */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      /* TODO the above breaks the requirement that if we run out of resources,
         then we can still guarantee that serialized teams are ok, since we may
         need to allocate a new one */
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used,
     * that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      // Push a fresh private dispatch buffer for this nesting level; it is
      // popped again when the serialized region ends.
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for
  // serialized parallel regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  // Report the implicit-task-begin event and set up the lightweight task
  // team that stands in for a real team in OMPT introspection.
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
13570b57cec5SDimitry Andric kmp_int32 argc, microtask_t microtask, launch_t invoker, 135816794618SDimitry Andric kmp_va_list ap) { 13590b57cec5SDimitry Andric void **argv; 13600b57cec5SDimitry Andric int i; 13610b57cec5SDimitry Andric int master_tid; 13620b57cec5SDimitry Andric int master_this_cons; 13630b57cec5SDimitry Andric kmp_team_t *team; 13640b57cec5SDimitry Andric kmp_team_t *parent_team; 13650b57cec5SDimitry Andric kmp_info_t *master_th; 13660b57cec5SDimitry Andric kmp_root_t *root; 13670b57cec5SDimitry Andric int nthreads; 13680b57cec5SDimitry Andric int master_active; 13690b57cec5SDimitry Andric int master_set_numthreads; 13700b57cec5SDimitry Andric int level; 13710b57cec5SDimitry Andric int active_level; 13720b57cec5SDimitry Andric int teams_level; 13730b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 13740b57cec5SDimitry Andric kmp_hot_team_ptr_t **p_hot_teams; 13750b57cec5SDimitry Andric #endif 13760b57cec5SDimitry Andric { // KMP_TIME_BLOCK 13770b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 13780b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 13810b57cec5SDimitry Andric if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 13820b57cec5SDimitry Andric /* Some systems prefer the stack for the root thread(s) to start with */ 13830b57cec5SDimitry Andric /* some gap from the parent stack to prevent false sharing. 
*/ 13840b57cec5SDimitry Andric void *dummy = KMP_ALLOCA(__kmp_stkpadding); 13850b57cec5SDimitry Andric /* These 2 lines below are so this does not get optimized out */ 13860b57cec5SDimitry Andric if (__kmp_stkpadding > KMP_MAX_STKPADDING) 13870b57cec5SDimitry Andric __kmp_stkpadding += (short)((kmp_int64)dummy); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric /* initialize if needed */ 13910b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 13920b57cec5SDimitry Andric __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 13930b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 13940b57cec5SDimitry Andric __kmp_parallel_initialize(); 13950b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric /* setup current data */ 13980b57cec5SDimitry Andric master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with 13990b57cec5SDimitry Andric // shutdown 14000b57cec5SDimitry Andric parent_team = master_th->th.th_team; 14010b57cec5SDimitry Andric master_tid = master_th->th.th_info.ds.ds_tid; 14020b57cec5SDimitry Andric master_this_cons = master_th->th.th_local.this_construct; 14030b57cec5SDimitry Andric root = master_th->th.th_root; 14040b57cec5SDimitry Andric master_active = root->r.r_active; 14050b57cec5SDimitry Andric master_set_numthreads = master_th->th.th_set_nproc; 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric #if OMPT_SUPPORT 14080b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 14090b57cec5SDimitry Andric ompt_data_t *parent_task_data; 14100b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 14110b57cec5SDimitry Andric ompt_data_t *implicit_task_data; 14120b57cec5SDimitry Andric void *return_address = NULL; 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14150b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, 14160b57cec5SDimitry Andric NULL, 
NULL); 14170b57cec5SDimitry Andric return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric #endif 14200b57cec5SDimitry Andric 1421fe6060f1SDimitry Andric // Assign affinity to root thread if it hasn't happened yet 1422fe6060f1SDimitry Andric __kmp_assign_root_init_mask(); 1423fe6060f1SDimitry Andric 14240b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 14250b57cec5SDimitry Andric level = parent_team->t.t_level; 14260b57cec5SDimitry Andric // used to launch non-serial teams even if nested is not allowed 14270b57cec5SDimitry Andric active_level = parent_team->t.t_active_level; 14280b57cec5SDimitry Andric // needed to check nesting inside the teams 14290b57cec5SDimitry Andric teams_level = master_th->th.th_teams_level; 14300b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 14310b57cec5SDimitry Andric p_hot_teams = &master_th->th.th_hot_teams; 14320b57cec5SDimitry Andric if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 14330b57cec5SDimitry Andric *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 14340b57cec5SDimitry Andric sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 14350b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 14360b57cec5SDimitry Andric // it is either actual or not needed (when active_level > 0) 14370b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team_nth = 1; 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric #endif 14400b57cec5SDimitry Andric 14410b57cec5SDimitry Andric #if OMPT_SUPPORT 14420b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14430b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 14440b57cec5SDimitry Andric int team_size = master_set_numthreads 14450b57cec5SDimitry Andric ? 
master_set_numthreads 14460b57cec5SDimitry Andric : get__nproc_2(parent_team, master_tid); 1447489b1cf2SDimitry Andric int flags = OMPT_INVOKER(call_context) | 1448489b1cf2SDimitry Andric ((microtask == (microtask_t)__kmp_teams_master) 1449489b1cf2SDimitry Andric ? ompt_parallel_league 1450489b1cf2SDimitry Andric : ompt_parallel_team); 14510b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 1452489b1cf2SDimitry Andric parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, 1453489b1cf2SDimitry Andric return_address); 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 14560b57cec5SDimitry Andric } 14570b57cec5SDimitry Andric #endif 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric master_th->th.th_ident = loc; 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric if (master_th->th.th_teams_microtask && ap && 14620b57cec5SDimitry Andric microtask != (microtask_t)__kmp_teams_master && level == teams_level) { 14630b57cec5SDimitry Andric // AC: This is start of parallel that is nested inside teams construct. 14640b57cec5SDimitry Andric // The team is actual (hot), all workers are ready at the fork barrier. 14650b57cec5SDimitry Andric // No lock needed to initialize the team a bit, then free workers. 
14660b57cec5SDimitry Andric parent_team->t.t_ident = loc; 14670b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, parent_team, TRUE); 14680b57cec5SDimitry Andric parent_team->t.t_argc = argc; 14690b57cec5SDimitry Andric argv = (void **)parent_team->t.t_argv; 14700b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 147116794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 14720b57cec5SDimitry Andric // Increment our nested depth levels, but not increase the serialization 14730b57cec5SDimitry Andric if (parent_team == master_th->th.th_serial_team) { 14740b57cec5SDimitry Andric // AC: we are in serialized parallel 14750b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 14760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1477489b1cf2SDimitry Andric 1478e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) { 1479e8d8bef9SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1480e8d8bef9SDimitry Andric // correctly, will restore at join time 1481e8d8bef9SDimitry Andric parent_team->t.t_serialized--; 1482e8d8bef9SDimitry Andric return TRUE; 1483e8d8bef9SDimitry Andric } 1484e8d8bef9SDimitry Andric 1485fe6060f1SDimitry Andric #if OMPD_SUPPORT 1486fe6060f1SDimitry Andric parent_team->t.t_pkfn = microtask; 1487fe6060f1SDimitry Andric #endif 1488fe6060f1SDimitry Andric 14890b57cec5SDimitry Andric #if OMPT_SUPPORT 14900b57cec5SDimitry Andric void *dummy; 1491489b1cf2SDimitry Andric void **exit_frame_p; 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14960b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 14970b57cec5SDimitry Andric &ompt_parallel_data, return_address); 1498489b1cf2SDimitry Andric exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric 
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 15010b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric /* OMPT implicit task begin */ 15040b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 15050b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1506fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1507fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1508489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1509489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1510489b1cf2SDimitry Andric implicit_task_data, 1, 1511489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15120b57cec5SDimitry Andric } 15130b57cec5SDimitry Andric 15140b57cec5SDimitry Andric /* OMPT state */ 15150b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 15160b57cec5SDimitry Andric } else { 1517489b1cf2SDimitry Andric exit_frame_p = &dummy; 15180b57cec5SDimitry Andric } 15190b57cec5SDimitry Andric #endif 1520489b1cf2SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1521489b1cf2SDimitry Andric // correctly, will restore at join time 1522489b1cf2SDimitry Andric parent_team->t.t_serialized--; 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric { 15250b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 15260b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 15270b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 15280b57cec5SDimitry Andric #if OMPT_SUPPORT 15290b57cec5SDimitry Andric , 1530489b1cf2SDimitry Andric exit_frame_p 15310b57cec5SDimitry Andric #endif 15320b57cec5SDimitry Andric ); 15330b57cec5SDimitry Andric } 15340b57cec5SDimitry Andric 15350b57cec5SDimitry Andric #if OMPT_SUPPORT 15360b57cec5SDimitry Andric 
if (ompt_enabled.enabled) { 1537489b1cf2SDimitry Andric *exit_frame_p = NULL; 15380b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; 15390b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 15400b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 15410b57cec5SDimitry Andric ompt_scope_end, NULL, implicit_task_data, 1, 1542489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15430b57cec5SDimitry Andric } 1544489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 15450b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 15460b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 15470b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1548489b1cf2SDimitry Andric &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), 1549489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1550489b1cf2SDimitry Andric return_address); 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 15530b57cec5SDimitry Andric } 15540b57cec5SDimitry Andric #endif 15550b57cec5SDimitry Andric return TRUE; 15560b57cec5SDimitry Andric } 15570b57cec5SDimitry Andric 15580b57cec5SDimitry Andric parent_team->t.t_pkfn = microtask; 15590b57cec5SDimitry Andric parent_team->t.t_invoke = invoker; 15600b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 15610b57cec5SDimitry Andric parent_team->t.t_active_level++; 15620b57cec5SDimitry Andric parent_team->t.t_level++; 15630b57cec5SDimitry Andric parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save 15640b57cec5SDimitry Andric 1565489b1cf2SDimitry Andric #if OMPT_SUPPORT 1566489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1567489b1cf2SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1568489b1cf2SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, 
gtid, 1569489b1cf2SDimitry Andric &ompt_parallel_data, return_address); 1570489b1cf2SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); 1571489b1cf2SDimitry Andric } 1572489b1cf2SDimitry Andric #endif 1573489b1cf2SDimitry Andric 15740b57cec5SDimitry Andric /* Change number of threads in the team if requested */ 15750b57cec5SDimitry Andric if (master_set_numthreads) { // The parallel has num_threads clause 1576349cc55cSDimitry Andric if (master_set_numthreads <= master_th->th.th_teams_size.nth) { 15770b57cec5SDimitry Andric // AC: only can reduce number of threads dynamically, can't increase 15780b57cec5SDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1579349cc55cSDimitry Andric // NOTE: if using distributed barrier, we need to run this code block 1580349cc55cSDimitry Andric // even when the team size appears not to have changed from the max. 1581349cc55cSDimitry Andric int old_proc = master_th->th.th_teams_size.nth; 1582349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == 1583349cc55cSDimitry Andric bp_dist_bar) { 1584349cc55cSDimitry Andric __kmp_resize_dist_barrier(parent_team, old_proc, 1585349cc55cSDimitry Andric master_set_numthreads); 1586349cc55cSDimitry Andric __kmp_add_threads_to_team(parent_team, master_set_numthreads); 1587349cc55cSDimitry Andric } 15880b57cec5SDimitry Andric parent_team->t.t_nproc = master_set_numthreads; 15890b57cec5SDimitry Andric for (i = 0; i < master_set_numthreads; ++i) { 15900b57cec5SDimitry Andric other_threads[i]->th.th_team_nproc = master_set_numthreads; 15910b57cec5SDimitry Andric } 15920b57cec5SDimitry Andric } 1593349cc55cSDimitry Andric // Keep extra threads hot in the team for possible next parallels 15940b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 15950b57cec5SDimitry Andric } 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric #if USE_DEBUGGER 15980b57cec5SDimitry Andric if (__kmp_debugging) { // Let debugger override number of 
threads. 15990b57cec5SDimitry Andric int nth = __kmp_omp_num_threads(loc); 16000b57cec5SDimitry Andric if (nth > 0) { // 0 means debugger doesn't want to change num threads 16010b57cec5SDimitry Andric master_set_numthreads = nth; 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric } 16040b57cec5SDimitry Andric #endif 16050b57cec5SDimitry Andric 1606349cc55cSDimitry Andric // Figure out the proc_bind policy for the nested parallel within teams 1607349cc55cSDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1608349cc55cSDimitry Andric // proc_bind_default means don't update 1609349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 1610349cc55cSDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1611349cc55cSDimitry Andric proc_bind = proc_bind_false; 1612349cc55cSDimitry Andric } else { 1613349cc55cSDimitry Andric // No proc_bind clause specified; use current proc-bind-var 1614349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 1615349cc55cSDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1616349cc55cSDimitry Andric } 1617349cc55cSDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel 1618349cc55cSDimitry Andric clause. 1619349cc55cSDimitry Andric This overrides proc-bind-var for this parallel region, but does not 1620349cc55cSDimitry Andric change proc-bind-var. */ 1621349cc55cSDimitry Andric // Figure the value of proc-bind-var for the child threads. 
1622349cc55cSDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 1623349cc55cSDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 1624349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 1625349cc55cSDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1626349cc55cSDimitry Andric } 1627349cc55cSDimitry Andric } 1628349cc55cSDimitry Andric KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind); 1629349cc55cSDimitry Andric // Need to change the bind-var ICV to correct value for each implicit task 1630349cc55cSDimitry Andric if (proc_bind_icv != proc_bind_default && 1631349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) { 1632349cc55cSDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1633349cc55cSDimitry Andric for (i = 0; i < master_th->th.th_team_nproc; ++i) { 1634349cc55cSDimitry Andric other_threads[i]->th.th_current_task->td_icvs.proc_bind = 1635349cc55cSDimitry Andric proc_bind_icv; 1636349cc55cSDimitry Andric } 1637349cc55cSDimitry Andric } 1638349cc55cSDimitry Andric // Reset for next parallel region 1639349cc55cSDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 1640349cc55cSDimitry Andric 1641e8d8bef9SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 1642e8d8bef9SDimitry Andric if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) || 1643e8d8bef9SDimitry Andric KMP_ITT_DEBUG) && 1644e8d8bef9SDimitry Andric __kmp_forkjoin_frames_mode == 3 && 1645e8d8bef9SDimitry Andric parent_team->t.t_active_level == 1 // only report frames at level 1 1646e8d8bef9SDimitry Andric && master_th->th.th_teams_size.nteams == 1) { 1647e8d8bef9SDimitry Andric kmp_uint64 tmp_time = __itt_get_timestamp(); 1648e8d8bef9SDimitry Andric master_th->th.th_frame_time = tmp_time; 1649e8d8bef9SDimitry Andric parent_team->t.t_region_time = tmp_time; 1650e8d8bef9SDimitry Andric } 1651e8d8bef9SDimitry Andric if (__itt_stack_caller_create_ptr) 
{ 1652fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 1653e8d8bef9SDimitry Andric // create new stack stitching id before entering fork barrier 1654e8d8bef9SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 1655e8d8bef9SDimitry Andric } 1656e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ 1657349cc55cSDimitry Andric #if KMP_AFFINITY_SUPPORTED 1658349cc55cSDimitry Andric __kmp_partition_places(parent_team); 1659349cc55cSDimitry Andric #endif 1660e8d8bef9SDimitry Andric 16610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " 16620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16630b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16640b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, parent_team); 16650b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " 16660b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16670b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16680b57cec5SDimitry Andric 1669e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) 1670e8d8bef9SDimitry Andric return TRUE; 1671e8d8bef9SDimitry Andric 1672fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 16730b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 16740b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16750b57cec5SDimitry Andric 16760b57cec5SDimitry Andric if (!parent_team->t.t_invoke(gtid)) { 1677fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 16780b57cec5SDimitry Andric } 16790b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 16800b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16810b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 16840b57cec5SDimitry Andric 16850b57cec5SDimitry Andric return TRUE; 16860b57cec5SDimitry Andric } // Parallel closely nested in teams construct 16870b57cec5SDimitry Andric 16880b57cec5SDimitry Andric #if KMP_DEBUG 16890b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 16900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 16910b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric #endif 16940b57cec5SDimitry Andric 1695349cc55cSDimitry Andric // Need this to happen before we determine the number of threads, not while 1696349cc55cSDimitry Andric // we are allocating the team 1697349cc55cSDimitry Andric //__kmp_push_current_task_to_thread(master_th, parent_team, 0); 1698fe6060f1SDimitry Andric int enter_teams = 0; 16990b57cec5SDimitry Andric if (parent_team->t.t_active_level >= 17000b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels) { 17010b57cec5SDimitry Andric nthreads = 1; 17020b57cec5SDimitry Andric } else { 1703fe6060f1SDimitry Andric enter_teams = ((ap == NULL && active_level == 0) || 17040b57cec5SDimitry Andric (ap && teams_level > 0 && teams_level == level)); 1705349cc55cSDimitry Andric nthreads = master_set_numthreads 17060b57cec5SDimitry Andric ? master_set_numthreads 1707349cc55cSDimitry Andric // TODO: get nproc directly from current task 1708349cc55cSDimitry Andric : get__nproc_2(parent_team, master_tid); 17090b57cec5SDimitry Andric // Check if we need to take forkjoin lock? (no need for serialized 17100b57cec5SDimitry Andric // parallel out of teams construct). This code moved here from 17110b57cec5SDimitry Andric // __kmp_reserve_threads() to speedup nested serialized parallels. 
17120b57cec5SDimitry Andric if (nthreads > 1) { 17130b57cec5SDimitry Andric if ((get__max_active_levels(master_th) == 1 && 17140b57cec5SDimitry Andric (root->r.r_in_parallel && !enter_teams)) || 17150b57cec5SDimitry Andric (__kmp_library == library_serial)) { 17160b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" 17170b57cec5SDimitry Andric " threads\n", 17180b57cec5SDimitry Andric gtid, nthreads)); 17190b57cec5SDimitry Andric nthreads = 1; 17200b57cec5SDimitry Andric } 17210b57cec5SDimitry Andric } 17220b57cec5SDimitry Andric if (nthreads > 1) { 17230b57cec5SDimitry Andric /* determine how many new threads we can use */ 17240b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 17250b57cec5SDimitry Andric /* AC: If we execute teams from parallel region (on host), then teams 17260b57cec5SDimitry Andric should be created but each can only have 1 thread if nesting is 17270b57cec5SDimitry Andric disabled. If teams called from serial region, then teams and their 17280b57cec5SDimitry Andric threads should be created regardless of the nesting setting. 
*/ 17290b57cec5SDimitry Andric nthreads = __kmp_reserve_threads(root, parent_team, master_tid, 17300b57cec5SDimitry Andric nthreads, enter_teams); 17310b57cec5SDimitry Andric if (nthreads == 1) { 17320b57cec5SDimitry Andric // Free lock for single thread execution here; for multi-thread 17330b57cec5SDimitry Andric // execution it will be freed later after team of threads created 17340b57cec5SDimitry Andric // and initialized 17350b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 17360b57cec5SDimitry Andric } 17370b57cec5SDimitry Andric } 17380b57cec5SDimitry Andric } 17390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nthreads > 0); 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // If we temporarily changed the set number of threads then restore it now 17420b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric /* create a serialized parallel region? */ 17450b57cec5SDimitry Andric if (nthreads == 1) { 17460b57cec5SDimitry Andric /* josh todo: hypothetical question: what do we do for OS X*? 
*/ 17470b57cec5SDimitry Andric #if KMP_OS_LINUX && \ 17480b57cec5SDimitry Andric (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 17490b57cec5SDimitry Andric void *args[argc]; 17500b57cec5SDimitry Andric #else 17510b57cec5SDimitry Andric void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 17520b57cec5SDimitry Andric #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 17530b57cec5SDimitry Andric KMP_ARCH_AARCH64) */ 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric KA_TRACE(20, 17560b57cec5SDimitry Andric ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 17570b57cec5SDimitry Andric 17580b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 17590b57cec5SDimitry Andric 1760fe6060f1SDimitry Andric #if OMPD_SUPPORT 1761fe6060f1SDimitry Andric master_th->th.th_serial_team->t.t_pkfn = microtask; 1762fe6060f1SDimitry Andric #endif 1763fe6060f1SDimitry Andric 17640b57cec5SDimitry Andric if (call_context == fork_context_intel) { 17650b57cec5SDimitry Andric /* TODO this sucks, use the compiler itself to pass args! 
:) */ 17660b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_ident = loc; 17670b57cec5SDimitry Andric if (!ap) { 17680b57cec5SDimitry Andric // revert change made in __kmpc_serialized_parallel() 17690b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_level--; 17700b57cec5SDimitry Andric // Get args from parent team for teams construct 17710b57cec5SDimitry Andric 17720b57cec5SDimitry Andric #if OMPT_SUPPORT 17730b57cec5SDimitry Andric void *dummy; 1774489b1cf2SDimitry Andric void **exit_frame_p; 17750b57cec5SDimitry Andric ompt_task_info_t *task_info; 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric if (ompt_enabled.enabled) { 17800b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 17810b57cec5SDimitry Andric &ompt_parallel_data, return_address); 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 17840b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 17850b57cec5SDimitry Andric 17860b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1787489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 17880b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1789fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1790fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1791489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1792489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1793489b1cf2SDimitry Andric &(task_info->task_data), 1, 1794489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1795489b1cf2SDimitry Andric ompt_task_implicit); 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric /* OMPT state */ 17990b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 18000b57cec5SDimitry Andric } else { 1801489b1cf2SDimitry Andric exit_frame_p = &dummy; 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric #endif 18040b57cec5SDimitry Andric 18050b57cec5SDimitry Andric { 18060b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 18070b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 18080b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, 18090b57cec5SDimitry Andric parent_team->t.t_argv 18100b57cec5SDimitry Andric #if OMPT_SUPPORT 18110b57cec5SDimitry Andric , 1812489b1cf2SDimitry Andric exit_frame_p 18130b57cec5SDimitry Andric #endif 18140b57cec5SDimitry Andric ); 18150b57cec5SDimitry Andric } 18160b57cec5SDimitry Andric 18170b57cec5SDimitry Andric #if OMPT_SUPPORT 18180b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1819489b1cf2SDimitry Andric *exit_frame_p = NULL; 18200b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 18210b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 18220b57cec5SDimitry Andric 
ompt_scope_end, NULL, &(task_info->task_data), 1, 1823489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1824489b1cf2SDimitry Andric ompt_task_implicit); 18250b57cec5SDimitry Andric } 1826489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 18270b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 18280b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 18290b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1830489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1831489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1832489b1cf2SDimitry Andric return_address); 18330b57cec5SDimitry Andric } 18340b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric #endif 18370b57cec5SDimitry Andric } else if (microtask == (microtask_t)__kmp_teams_master) { 18380b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_team == 18390b57cec5SDimitry Andric master_th->th.th_serial_team); 18400b57cec5SDimitry Andric team = master_th->th.th_team; 18410b57cec5SDimitry Andric // team->t.t_pkfn = microtask; 18420b57cec5SDimitry Andric team->t.t_invoke = invoker; 18430b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 18440b57cec5SDimitry Andric team->t.t_argc = argc; 18450b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 18460b57cec5SDimitry Andric if (ap) { 18470b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 184816794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18490b57cec5SDimitry Andric } else { 18500b57cec5SDimitry Andric for (i = 0; i < argc; ++i) 18510b57cec5SDimitry Andric // Get args from parent team for teams construct 18520b57cec5SDimitry Andric argv[i] = parent_team->t.t_argv[i]; 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric // AC: revert change made in __kmpc_serialized_parallel() 18550b57cec5SDimitry Andric 
// because initial code in teams should have level=0 18560b57cec5SDimitry Andric team->t.t_level--; 18570b57cec5SDimitry Andric // AC: call special invoker for outer "parallel" of teams construct 18580b57cec5SDimitry Andric invoker(gtid); 1859489b1cf2SDimitry Andric #if OMPT_SUPPORT 1860489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1861489b1cf2SDimitry Andric ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); 1862489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1863489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1864489b1cf2SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 0, 1865489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); 1866489b1cf2SDimitry Andric } 1867489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 1868489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1869489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1870489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_league, 1871489b1cf2SDimitry Andric return_address); 1872489b1cf2SDimitry Andric } 1873489b1cf2SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 1874489b1cf2SDimitry Andric } 1875489b1cf2SDimitry Andric #endif 18760b57cec5SDimitry Andric } else { 18770b57cec5SDimitry Andric argv = args; 18780b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 187916794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18800b57cec5SDimitry Andric KMP_MB(); 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric #if OMPT_SUPPORT 18830b57cec5SDimitry Andric void *dummy; 1884489b1cf2SDimitry Andric void **exit_frame_p; 18850b57cec5SDimitry Andric ompt_task_info_t *task_info; 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 18880b57cec5SDimitry Andric 18890b57cec5SDimitry Andric if (ompt_enabled.enabled) { 18900b57cec5SDimitry Andric 
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 18910b57cec5SDimitry Andric &ompt_parallel_data, return_address); 18920b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 18930b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 18940b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1895489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 18960b57cec5SDimitry Andric 18970b57cec5SDimitry Andric /* OMPT implicit task begin */ 18980b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 18990b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19000b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19010b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1902489b1cf2SDimitry Andric implicit_task_data, 1, __kmp_tid_from_gtid(gtid), 1903489b1cf2SDimitry Andric ompt_task_implicit); 1904fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1905fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric /* OMPT state */ 19090b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 19100b57cec5SDimitry Andric } else { 1911489b1cf2SDimitry Andric exit_frame_p = &dummy; 19120b57cec5SDimitry Andric } 19130b57cec5SDimitry Andric #endif 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric { 19160b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 19170b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 19180b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, args 19190b57cec5SDimitry Andric #if OMPT_SUPPORT 19200b57cec5SDimitry Andric , 1921489b1cf2SDimitry Andric exit_frame_p 19220b57cec5SDimitry Andric #endif 19230b57cec5SDimitry Andric ); 19240b57cec5SDimitry Andric } 19250b57cec5SDimitry Andric 19260b57cec5SDimitry Andric 
#if OMPT_SUPPORT 19270b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1928489b1cf2SDimitry Andric *exit_frame_p = NULL; 19290b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19300b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19310b57cec5SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 1, 1932489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1933489b1cf2SDimitry Andric ompt_task_implicit); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 19370b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 19380b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 19390b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 19400b57cec5SDimitry Andric &ompt_parallel_data, parent_task_data, 1941489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1942489b1cf2SDimitry Andric return_address); 19430b57cec5SDimitry Andric } 19440b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric #endif 19470b57cec5SDimitry Andric } 19480b57cec5SDimitry Andric } else if (call_context == fork_context_gnu) { 19490b57cec5SDimitry Andric #if OMPT_SUPPORT 19500b57cec5SDimitry Andric ompt_lw_taskteam_t lwt; 19510b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, 19520b57cec5SDimitry Andric return_address); 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric lwt.ompt_task_info.frame.exit_frame = ompt_data_none; 19550b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lwt, master_th, 1); 19560b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 19570b57cec5SDimitry Andric #endif 19580b57cec5SDimitry Andric 19590b57cec5SDimitry Andric // we were called from GNU native code 19600b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19610b57cec5SDimitry Andric return FALSE; 19620b57cec5SDimitry Andric } else { 19630b57cec5SDimitry Andric KMP_ASSERT2(call_context < fork_context_last, 19640b57cec5SDimitry Andric "__kmp_fork_call: unknown fork_context parameter"); 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19680b57cec5SDimitry Andric KMP_MB(); 19690b57cec5SDimitry Andric return FALSE; 19700b57cec5SDimitry Andric } // if (nthreads == 1) 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric // GEH: only modify the executing flag in the case when not serialized 19730b57cec5SDimitry Andric // serialized case is handled in kmpc_serialized_parallel 19740b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 19750b57cec5SDimitry Andric "curtask=%p, curtask_max_aclevel=%d\n", 19760b57cec5SDimitry Andric parent_team->t.t_active_level, master_th, 19770b57cec5SDimitry Andric master_th->th.th_current_task, 19780b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)); 19790b57cec5SDimitry Andric // TODO: GEH - cannot do this assertion because root thread not set up as 19800b57cec5SDimitry Andric // executing 19810b57cec5SDimitry Andric // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 19820b57cec5SDimitry Andric master_th->th.th_current_task->td_flags.executing = 0; 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 19850b57cec5SDimitry Andric /* Increment our nested depth level */ 19860b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 19870b57cec5SDimitry Andric } 19880b57cec5SDimitry Andric 
19890b57cec5SDimitry Andric // See if we need to make a copy of the ICVs. 19900b57cec5SDimitry Andric int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 19910b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_nth.used) && 19920b57cec5SDimitry Andric (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 19930b57cec5SDimitry Andric nthreads_icv = __kmp_nested_nth.nth[level + 1]; 19940b57cec5SDimitry Andric } else { 19950b57cec5SDimitry Andric nthreads_icv = 0; // don't update 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric // Figure out the proc_bind_policy for the new team. 19990b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 2000349cc55cSDimitry Andric // proc_bind_default means don't update 2001349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 20020b57cec5SDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 20030b57cec5SDimitry Andric proc_bind = proc_bind_false; 20040b57cec5SDimitry Andric } else { 20050b57cec5SDimitry Andric // No proc_bind clause specified; use current proc-bind-var for this 20060b57cec5SDimitry Andric // parallel region 2007349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 20080b57cec5SDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 20090b57cec5SDimitry Andric } 2010349cc55cSDimitry Andric // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND 2011349cc55cSDimitry Andric if (master_th->th.th_teams_microtask && 2012349cc55cSDimitry Andric microtask == (microtask_t)__kmp_teams_master) { 2013349cc55cSDimitry Andric proc_bind = __kmp_teams_proc_bind; 2014349cc55cSDimitry Andric } 20150b57cec5SDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 20160b57cec5SDimitry Andric This overrides proc-bind-var for this parallel region, but does not 20170b57cec5SDimitry Andric change proc-bind-var. 
*/ 20180b57cec5SDimitry Andric // Figure the value of proc-bind-var for the child threads. 20190b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 20200b57cec5SDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 20210b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 2022349cc55cSDimitry Andric // Do not modify the proc bind icv for the two teams construct forks 2023349cc55cSDimitry Andric // They just let the proc bind icv pass through 2024349cc55cSDimitry Andric if (!master_th->th.th_teams_microtask || 2025349cc55cSDimitry Andric !(microtask == (microtask_t)__kmp_teams_master || ap == NULL)) 20260b57cec5SDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric } 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric // Reset for next parallel region 20310b57cec5SDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) { 20340b57cec5SDimitry Andric kmp_internal_control_t new_icvs; 20350b57cec5SDimitry Andric copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 20360b57cec5SDimitry Andric new_icvs.next = NULL; 20370b57cec5SDimitry Andric if (nthreads_icv > 0) { 20380b57cec5SDimitry Andric new_icvs.nproc = nthreads_icv; 20390b57cec5SDimitry Andric } 20400b57cec5SDimitry Andric if (proc_bind_icv != proc_bind_default) { 20410b57cec5SDimitry Andric new_icvs.proc_bind = proc_bind_icv; 20420b57cec5SDimitry Andric } 20430b57cec5SDimitry Andric 20440b57cec5SDimitry Andric /* allocate a new parallel team */ 20450b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20460b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20470b57cec5SDimitry Andric #if OMPT_SUPPORT 20480b57cec5SDimitry Andric ompt_parallel_data, 20490b57cec5SDimitry Andric 
#endif 20500b57cec5SDimitry Andric proc_bind, &new_icvs, 20510b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2052349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2053349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); 20540b57cec5SDimitry Andric } else { 20550b57cec5SDimitry Andric /* allocate a new parallel team */ 20560b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20570b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20580b57cec5SDimitry Andric #if OMPT_SUPPORT 20590b57cec5SDimitry Andric ompt_parallel_data, 20600b57cec5SDimitry Andric #endif 20610b57cec5SDimitry Andric proc_bind, 20620b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, 20630b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2064349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2065349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, 2066349cc55cSDimitry Andric &master_th->th.th_current_task->td_icvs); 20670b57cec5SDimitry Andric } 20680b57cec5SDimitry Andric KF_TRACE( 20690b57cec5SDimitry Andric 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 20700b57cec5SDimitry Andric 20710b57cec5SDimitry Andric /* setup the new team */ 20720b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 20730b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 20740b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 20750b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 20760b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 20770b57cec5SDimitry Andric #if OMPT_SUPPORT 20780b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, 20790b57cec5SDimitry Andric return_address); 
20800b57cec5SDimitry Andric #endif 20810b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 20820b57cec5SDimitry Andric // TODO: parent_team->t.t_level == INT_MAX ??? 20830b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 20840b57cec5SDimitry Andric int new_level = parent_team->t.t_level + 1; 20850b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20860b57cec5SDimitry Andric new_level = parent_team->t.t_active_level + 1; 20870b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20880b57cec5SDimitry Andric } else { 20890b57cec5SDimitry Andric // AC: Do not increase parallel level at start of the teams construct 20900b57cec5SDimitry Andric int new_level = parent_team->t.t_level; 20910b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20920b57cec5SDimitry Andric new_level = parent_team->t.t_active_level; 20930b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2096fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 20970b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 21000b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); 21010b57cec5SDimitry Andric 21020b57cec5SDimitry Andric // Update the floating point rounding in the team if required. 
21030b57cec5SDimitry Andric propagateFPControl(team); 2104fe6060f1SDimitry Andric #if OMPD_SUPPORT 2105fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 2106fe6060f1SDimitry Andric ompd_bp_parallel_begin(); 2107fe6060f1SDimitry Andric #endif 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 2110fe6060f1SDimitry Andric // Set primary thread's task team to team's task team. Unless this is hot 2111fe6060f1SDimitry Andric // team, it should be NULL. 21120b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 21130b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 2114fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team " 21150b57cec5SDimitry Andric "%p, new task_team %p / team %p\n", 21160b57cec5SDimitry Andric __kmp_gtid_from_thread(master_th), 21170b57cec5SDimitry Andric master_th->th.th_task_team, parent_team, 21180b57cec5SDimitry Andric team->t.t_task_team[master_th->th.th_task_state], team)); 21190b57cec5SDimitry Andric 21200b57cec5SDimitry Andric if (active_level || master_th->th.th_task_team) { 2121fe6060f1SDimitry Andric // Take a memo of primary thread's task_state 21220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 21230b57cec5SDimitry Andric if (master_th->th.th_task_state_top >= 21240b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz) { // increase size 21250b57cec5SDimitry Andric kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 21260b57cec5SDimitry Andric kmp_uint8 *old_stack, *new_stack; 21270b57cec5SDimitry Andric kmp_uint32 i; 21280b57cec5SDimitry Andric new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 21290b57cec5SDimitry Andric for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 21300b57cec5SDimitry Andric new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric for (i = 
master_th->th.th_task_state_stack_sz; i < new_size; 21330b57cec5SDimitry Andric ++i) { // zero-init rest of stack 21340b57cec5SDimitry Andric new_stack[i] = 0; 21350b57cec5SDimitry Andric } 21360b57cec5SDimitry Andric old_stack = master_th->th.th_task_state_memo_stack; 21370b57cec5SDimitry Andric master_th->th.th_task_state_memo_stack = new_stack; 21380b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz = new_size; 21390b57cec5SDimitry Andric __kmp_free(old_stack); 21400b57cec5SDimitry Andric } 2141fe6060f1SDimitry Andric // Store primary thread's task_state on stack 21420b57cec5SDimitry Andric master_th->th 21430b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top] = 21440b57cec5SDimitry Andric master_th->th.th_task_state; 21450b57cec5SDimitry Andric master_th->th.th_task_state_top++; 21460b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21470b57cec5SDimitry Andric if (master_th->th.th_hot_teams && 21480b57cec5SDimitry Andric active_level < __kmp_hot_teams_max_level && 21490b57cec5SDimitry Andric team == master_th->th.th_hot_teams[active_level].hot_team) { 2150fe6060f1SDimitry Andric // Restore primary thread's nested state if nested hot team 21510b57cec5SDimitry Andric master_th->th.th_task_state = 21520b57cec5SDimitry Andric master_th->th 21530b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top]; 21540b57cec5SDimitry Andric } else { 21550b57cec5SDimitry Andric #endif 21560b57cec5SDimitry Andric master_th->th.th_task_state = 0; 21570b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21580b57cec5SDimitry Andric } 21590b57cec5SDimitry Andric #endif 21600b57cec5SDimitry Andric } 21610b57cec5SDimitry Andric #if !KMP_NESTED_HOT_TEAMS 21620b57cec5SDimitry Andric KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 21630b57cec5SDimitry Andric (team == root->r.r_hot_team)); 21640b57cec5SDimitry Andric #endif 21650b57cec5SDimitry Andric } 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric KA_TRACE( 
21680b57cec5SDimitry Andric 20, 21690b57cec5SDimitry Andric ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 21700b57cec5SDimitry Andric gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 21710b57cec5SDimitry Andric team->t.t_nproc)); 21720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 21730b57cec5SDimitry Andric (team->t.t_master_tid == 0 && 21740b57cec5SDimitry Andric (team->t.t_parent == root->r.r_root_team || 21750b57cec5SDimitry Andric team->t.t_parent->t.t_serialized))); 21760b57cec5SDimitry Andric KMP_MB(); 21770b57cec5SDimitry Andric 21780b57cec5SDimitry Andric /* now, setup the arguments */ 21790b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 21800b57cec5SDimitry Andric if (ap) { 21810b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) { 218216794618SDimitry Andric void *new_argv = va_arg(kmp_va_deref(ap), void *); 21830b57cec5SDimitry Andric KMP_CHECK_UPDATE(*argv, new_argv); 21840b57cec5SDimitry Andric argv++; 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry Andric } else { 21870b57cec5SDimitry Andric for (i = 0; i < argc; ++i) { 21880b57cec5SDimitry Andric // Get args from parent team for teams construct 21890b57cec5SDimitry Andric KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 21900b57cec5SDimitry Andric } 21910b57cec5SDimitry Andric } 21920b57cec5SDimitry Andric 21930b57cec5SDimitry Andric /* now actually fork the threads */ 21940b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 21950b57cec5SDimitry Andric if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 21960b57cec5SDimitry Andric root->r.r_active = TRUE; 21970b57cec5SDimitry Andric 2198349cc55cSDimitry Andric __kmp_fork_team_threads(root, team, master_th, gtid, !ap); 21990b57cec5SDimitry Andric __kmp_setup_icv_copy(team, nthreads, 22000b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, loc); 22010b57cec5SDimitry Andric 22020b57cec5SDimitry Andric #if 
OMPT_SUPPORT 22030b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 22040b57cec5SDimitry Andric #endif 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric #if USE_ITT_BUILD 22090b57cec5SDimitry Andric if (team->t.t_active_level == 1 // only report frames at level 1 22100b57cec5SDimitry Andric && !master_th->th.th_teams_microtask) { // not in teams construct 22110b57cec5SDimitry Andric #if USE_ITT_NOTIFY 22120b57cec5SDimitry Andric if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 22130b57cec5SDimitry Andric (__kmp_forkjoin_frames_mode == 3 || 22140b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 1)) { 22150b57cec5SDimitry Andric kmp_uint64 tmp_time = 0; 22160b57cec5SDimitry Andric if (__itt_get_timestamp_ptr) 22170b57cec5SDimitry Andric tmp_time = __itt_get_timestamp(); 22180b57cec5SDimitry Andric // Internal fork - report frame begin 22190b57cec5SDimitry Andric master_th->th.th_frame_time = tmp_time; 22200b57cec5SDimitry Andric if (__kmp_forkjoin_frames_mode == 3) 22210b57cec5SDimitry Andric team->t.t_region_time = tmp_time; 22220b57cec5SDimitry Andric } else 22230b57cec5SDimitry Andric // only one notification scheme (either "submit" or "forking/joined", not both) 22240b57cec5SDimitry Andric #endif /* USE_ITT_NOTIFY */ 22250b57cec5SDimitry Andric if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 22260b57cec5SDimitry Andric __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 22270b57cec5SDimitry Andric // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
22280b57cec5SDimitry Andric __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 22290b57cec5SDimitry Andric } 22300b57cec5SDimitry Andric } 22310b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22320b57cec5SDimitry Andric 22330b57cec5SDimitry Andric /* now go on and do the work */ 22340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 22350b57cec5SDimitry Andric KMP_MB(); 22360b57cec5SDimitry Andric KF_TRACE(10, 22370b57cec5SDimitry Andric ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 22380b57cec5SDimitry Andric root, team, master_th, gtid)); 22390b57cec5SDimitry Andric 22400b57cec5SDimitry Andric #if USE_ITT_BUILD 22410b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 2242fe6060f1SDimitry Andric // create new stack stitching id before entering fork barrier 2243fe6060f1SDimitry Andric if (!enter_teams) { 2244fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL); 2245fe6060f1SDimitry Andric team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2246fe6060f1SDimitry Andric } else if (parent_team->t.t_serialized) { 2247fe6060f1SDimitry Andric // keep stack stitching id in the serialized parent_team; 2248fe6060f1SDimitry Andric // current team will be used for parallel inside the teams; 2249fe6060f1SDimitry Andric // if parent_team is active, then it already keeps stack stitching id 2250fe6060f1SDimitry Andric // for the league of teams 2251fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 2252fe6060f1SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2253fe6060f1SDimitry Andric } 22540b57cec5SDimitry Andric } 22550b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22560b57cec5SDimitry Andric 2257fe6060f1SDimitry Andric // AC: skip __kmp_internal_fork at teams construct, let only primary 22580b57cec5SDimitry Andric // threads execute 22590b57cec5SDimitry Andric if (ap) { 22600b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, team); 
22610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 22620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 22630b57cec5SDimitry Andric root, team, master_th, gtid)); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric if (call_context == fork_context_gnu) { 22670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 22680b57cec5SDimitry Andric return TRUE; 22690b57cec5SDimitry Andric } 22700b57cec5SDimitry Andric 2271fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 22720b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 22730b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 22740b57cec5SDimitry Andric } // END of timer KMP_fork_call block 22750b57cec5SDimitry Andric 22760b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22770b57cec5SDimitry Andric // If beginning a teams construct, then change thread state 22780b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 22790b57cec5SDimitry Andric if (!ap) { 22800b57cec5SDimitry Andric KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric #endif 22830b57cec5SDimitry Andric 22840b57cec5SDimitry Andric if (!team->t.t_invoke(gtid)) { 2285fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22890b57cec5SDimitry Andric // If was beginning of a teams construct, then reset thread state 22900b57cec5SDimitry Andric if (!ap) { 22910b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 22920b57cec5SDimitry Andric } 22930b57cec5SDimitry Andric #endif 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 22960b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}

#if OMPT_SUPPORT
// Restore the OMPT thread state after leaving a parallel region: report
// serial work if the team being returned to is serialized, parallel work
// otherwise.
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

// Fire the OMPT parallel-end callback for the region being joined (if a tool
// registered one), clear the current task's enter frame, and restore the
// thread's OMPT state. 'flags' carries the invoker bits plus
// ompt_parallel_team / ompt_parallel_league, as computed by the caller.
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif

// Join the parallel region the calling (primary) thread is currently inside:
// run the join barrier (unless exiting a teams construct), emit tool
// notifications (OMPT/ITT), restore the parent team as the thread's current
// team, and free the forked team back to the team pool.
//
//   loc          - source location of the join point (tracing / ITT frames)
//   gtid         - global thread id of the primary thread
//   fork_context - (OMPT builds) which API entry forked the region; shapes
//                  the OMPT end-of-parallel callbacks
//   exit_teams   - nonzero when leaving a teams construct: skips the join
//                  barrier for the internal team and resets task state
//
// Three exit paths: (1) serialized team -> delegate to
// __kmpc_end_serialized_parallel; (2) parallel inside a teams construct ->
// keep the team structure intact, only unwind nesting levels; (3) normal
// join -> full cleanup and restore of the parent team.
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  // Captured before the team is torn down; used below to distinguish a
  // league (teams) region from an ordinary parallel team.
  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  // Path (1): serialized region — no real team to join.
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  // Remember r_active so it can be restored after the team is freed (see the
  // `root->r.r_active != master_active` check near the end).
  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    // But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
#endif
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by master of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
#endif

    // Distributed barrier: bring the barrier bookkeeping back in line with
    // the full team size before the team is reused.
    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  // Path (2): joining a parallel nested directly inside a teams construct.
  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel same (hot) team
    // works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams primary thread in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  // Path (3): normal join.
  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      // The league's primary thread ends an *initial* task; an ordinary
      // team member ends an *implicit* task (team size reported as 0 for
      // the initial task, per the OMPT callback convention used here).
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif
  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally since the old team may be
     reallocated and the hierarchy appears inconsistent. it is actually safe to
     run and won't cause any bugs, but will cause those assertion failures. it's
     only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
*/ 26610b57cec5SDimitry Andric void __kmp_save_internal_controls(kmp_info_t *thread) { 26620b57cec5SDimitry Andric 26630b57cec5SDimitry Andric if (thread->th.th_team != thread->th.th_serial_team) { 26640b57cec5SDimitry Andric return; 26650b57cec5SDimitry Andric } 26660b57cec5SDimitry Andric if (thread->th.th_team->t.t_serialized > 1) { 26670b57cec5SDimitry Andric int push = 0; 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric if (thread->th.th_team->t.t_control_stack_top == NULL) { 26700b57cec5SDimitry Andric push = 1; 26710b57cec5SDimitry Andric } else { 26720b57cec5SDimitry Andric if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 26730b57cec5SDimitry Andric thread->th.th_team->t.t_serialized) { 26740b57cec5SDimitry Andric push = 1; 26750b57cec5SDimitry Andric } 26760b57cec5SDimitry Andric } 26770b57cec5SDimitry Andric if (push) { /* push a record on the serial team's stack */ 26780b57cec5SDimitry Andric kmp_internal_control_t *control = 26790b57cec5SDimitry Andric (kmp_internal_control_t *)__kmp_allocate( 26800b57cec5SDimitry Andric sizeof(kmp_internal_control_t)); 26810b57cec5SDimitry Andric 26820b57cec5SDimitry Andric copy_icvs(control, &thread->th.th_current_task->td_icvs); 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric control->serial_nesting_level = thread->th.th_team->t.t_serialized; 26850b57cec5SDimitry Andric 26860b57cec5SDimitry Andric control->next = thread->th.th_team->t.t_control_stack_top; 26870b57cec5SDimitry Andric thread->th.th_team->t.t_control_stack_top = control; 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric } 26900b57cec5SDimitry Andric } 26910b57cec5SDimitry Andric 26920b57cec5SDimitry Andric /* Changes set_nproc */ 26930b57cec5SDimitry Andric void __kmp_set_num_threads(int new_nth, int gtid) { 26940b57cec5SDimitry Andric kmp_info_t *thread; 26950b57cec5SDimitry Andric kmp_root_t *root; 26960b57cec5SDimitry Andric 26970b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = 
%d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Clamp the requested value into [1, __kmp_max_nth].
  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  // Snapshot the current internal controls so they can be restored later,
  // then install the new nproc ICV on this thread.
  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    // The fork/join lock is held only while the hot team is being shrunk.
    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier needs its own resize before threads are released.
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should unref
        // task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      // Keep the level-0 nested-hot-teams bookkeeping in sync.
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
// (continuation of __kmp_set_num_threads: propagate the reduced size to the
// surviving hot-team threads)
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}

/* Changes max_active_levels.
   Sets the max-active-levels ICV on the thread identified by gtid.
   Negative values are ignored with a warning; values above
   KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped to the limit with a warning.
   The current internal controls are saved before the new value is applied. */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed. The last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range: [ 0;
    // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
    // We allow a zero value. (implementation defined behavior)
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // Current upper limit is MAX_INT. (implementation defined behavior)
    // If the input exceeds the upper limit, we correct the input to be the
    // upper limit. (implementation defined behavior)
    // Actually, the flow should never get here until we use MAX_INT limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  // Save current controls so they can be restored, then install the new ICV.
  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}

/* Gets max_active_levels.
   Returns the max-active-levels ICV stored in the current task of the
   thread identified by gtid. */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

// nteams-var per-device ICV
// Non-positive requests are ignored; the previous value is kept.
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
// Non-positive requests are ignored; the previous value is kept.
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

// The schedule mapping below stores kmp_sched_t / sched_type values in int
// fields, so both must have exactly the width of int.
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk).
   Out-of-range kinds fall back to kmp_sched_default with chunk 0 (warning
   issued). Valid kinds are translated to internal sched_type values via
   __kmp_sch_map; schedule modifiers (monotonic/nonmonotonic) are stripped
   before validation and re-applied afterwards. */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;
  // kmp_team_t *team;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      // Standard kinds map directly: table index is kind - lower bound - 1.
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    // Extended kinds continue the same table after the standard entries:
    // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
    // kmp_sched_lower - 2 ];
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  // Re-apply any schedule modifiers stripped from the original request.
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}

/* Gets def_sched_var ICV values.
   Translates the internal sched_type back to the user-visible kmp_sched_t
   and reports the chunk (0 for unchunked static schedules). */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
// (continuation of the __kmp_get_schedule switch)
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}

/* Returns the thread number of the ancestor of the calling thread at the
   given nesting level, or -1 if the level is invalid (negative or deeper
   than the current nesting). Level 0 always answers 0 (the initial thread).
   Walks up the team tree, counting serialized teams, and adjusting for
   teams constructs where several nested teams share the same level. */
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  // Walk up the team hierarchy: serialized teams each consume one level
  // (tracked in dd); non-serialized steps move to the parent team.
  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  // In a serialized region the ancestor tid is 0; otherwise it is the
  // master tid recorded in the team found above.
  return (dd > 1) ?
(0) : (team->t.t_master_tid); }

/* Returns the size of the team at the given nesting level, or -1 if the
   level is invalid (negative or deeper than the current nesting).
   Level 0 always answers 1 (the initial, sequential "team").
   Uses the same team-tree walk as __kmp_get_ancestor_thread_num, including
   the adjustment for teams constructs sharing a level. */
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  // Walk up the team tree, consuming serialized levels, until the team at
  // the requested level is reached.
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}

kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine created because pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
  // independently. So one can get the updated schedule here.

  kmp_r_sched_t r_sched;

  // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
  // __kmp_guided. __kmp_sched should keep original value, so that user can set
  // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
  // different roots (even in OMP 2.5)
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}

/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
   at least argc number of *t_argv entries for the requested team.
*/
// Small argc uses the space embedded in the team structure
// (t_inline_argv); larger argc allocates a page-aligned heap array sized
// at least KMP_MIN_MALLOC_ARGV_ENTRIES and at least 2*argc.
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      // Grow to at least the minimum malloc size, doubling argc so repeated
      // small increases do not reallocate every time.
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}

// Allocates the per-team arrays (threads, dispatch buffers, implicit task
// data) sized for max_nth threads and initializes the dispatch buffers.
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  // Single-threaded teams only need 2 dispatch buffers; otherwise use the
  // configured count.
  int num_disp_buff = max_nth > 1 ?
__kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

// Frees every array allocated by __kmp_allocate_team_arrays, including each
// thread's private dispatch buffer, and clears the pointers.
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

// Grows the team arrays to max_nth entries. Only the first t_nproc thread
// pointers are preserved; dispatch buffers and implicit task data are freed
// and re-created empty by __kmp_allocate_team_arrays.
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}

// Builds an internal-control record from the current global (per-device)
// settings; used to seed ICVs for new root threads.
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level; //corresponds to value of th_team_serialized
      (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
      // adjustment of threads (per thread)
      (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
      // whether blocktime is explicitly set
      __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
// intervals
#endif
      __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
      // next parallel region (per thread)
      // (use a max ub on value if __kmp_parallel_initialize not called yet)
      __kmp_cg_max_nth, // int thread_limit;
      __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
      // for max_active_levels
      r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
      // {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

// Builds an internal-control record by copying the ICVs of the given team's
// primary thread's current task (serial_nesting_level reset to 0).
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
// One-time initialization of a root structure: sets up its lock and state
// fields, then allocates and initializes both the root team (max 1 thread)
// and the hot team (max 2x the default team size upper bound), seeding their
// ICVs from the current global settings.
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin); // must not be initialized twice

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
                          );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}

#ifdef KMP_DEBUG

// Singly-linked list node used by the debug structure printer below.
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
33470b57cec5SDimitry Andric ) { 33480b57cec5SDimitry Andric 33490b57cec5SDimitry Andric // List must terminate with item where both entry and next are NULL. 33500b57cec5SDimitry Andric // Team is added to the list only once. 33510b57cec5SDimitry Andric // List is sorted in ascending order by team id. 33520b57cec5SDimitry Andric // Team id is *not* a key. 33530b57cec5SDimitry Andric 33540b57cec5SDimitry Andric kmp_team_list_t l; 33550b57cec5SDimitry Andric 33560b57cec5SDimitry Andric KMP_DEBUG_ASSERT(list != NULL); 33570b57cec5SDimitry Andric if (team == NULL) { 33580b57cec5SDimitry Andric return; 33590b57cec5SDimitry Andric } 33600b57cec5SDimitry Andric 33610b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_parent); 33620b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_next_pool); 33630b57cec5SDimitry Andric 33640b57cec5SDimitry Andric // Search list for the team. 33650b57cec5SDimitry Andric l = list; 33660b57cec5SDimitry Andric while (l->next != NULL && l->entry != team) { 33670b57cec5SDimitry Andric l = l->next; 33680b57cec5SDimitry Andric } 33690b57cec5SDimitry Andric if (l->next != NULL) { 33700b57cec5SDimitry Andric return; // Team has been added before, exit. 33710b57cec5SDimitry Andric } 33720b57cec5SDimitry Andric 33730b57cec5SDimitry Andric // Team is not found. Search list again for insertion point. 33740b57cec5SDimitry Andric l = list; 33750b57cec5SDimitry Andric while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 33760b57cec5SDimitry Andric l = l->next; 33770b57cec5SDimitry Andric } 33780b57cec5SDimitry Andric 33790b57cec5SDimitry Andric // Insert team. 
33800b57cec5SDimitry Andric { 33810b57cec5SDimitry Andric kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 33820b57cec5SDimitry Andric sizeof(kmp_team_list_item_t)); 33830b57cec5SDimitry Andric *item = *l; 33840b57cec5SDimitry Andric l->entry = team; 33850b57cec5SDimitry Andric l->next = item; 33860b57cec5SDimitry Andric } 33870b57cec5SDimitry Andric } 33880b57cec5SDimitry Andric 33890b57cec5SDimitry Andric static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 33900b57cec5SDimitry Andric 33910b57cec5SDimitry Andric ) { 33920b57cec5SDimitry Andric __kmp_printf("%s", title); 33930b57cec5SDimitry Andric if (team != NULL) { 33940b57cec5SDimitry Andric __kmp_printf("%2x %p\n", team->t.t_id, team); 33950b57cec5SDimitry Andric } else { 33960b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 33970b57cec5SDimitry Andric } 33980b57cec5SDimitry Andric } 33990b57cec5SDimitry Andric 34000b57cec5SDimitry Andric static void __kmp_print_structure_thread(char const *title, 34010b57cec5SDimitry Andric kmp_info_p const *thread) { 34020b57cec5SDimitry Andric __kmp_printf("%s", title); 34030b57cec5SDimitry Andric if (thread != NULL) { 34040b57cec5SDimitry Andric __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 34050b57cec5SDimitry Andric } else { 34060b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 34070b57cec5SDimitry Andric } 34080b57cec5SDimitry Andric } 34090b57cec5SDimitry Andric 34100b57cec5SDimitry Andric void __kmp_print_structure(void) { 34110b57cec5SDimitry Andric 34120b57cec5SDimitry Andric kmp_team_list_t list; 34130b57cec5SDimitry Andric 34140b57cec5SDimitry Andric // Initialize list of teams. 
34150b57cec5SDimitry Andric list = 34160b57cec5SDimitry Andric (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 34170b57cec5SDimitry Andric list->entry = NULL; 34180b57cec5SDimitry Andric list->next = NULL; 34190b57cec5SDimitry Andric 34200b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nGlobal Thread " 34210b57cec5SDimitry Andric "Table\n------------------------------\n"); 34220b57cec5SDimitry Andric { 34230b57cec5SDimitry Andric int gtid; 34240b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34250b57cec5SDimitry Andric __kmp_printf("%2d", gtid); 34260b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34270b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_threads[gtid]); 34280b57cec5SDimitry Andric } 34290b57cec5SDimitry Andric if (__kmp_root != NULL) { 34300b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_root[gtid]); 34310b57cec5SDimitry Andric } 34320b57cec5SDimitry Andric __kmp_printf("\n"); 34330b57cec5SDimitry Andric } 34340b57cec5SDimitry Andric } 34350b57cec5SDimitry Andric 34360b57cec5SDimitry Andric // Print out __kmp_threads array. 
34370b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nThreads\n--------------------" 34380b57cec5SDimitry Andric "----------\n"); 34390b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34400b57cec5SDimitry Andric int gtid; 34410b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34420b57cec5SDimitry Andric kmp_info_t const *thread = __kmp_threads[gtid]; 34430b57cec5SDimitry Andric if (thread != NULL) { 34440b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, thread); 34450b57cec5SDimitry Andric __kmp_printf(" Our Root: %p\n", thread->th.th_root); 34460b57cec5SDimitry Andric __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 34470b57cec5SDimitry Andric __kmp_print_structure_team(" Serial Team: ", 34480b57cec5SDimitry Andric thread->th.th_serial_team); 34490b57cec5SDimitry Andric __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 3450fe6060f1SDimitry Andric __kmp_print_structure_thread(" Primary: ", 34510b57cec5SDimitry Andric thread->th.th_team_master); 34520b57cec5SDimitry Andric __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 34530b57cec5SDimitry Andric __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 34540b57cec5SDimitry Andric __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 34550b57cec5SDimitry Andric __kmp_print_structure_thread(" Next in pool: ", 34560b57cec5SDimitry Andric thread->th.th_next_pool); 34570b57cec5SDimitry Andric __kmp_printf("\n"); 34580b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_team); 34590b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 34600b57cec5SDimitry Andric } 34610b57cec5SDimitry Andric } 34620b57cec5SDimitry Andric } else { 34630b57cec5SDimitry Andric __kmp_printf("Threads array is not allocated.\n"); 34640b57cec5SDimitry Andric } 34650b57cec5SDimitry Andric 34660b57cec5SDimitry Andric // Print out __kmp_root array. 
34670b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nUbers\n----------------------" 34680b57cec5SDimitry Andric "--------\n"); 34690b57cec5SDimitry Andric if (__kmp_root != NULL) { 34700b57cec5SDimitry Andric int gtid; 34710b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34720b57cec5SDimitry Andric kmp_root_t const *root = __kmp_root[gtid]; 34730b57cec5SDimitry Andric if (root != NULL) { 34740b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, root); 34750b57cec5SDimitry Andric __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 34760b57cec5SDimitry Andric __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 34770b57cec5SDimitry Andric __kmp_print_structure_thread(" Uber Thread: ", 34780b57cec5SDimitry Andric root->r.r_uber_thread); 34790b57cec5SDimitry Andric __kmp_printf(" Active?: %2d\n", root->r.r_active); 34800b57cec5SDimitry Andric __kmp_printf(" In Parallel: %2d\n", 34810b57cec5SDimitry Andric KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); 34820b57cec5SDimitry Andric __kmp_printf("\n"); 34830b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_root_team); 34840b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_hot_team); 34850b57cec5SDimitry Andric } 34860b57cec5SDimitry Andric } 34870b57cec5SDimitry Andric } else { 34880b57cec5SDimitry Andric __kmp_printf("Ubers array is not allocated.\n"); 34890b57cec5SDimitry Andric } 34900b57cec5SDimitry Andric 34910b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nTeams\n----------------------" 34920b57cec5SDimitry Andric "--------\n"); 34930b57cec5SDimitry Andric while (list->next != NULL) { 34940b57cec5SDimitry Andric kmp_team_p const *team = list->entry; 34950b57cec5SDimitry Andric int i; 34960b57cec5SDimitry Andric __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 34970b57cec5SDimitry Andric __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 
3498fe6060f1SDimitry Andric __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid); 34990b57cec5SDimitry Andric __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 35000b57cec5SDimitry Andric __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 35010b57cec5SDimitry Andric __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 35020b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; ++i) { 35030b57cec5SDimitry Andric __kmp_printf(" Thread %2d: ", i); 35040b57cec5SDimitry Andric __kmp_print_structure_thread("", team->t.t_threads[i]); 35050b57cec5SDimitry Andric } 35060b57cec5SDimitry Andric __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 35070b57cec5SDimitry Andric __kmp_printf("\n"); 35080b57cec5SDimitry Andric list = list->next; 35090b57cec5SDimitry Andric } 35100b57cec5SDimitry Andric 35110b57cec5SDimitry Andric // Print out __kmp_thread_pool and __kmp_team_pool. 35120b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nPools\n----------------------" 35130b57cec5SDimitry Andric "--------\n"); 35140b57cec5SDimitry Andric __kmp_print_structure_thread("Thread pool: ", 35150b57cec5SDimitry Andric CCAST(kmp_info_t *, __kmp_thread_pool)); 35160b57cec5SDimitry Andric __kmp_print_structure_team("Team pool: ", 35170b57cec5SDimitry Andric CCAST(kmp_team_t *, __kmp_team_pool)); 35180b57cec5SDimitry Andric __kmp_printf("\n"); 35190b57cec5SDimitry Andric 35200b57cec5SDimitry Andric // Free team list. 
35210b57cec5SDimitry Andric while (list != NULL) { 35220b57cec5SDimitry Andric kmp_team_list_item_t *item = list; 35230b57cec5SDimitry Andric list = list->next; 35240b57cec5SDimitry Andric KMP_INTERNAL_FREE(item); 35250b57cec5SDimitry Andric } 35260b57cec5SDimitry Andric } 35270b57cec5SDimitry Andric 35280b57cec5SDimitry Andric #endif 35290b57cec5SDimitry Andric 35300b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35310b57cec5SDimitry Andric // Stuff for per-thread fast random number generator 35320b57cec5SDimitry Andric // Table of primes 35330b57cec5SDimitry Andric static const unsigned __kmp_primes[] = { 35340b57cec5SDimitry Andric 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 35350b57cec5SDimitry Andric 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 35360b57cec5SDimitry Andric 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 35370b57cec5SDimitry Andric 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 35380b57cec5SDimitry Andric 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 35390b57cec5SDimitry Andric 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 35400b57cec5SDimitry Andric 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 35410b57cec5SDimitry Andric 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 35420b57cec5SDimitry Andric 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 35430b57cec5SDimitry Andric 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 35440b57cec5SDimitry Andric 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 35450b57cec5SDimitry Andric 35460b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35470b57cec5SDimitry Andric // __kmp_get_random: Get a random number using a linear congruential method. 
// Returns the next pseudo-random 16-bit value for `thread`. th_x is the
// per-thread LCG state and th_a the per-thread multiplier; the high 16 bits
// of the *current* state are returned, then the state advances: x' = x*a + 1.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
// Seeds the per-thread LCG from the thread's tid: the multiplier th_a is a
// prime chosen from __kmp_primes by tid, so different threads get decorrelated
// sequences; th_x is the initial state derived from (seed + 1) * a + 1.
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}

#if KMP_OS_WINDOWS
/* reclaim array entries for root threads that are already dead, returns number
 * reclaimed */
// Scans every gtid slot for uber (root) threads whose OS thread is no longer
// running and whose root is inactive, and unregisters them so the slots can
// be reused. Windows-static-library-only mechanism (see __kmp_expand_threads).
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and
   __kmp_root, with appropriate update to __kmp_threads_capacity. Array
   capacity is increased by doubling with clipping to __kmp_tp_capacity, if
   threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nNeed free slots, the function does
   that expansion. If not, nothing is done beyond the possible initial root
   thread reclamation.

   If any argument is negative, the behavior is undefined. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added; // reclaimed slots count toward the request
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
  // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
  // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
  // > __kmp_max_nth in one of two ways:
  //
  // 1) The initialization thread (gtid = 0) exits.  __kmp_threads[0]
  //    may not be reused by another thread, so we may need to increase
  //    __kmp_threads_capacity to __kmp_max_nth + 1.
  //
  // 2) New foreign root(s) are encountered.  We always register new foreign
  //    roots. This may cause a smaller # of threads to be allocated at
  //    subsequent parallel regions, but the worker threads hang around (and
  //    eventually go to sleep) and need slots in the __kmp_threads[] array.
  //
  // Anyway, that is the reason for moving the check to see if
  // __kmp_max_nth was exceeded into __kmp_reserve_threads()
  // instead of having it performed here. -BB

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  // Double capacity until the request fits, clipping at __kmp_sys_max_nth.
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  // One allocation holds both arrays back-to-back: __kmp_threads first, then
  // __kmp_root, plus CACHE_LINE padding. Freeing newThreads frees both.
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  // Publish the new arrays via volatile stores before freeing the old block;
  // capacity is published last so readers never see capacity > array size.
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      // Threadprivate cache exists: grow it to match the new thread capacity.
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}

36930b57cec5SDimitry Andric /* Register the current thread as a root thread and obtain our gtid. We must 36940b57cec5SDimitry Andric have the __kmp_initz_lock held at this point. Argument TRUE only if are the 36950b57cec5SDimitry Andric thread that calls from __kmp_do_serial_initialize() */ 36960b57cec5SDimitry Andric int __kmp_register_root(int initial_thread) { 36970b57cec5SDimitry Andric kmp_info_t *root_thread; 36980b57cec5SDimitry Andric kmp_root_t *root; 36990b57cec5SDimitry Andric int gtid; 37000b57cec5SDimitry Andric int capacity; 37010b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 37020b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: entered\n")); 37030b57cec5SDimitry Andric KMP_MB(); 37040b57cec5SDimitry Andric 37050b57cec5SDimitry Andric /* 2007-03-02: 37060b57cec5SDimitry Andric If initial thread did not invoke OpenMP RTL yet, and this thread is not an 37070b57cec5SDimitry Andric initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not 37080b57cec5SDimitry Andric work as expected -- it may return false (that means there is at least one 37090b57cec5SDimitry Andric empty slot in __kmp_threads array), but it is possible the only free slot 37100b57cec5SDimitry Andric is #0, which is reserved for initial thread and so cannot be used for this 37110b57cec5SDimitry Andric one. Following code workarounds this bug. 37120b57cec5SDimitry Andric 37130b57cec5SDimitry Andric However, right solution seems to be not reserving slot #0 for initial 37140b57cec5SDimitry Andric thread because: 37150b57cec5SDimitry Andric (1) there is no magic in slot #0, 37160b57cec5SDimitry Andric (2) we cannot detect initial thread reliably (the first thread which does 37170b57cec5SDimitry Andric serial initialization may be not a real initial thread). 
37180b57cec5SDimitry Andric */ 37190b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 37200b57cec5SDimitry Andric if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 37210b57cec5SDimitry Andric --capacity; 37220b57cec5SDimitry Andric } 37230b57cec5SDimitry Andric 3724d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 3725d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 3726d409305fSDimitry Andric // in __kmp_threads_capacity. 3727d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 3728d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 3729d409305fSDimitry Andric } 3730d409305fSDimitry Andric 37310b57cec5SDimitry Andric /* see if there are too many threads */ 37320b57cec5SDimitry Andric if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { 37330b57cec5SDimitry Andric if (__kmp_tp_cached) { 37340b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), 37350b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 37360b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 37370b57cec5SDimitry Andric } else { 37380b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), 37390b57cec5SDimitry Andric __kmp_msg_null); 37400b57cec5SDimitry Andric } 37410b57cec5SDimitry Andric } 37420b57cec5SDimitry Andric 3743e8d8bef9SDimitry Andric // When hidden helper task is enabled, __kmp_threads is organized as follows: 3744e8d8bef9SDimitry Andric // 0: initial thread, also a regular OpenMP thread. 3745e8d8bef9SDimitry Andric // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads. 3746e8d8bef9SDimitry Andric // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for 3747e8d8bef9SDimitry Andric // regular OpenMP threads. 
3748e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper_threads)) { 3749e8d8bef9SDimitry Andric // Find an available thread slot for hidden helper thread. Slots for hidden 3750e8d8bef9SDimitry Andric // helper threads start from 1 to __kmp_hidden_helper_threads_num. 3751e8d8bef9SDimitry Andric for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL && 3752e8d8bef9SDimitry Andric gtid <= __kmp_hidden_helper_threads_num; 37530b57cec5SDimitry Andric gtid++) 37540b57cec5SDimitry Andric ; 3755e8d8bef9SDimitry Andric KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num); 3756e8d8bef9SDimitry Andric KA_TRACE(1, ("__kmp_register_root: found slot in threads array for " 3757e8d8bef9SDimitry Andric "hidden helper thread: T#%d\n", 3758e8d8bef9SDimitry Andric gtid)); 3759e8d8bef9SDimitry Andric } else { 3760e8d8bef9SDimitry Andric /* find an available thread slot */ 3761e8d8bef9SDimitry Andric // Don't reassign the zero slot since we need that to only be used by 3762e8d8bef9SDimitry Andric // initial thread. Slots for hidden helper threads should also be skipped. 
3763d409305fSDimitry Andric if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 3764e8d8bef9SDimitry Andric gtid = 0; 3765e8d8bef9SDimitry Andric } else { 3766e8d8bef9SDimitry Andric for (gtid = __kmp_hidden_helper_threads_num + 1; 3767e8d8bef9SDimitry Andric TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++) 3768e8d8bef9SDimitry Andric ; 3769e8d8bef9SDimitry Andric } 3770e8d8bef9SDimitry Andric KA_TRACE( 3771e8d8bef9SDimitry Andric 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); 37720b57cec5SDimitry Andric KMP_ASSERT(gtid < __kmp_threads_capacity); 3773e8d8bef9SDimitry Andric } 37740b57cec5SDimitry Andric 37750b57cec5SDimitry Andric /* update global accounting */ 37760b57cec5SDimitry Andric __kmp_all_nth++; 37770b57cec5SDimitry Andric TCW_4(__kmp_nth, __kmp_nth + 1); 37780b57cec5SDimitry Andric 37790b57cec5SDimitry Andric // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 37800b57cec5SDimitry Andric // numbers of procs, and method #2 (keyed API call) for higher numbers. 
37810b57cec5SDimitry Andric if (__kmp_adjust_gtid_mode) { 37820b57cec5SDimitry Andric if (__kmp_all_nth >= __kmp_tls_gtid_min) { 37830b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 2) { 37840b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 2); 37850b57cec5SDimitry Andric } 37860b57cec5SDimitry Andric } else { 37870b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 1) { 37880b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 1); 37890b57cec5SDimitry Andric } 37900b57cec5SDimitry Andric } 37910b57cec5SDimitry Andric } 37920b57cec5SDimitry Andric 37930b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 37940b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary */ 37950b57cec5SDimitry Andric /* Middle initialization might not have occurred yet */ 37960b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 37970b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 37980b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 37990b57cec5SDimitry Andric } 38000b57cec5SDimitry Andric } 38010b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 38020b57cec5SDimitry Andric 38030b57cec5SDimitry Andric /* setup this new hierarchy */ 38040b57cec5SDimitry Andric if (!(root = __kmp_root[gtid])) { 38050b57cec5SDimitry Andric root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); 38060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!root->r.r_root_team); 38070b57cec5SDimitry Andric } 38080b57cec5SDimitry Andric 38090b57cec5SDimitry Andric #if KMP_STATS_ENABLED 38100b57cec5SDimitry Andric // Initialize stats as soon as possible (right after gtid assignment). 
38110b57cec5SDimitry Andric __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 38120b57cec5SDimitry Andric __kmp_stats_thread_ptr->startLife(); 38130b57cec5SDimitry Andric KMP_SET_THREAD_STATE(SERIAL_REGION); 38140b57cec5SDimitry Andric KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 38150b57cec5SDimitry Andric #endif 38160b57cec5SDimitry Andric __kmp_initialize_root(root); 38170b57cec5SDimitry Andric 38180b57cec5SDimitry Andric /* setup new root thread structure */ 38190b57cec5SDimitry Andric if (root->r.r_uber_thread) { 38200b57cec5SDimitry Andric root_thread = root->r.r_uber_thread; 38210b57cec5SDimitry Andric } else { 38220b57cec5SDimitry Andric root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 38230b57cec5SDimitry Andric if (__kmp_storage_map) { 38240b57cec5SDimitry Andric __kmp_print_thread_storage_map(root_thread, gtid); 38250b57cec5SDimitry Andric } 38260b57cec5SDimitry Andric root_thread->th.th_info.ds.ds_gtid = gtid; 38270b57cec5SDimitry Andric #if OMPT_SUPPORT 38280b57cec5SDimitry Andric root_thread->th.ompt_thread_info.thread_data = ompt_data_none; 38290b57cec5SDimitry Andric #endif 38300b57cec5SDimitry Andric root_thread->th.th_root = root; 38310b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 38320b57cec5SDimitry Andric root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 38330b57cec5SDimitry Andric } 38340b57cec5SDimitry Andric #if USE_FAST_MEMORY 38350b57cec5SDimitry Andric __kmp_initialize_fast_memory(root_thread); 38360b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 38370b57cec5SDimitry Andric 38380b57cec5SDimitry Andric #if KMP_USE_BGET 38390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 38400b57cec5SDimitry Andric __kmp_initialize_bget(root_thread); 38410b57cec5SDimitry Andric #endif 38420b57cec5SDimitry Andric __kmp_init_random(root_thread); // Initialize random number generator 38430b57cec5SDimitry Andric } 38440b57cec5SDimitry Andric 38450b57cec5SDimitry Andric /* setup 
the serial team held in reserve by the root thread */ 38460b57cec5SDimitry Andric if (!root_thread->th.th_serial_team) { 38470b57cec5SDimitry Andric kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 38480b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 38490b57cec5SDimitry Andric root_thread->th.th_serial_team = __kmp_allocate_team( 38500b57cec5SDimitry Andric root, 1, 1, 38510b57cec5SDimitry Andric #if OMPT_SUPPORT 38520b57cec5SDimitry Andric ompt_data_none, // root parallel id 38530b57cec5SDimitry Andric #endif 38540b57cec5SDimitry Andric proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 38550b57cec5SDimitry Andric } 38560b57cec5SDimitry Andric KMP_ASSERT(root_thread->th.th_serial_team); 38570b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 38580b57cec5SDimitry Andric root_thread->th.th_serial_team)); 38590b57cec5SDimitry Andric 38600b57cec5SDimitry Andric /* drop root_thread into place */ 38610b57cec5SDimitry Andric TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 38620b57cec5SDimitry Andric 38630b57cec5SDimitry Andric root->r.r_root_team->t.t_threads[0] = root_thread; 38640b57cec5SDimitry Andric root->r.r_hot_team->t.t_threads[0] = root_thread; 38650b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 38660b57cec5SDimitry Andric // AC: the team created in reserve, not for execution (it is unused for now). 
38670b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_serialized = 0; 38680b57cec5SDimitry Andric root->r.r_uber_thread = root_thread; 38690b57cec5SDimitry Andric 38700b57cec5SDimitry Andric /* initialize the thread, get it ready to go */ 38710b57cec5SDimitry Andric __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 38720b57cec5SDimitry Andric TCW_4(__kmp_init_gtid, TRUE); 38730b57cec5SDimitry Andric 3874fe6060f1SDimitry Andric /* prepare the primary thread for get_gtid() */ 38750b57cec5SDimitry Andric __kmp_gtid_set_specific(gtid); 38760b57cec5SDimitry Andric 38770b57cec5SDimitry Andric #if USE_ITT_BUILD 38780b57cec5SDimitry Andric __kmp_itt_thread_name(gtid); 38790b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 38800b57cec5SDimitry Andric 38810b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 38820b57cec5SDimitry Andric __kmp_gtid = gtid; 38830b57cec5SDimitry Andric #endif 38840b57cec5SDimitry Andric __kmp_create_worker(gtid, root_thread, __kmp_stksize); 38850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 38880b57cec5SDimitry Andric "plain=%u\n", 38890b57cec5SDimitry Andric gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 38900b57cec5SDimitry Andric root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 38910b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE)); 38920b57cec5SDimitry Andric { // Initialize barrier data. 
38930b57cec5SDimitry Andric int b; 38940b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 38950b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 38960b57cec5SDimitry Andric #if USE_DEBUGGER 38970b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 38980b57cec5SDimitry Andric #endif 38990b57cec5SDimitry Andric } 39000b57cec5SDimitry Andric } 39010b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 39020b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE); 39030b57cec5SDimitry Andric 39040b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 39050b57cec5SDimitry Andric root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 39060b57cec5SDimitry Andric root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 39070b57cec5SDimitry Andric root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 39080b57cec5SDimitry Andric root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 39090b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 39100b57cec5SDimitry Andric root_thread->th.th_def_allocator = __kmp_def_allocator; 39110b57cec5SDimitry Andric root_thread->th.th_prev_level = 0; 39120b57cec5SDimitry Andric root_thread->th.th_prev_num_threads = 1; 39130b57cec5SDimitry Andric 39140b57cec5SDimitry Andric kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t)); 39150b57cec5SDimitry Andric tmp->cg_root = root_thread; 39160b57cec5SDimitry Andric tmp->cg_thread_limit = __kmp_cg_max_nth; 39170b57cec5SDimitry Andric tmp->cg_nthreads = 1; 39180b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with" 39190b57cec5SDimitry Andric " cg_nthreads init to 1\n", 39200b57cec5SDimitry Andric root_thread, tmp)); 39210b57cec5SDimitry Andric tmp->up = NULL; 39220b57cec5SDimitry Andric root_thread->th.th_cg_roots = tmp; 39230b57cec5SDimitry Andric 39240b57cec5SDimitry Andric __kmp_root_counter++; 39250b57cec5SDimitry Andric 
#if OMPT_SUPPORT
  // Notify the tool that a new (non-initial-registration) root thread and its
  // initial implicit task have begun; transitions thread state
  // overhead -> work_serial around the callbacks.
  if (!initial_thread && ompt_enabled.enabled) {

    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }
#endif
#if OMPD_SUPPORT
  // OMPD debugger breakpoint hook for thread begin.
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}

#if KMP_NESTED_HOT_TEAMS
// Recursively free the nested hot teams hanging off 'thr' at nesting depth
// 'level' (and below, up to 'max_level'). Returns the number of worker
// thread entries released; the primary thread at each level is not freed
// (hence n starts at nth - 1).
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0; // nothing recorded at this level
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    // Descend into each worker's own nested hot teams before freeing this
    // level's team; skip index 0 (the primary) when freeing th_hot_teams.
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif

// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        // Count threads released from deeper nesting levels too.
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPD_SUPPORT
  // OMPD debugger breakpoint hook for thread end.
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  // Report implicit-task end and thread end for this root to the tool.
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  // Post-decrement: 'i' holds the contention-group count BEFORE this root
  // leaves, so i == 1 means this root was the last member.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put root thread to __kmp_thread_pool, so we have to reap it
  // instead of freeing.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}

// Unregister the calling root thread (gtid must be the caller's own gtid):
// waits out proxy tasks if needed, then resets the root under the
// forkjoin bootstrap lock. No-op if the runtime is already shut down.
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result.
*/
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  // No locking here: caller already holds __kmp_forkjoin_lock (see the
  // comment on the declaration above).
  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif

#if KMP_DEBUG
// Debug helper: print the calling thread's gtid/tid, its team and serial
// team pointers, its current task, and the parent of its implicit task.
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG

/* TODO optimize with one big
   memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible */
// Initialize 'this_thr' as member 'tid' (global id 'gtid') of 'team':
// caches team info on the thread, sets up the implicit task, the private
// common table, the contention-group root, the dynamic-dispatch buffers,
// and the task-state memo stack.
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in
     kmp_allocate_thread/create_worker.
     this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  // Publish the team pointer with synchronization semantics.
  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
  // TODO: Initialize ICVs from parent; GEH - isn't that already done in
  // __kmp_initialize_team()?

  /* TODO no worksharing in speculative threads */
  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  // Lazily allocate the per-thread private common table on first use.
  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // Primary thread's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make new thread's CG root same as primary thread's
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // worker changes CG, need to check if old CG should be freed
      // Post-decrement: 'i' is the count BEFORE this thread leaves the CG.
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment new thread's CG root's counter to add the new thread
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    // Inherit the thread limit ICV from the (new) contention group.
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    // A single-thread team needs only one dispatch buffer.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size,
            "th_%d.th_dispatch.th_disp_buffer "
            "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      // Buffer already allocated from a previous use: just zero it.
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  // Lazily allocate and zero the task-state memo stack (initial depth 4).
  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}

/* allocate a new thread for the requesting team.
this is only called from
   within a forkjoin critical section. we will first try to get an available
   thread from the thread pool. if none is available, we will fork a new one
   assuming we are able to create a new one. this should be assured, as the
   caller should check on this first. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {
    // Pop the head of the pool list.
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    // Invalidate the cached insertion point if we just took that thread.
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    // th_active_in_pool is read/written under the thread's suspend mutex.
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    // Reset task-state bookkeeping for reuse on the new team.
    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4; // initial task-state stack depth

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread.  We try to do this as early as possible.
  if (!TCR_4(__kmp_init_monitor)) {
    // Double-checked under the bootstrap lock.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // The reason is that if the library is loaded/unloaded in a loop with
      // small (parallel) work in between, then there is high probability that
      // monitor thread started after the library shutdown. At shutdown it is
      // too late to cope with the problem, because when the primary thread is
      // in DllMain (process detach) the monitor has no chances to start (it is
      // blocked), and primary thread has no means to inform the monitor that
      // the library has gone, because all the memory which the monitor can
      // access is going to be released/reset.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();

  {
    // Find a free gtid slot in __kmp_threads. While hidden helper threads
    // are being initialized they occupy the low gtids (1..N); regular
    // workers start searching after that range.
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race conditions detection on synchronization flags in debug mode
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  // TODO: check if we need to also suppress b_arrived flags
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  // Put every barrier slot into its initial (not waiting) state.
  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
   The hot team code calls this case at every fork barrier, so EPCC barrier
   test are extremely sensitive to changes in it, esp. writes to the team
   struct, which cause a cache invalidation in all threads.
   IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  // KMP_CHECK_UPDATE only writes when the value changed, avoiding needless
  // cache-line invalidation on this hot path.
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  // A team of one executes serialized.
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;
  /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
   * up hot team */

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  // Finish up: refresh ident/team id and copy ICVs into the primary
  // thread's implicit task.
  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Sets full mask for thread and returns old mask, no changes to structures.
*/ 46890b57cec5SDimitry Andric static void 46900b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 46910b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 46920b57cec5SDimitry Andric int status; 46930b57cec5SDimitry Andric if (old_mask != NULL) { 46940b57cec5SDimitry Andric status = __kmp_get_system_affinity(old_mask, TRUE); 46950b57cec5SDimitry Andric int error = errno; 46960b57cec5SDimitry Andric if (status != 0) { 46970b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), 46980b57cec5SDimitry Andric __kmp_msg_null); 46990b57cec5SDimitry Andric } 47000b57cec5SDimitry Andric } 47010b57cec5SDimitry Andric __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 47020b57cec5SDimitry Andric } 47030b57cec5SDimitry Andric } 47040b57cec5SDimitry Andric #endif 47050b57cec5SDimitry Andric 47060b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 47070b57cec5SDimitry Andric 47080b57cec5SDimitry Andric // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4709fe6060f1SDimitry Andric // It calculates the worker + primary thread's partition based upon the parent 47100b57cec5SDimitry Andric // thread's partition, and binds each worker to a thread in their partition. 4711fe6060f1SDimitry Andric // The primary thread's partition should already include its current binding. 
47120b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4713fe6060f1SDimitry Andric // Do not partition places for the hidden helper team 4714fe6060f1SDimitry Andric if (KMP_HIDDEN_HELPER_TEAM(team)) 4715fe6060f1SDimitry Andric return; 4716fe6060f1SDimitry Andric // Copy the primary thread's place partition to the team struct 47170b57cec5SDimitry Andric kmp_info_t *master_th = team->t.t_threads[0]; 47180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th != NULL); 47190b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 47200b57cec5SDimitry Andric int first_place = master_th->th.th_first_place; 47210b57cec5SDimitry Andric int last_place = master_th->th.th_last_place; 47220b57cec5SDimitry Andric int masters_place = master_th->th.th_current_place; 47230b57cec5SDimitry Andric team->t.t_first_place = first_place; 47240b57cec5SDimitry Andric team->t.t_last_place = last_place; 47250b57cec5SDimitry Andric 47260b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 47270b57cec5SDimitry Andric "bound to place %d partition = [%d,%d]\n", 47280b57cec5SDimitry Andric proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 47290b57cec5SDimitry Andric team->t.t_id, masters_place, first_place, last_place)); 47300b57cec5SDimitry Andric 47310b57cec5SDimitry Andric switch (proc_bind) { 47320b57cec5SDimitry Andric 47330b57cec5SDimitry Andric case proc_bind_default: 4734fe6060f1SDimitry Andric // Serial teams might have the proc_bind policy set to proc_bind_default. 4735fe6060f1SDimitry Andric // Not an issue -- we don't rebind primary thread for any proc_bind policy. 
47360b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 47370b57cec5SDimitry Andric break; 47380b57cec5SDimitry Andric 4739fe6060f1SDimitry Andric case proc_bind_primary: { 47400b57cec5SDimitry Andric int f; 47410b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47420b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 47430b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47440b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47450b57cec5SDimitry Andric th->th.th_first_place = first_place; 47460b57cec5SDimitry Andric th->th.th_last_place = last_place; 47470b57cec5SDimitry Andric th->th.th_new_place = masters_place; 47480b57cec5SDimitry Andric if (__kmp_display_affinity && masters_place != th->th.th_current_place && 47490b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47500b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47510b57cec5SDimitry Andric } 47520b57cec5SDimitry Andric 4753fe6060f1SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d " 47540b57cec5SDimitry Andric "partition = [%d,%d]\n", 47550b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 47560b57cec5SDimitry Andric f, masters_place, first_place, last_place)); 47570b57cec5SDimitry Andric } 47580b57cec5SDimitry Andric } break; 47590b57cec5SDimitry Andric 47600b57cec5SDimitry Andric case proc_bind_close: { 47610b57cec5SDimitry Andric int f; 47620b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47630b57cec5SDimitry Andric int n_places; 47640b57cec5SDimitry Andric if (first_place <= last_place) { 47650b57cec5SDimitry Andric n_places = last_place - first_place + 1; 47660b57cec5SDimitry Andric } else { 47670b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 47680b57cec5SDimitry Andric } 47690b57cec5SDimitry Andric if (n_th <= n_places) { 47700b57cec5SDimitry Andric int place = masters_place; 47710b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 
47720b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47740b57cec5SDimitry Andric 47750b57cec5SDimitry Andric if (place == last_place) { 47760b57cec5SDimitry Andric place = first_place; 47770b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 47780b57cec5SDimitry Andric place = 0; 47790b57cec5SDimitry Andric } else { 47800b57cec5SDimitry Andric place++; 47810b57cec5SDimitry Andric } 47820b57cec5SDimitry Andric th->th.th_first_place = first_place; 47830b57cec5SDimitry Andric th->th.th_last_place = last_place; 47840b57cec5SDimitry Andric th->th.th_new_place = place; 47850b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 47860b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47870b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47880b57cec5SDimitry Andric } 47890b57cec5SDimitry Andric 47900b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 47910b57cec5SDimitry Andric "partition = [%d,%d]\n", 47920b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 47930b57cec5SDimitry Andric team->t.t_id, f, place, first_place, last_place)); 47940b57cec5SDimitry Andric } 47950b57cec5SDimitry Andric } else { 47960b57cec5SDimitry Andric int S, rem, gap, s_count; 47970b57cec5SDimitry Andric S = n_th / n_places; 47980b57cec5SDimitry Andric s_count = 0; 47990b57cec5SDimitry Andric rem = n_th - (S * n_places); 48000b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 48010b57cec5SDimitry Andric int place = masters_place; 48020b57cec5SDimitry Andric int gap_ct = gap; 48030b57cec5SDimitry Andric for (f = 0; f < n_th; f++) { 48040b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48050b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48060b57cec5SDimitry Andric 48070b57cec5SDimitry Andric th->th.th_first_place = first_place; 48080b57cec5SDimitry Andric th->th.th_last_place = last_place; 48090b57cec5SDimitry Andric th->th.th_new_place = place; 48100b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48110b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48120b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48130b57cec5SDimitry Andric } 48140b57cec5SDimitry Andric s_count++; 48150b57cec5SDimitry Andric 48160b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 48170b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 48180b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 48190b57cec5SDimitry Andric // we added an extra thread to this place; move to next place 48200b57cec5SDimitry Andric if (place == last_place) { 48210b57cec5SDimitry Andric place = first_place; 48220b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48230b57cec5SDimitry Andric place = 0; 48240b57cec5SDimitry Andric } else { 48250b57cec5SDimitry Andric place++; 48260b57cec5SDimitry Andric } 48270b57cec5SDimitry Andric s_count = 0; 48280b57cec5SDimitry Andric gap_ct = 1; 48290b57cec5SDimitry Andric rem--; 48300b57cec5SDimitry Andric } else if (s_count == S) { // place full; don't add extra 48310b57cec5SDimitry Andric if (place == last_place) { 48320b57cec5SDimitry Andric place = first_place; 48330b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48340b57cec5SDimitry Andric place = 0; 48350b57cec5SDimitry Andric } else { 
48360b57cec5SDimitry Andric place++; 48370b57cec5SDimitry Andric } 48380b57cec5SDimitry Andric gap_ct++; 48390b57cec5SDimitry Andric s_count = 0; 48400b57cec5SDimitry Andric } 48410b57cec5SDimitry Andric 48420b57cec5SDimitry Andric KA_TRACE(100, 48430b57cec5SDimitry Andric ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 48440b57cec5SDimitry Andric "partition = [%d,%d]\n", 48450b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 48460b57cec5SDimitry Andric th->th.th_new_place, first_place, last_place)); 48470b57cec5SDimitry Andric } 48480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(place == masters_place); 48490b57cec5SDimitry Andric } 48500b57cec5SDimitry Andric } break; 48510b57cec5SDimitry Andric 48520b57cec5SDimitry Andric case proc_bind_spread: { 48530b57cec5SDimitry Andric int f; 48540b57cec5SDimitry Andric int n_th = team->t.t_nproc; 48550b57cec5SDimitry Andric int n_places; 48560b57cec5SDimitry Andric int thidx; 48570b57cec5SDimitry Andric if (first_place <= last_place) { 48580b57cec5SDimitry Andric n_places = last_place - first_place + 1; 48590b57cec5SDimitry Andric } else { 48600b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 48610b57cec5SDimitry Andric } 48620b57cec5SDimitry Andric if (n_th <= n_places) { 48630b57cec5SDimitry Andric int place = -1; 48640b57cec5SDimitry Andric 48650b57cec5SDimitry Andric if (n_places != static_cast<int>(__kmp_affinity_num_masks)) { 48660b57cec5SDimitry Andric int S = n_places / n_th; 48670b57cec5SDimitry Andric int s_count, rem, gap, gap_ct; 48680b57cec5SDimitry Andric 48690b57cec5SDimitry Andric place = masters_place; 48700b57cec5SDimitry Andric rem = n_places - n_th * S; 48710b57cec5SDimitry Andric gap = rem ? 
n_th / rem : 1; 48720b57cec5SDimitry Andric gap_ct = gap; 48730b57cec5SDimitry Andric thidx = n_th; 48740b57cec5SDimitry Andric if (update_master_only == 1) 48750b57cec5SDimitry Andric thidx = 1; 48760b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 48770b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48790b57cec5SDimitry Andric 48800b57cec5SDimitry Andric th->th.th_first_place = place; 48810b57cec5SDimitry Andric th->th.th_new_place = place; 48820b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48830b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48840b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48850b57cec5SDimitry Andric } 48860b57cec5SDimitry Andric s_count = 1; 48870b57cec5SDimitry Andric while (s_count < S) { 48880b57cec5SDimitry Andric if (place == last_place) { 48890b57cec5SDimitry Andric place = first_place; 48900b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48910b57cec5SDimitry Andric place = 0; 48920b57cec5SDimitry Andric } else { 48930b57cec5SDimitry Andric place++; 48940b57cec5SDimitry Andric } 48950b57cec5SDimitry Andric s_count++; 48960b57cec5SDimitry Andric } 48970b57cec5SDimitry Andric if (rem && (gap_ct == gap)) { 48980b57cec5SDimitry Andric if (place == last_place) { 48990b57cec5SDimitry Andric place = first_place; 49000b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49010b57cec5SDimitry Andric place = 0; 49020b57cec5SDimitry Andric } else { 49030b57cec5SDimitry Andric place++; 49040b57cec5SDimitry Andric } 49050b57cec5SDimitry Andric rem--; 49060b57cec5SDimitry Andric gap_ct = 0; 49070b57cec5SDimitry Andric } 49080b57cec5SDimitry Andric th->th.th_last_place = place; 49090b57cec5SDimitry Andric gap_ct++; 49100b57cec5SDimitry Andric 49110b57cec5SDimitry Andric if (place == last_place) { 49120b57cec5SDimitry Andric place = first_place; 
49130b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49140b57cec5SDimitry Andric place = 0; 49150b57cec5SDimitry Andric } else { 49160b57cec5SDimitry Andric place++; 49170b57cec5SDimitry Andric } 49180b57cec5SDimitry Andric 49190b57cec5SDimitry Andric KA_TRACE(100, 49200b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49210b57cec5SDimitry Andric "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", 49220b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 49230b57cec5SDimitry Andric f, th->th.th_new_place, th->th.th_first_place, 49240b57cec5SDimitry Andric th->th.th_last_place, __kmp_affinity_num_masks)); 49250b57cec5SDimitry Andric } 49260b57cec5SDimitry Andric } else { 49270b57cec5SDimitry Andric /* Having uniform space of available computation places I can create 49280b57cec5SDimitry Andric T partitions of round(P/T) size and put threads into the first 49290b57cec5SDimitry Andric place of each partition. 
*/ 49300b57cec5SDimitry Andric double current = static_cast<double>(masters_place); 49310b57cec5SDimitry Andric double spacing = 49320b57cec5SDimitry Andric (static_cast<double>(n_places + 1) / static_cast<double>(n_th)); 49330b57cec5SDimitry Andric int first, last; 49340b57cec5SDimitry Andric kmp_info_t *th; 49350b57cec5SDimitry Andric 49360b57cec5SDimitry Andric thidx = n_th + 1; 49370b57cec5SDimitry Andric if (update_master_only == 1) 49380b57cec5SDimitry Andric thidx = 1; 49390b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 49400b57cec5SDimitry Andric first = static_cast<int>(current); 49410b57cec5SDimitry Andric last = static_cast<int>(current + spacing) - 1; 49420b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last >= first); 49430b57cec5SDimitry Andric if (first >= n_places) { 49440b57cec5SDimitry Andric if (masters_place) { 49450b57cec5SDimitry Andric first -= n_places; 49460b57cec5SDimitry Andric last -= n_places; 49470b57cec5SDimitry Andric if (first == (masters_place + 1)) { 49480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49490b57cec5SDimitry Andric first--; 49500b57cec5SDimitry Andric } 49510b57cec5SDimitry Andric if (last == masters_place) { 49520b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == (n_th - 1)); 49530b57cec5SDimitry Andric last--; 49540b57cec5SDimitry Andric } 49550b57cec5SDimitry Andric } else { 49560b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49570b57cec5SDimitry Andric first = 0; 49580b57cec5SDimitry Andric last = 0; 49590b57cec5SDimitry Andric } 49600b57cec5SDimitry Andric } 49610b57cec5SDimitry Andric if (last >= n_places) { 49620b57cec5SDimitry Andric last = (n_places - 1); 49630b57cec5SDimitry Andric } 49640b57cec5SDimitry Andric place = first; 49650b57cec5SDimitry Andric current += spacing; 49660b57cec5SDimitry Andric if (f < n_th) { 49670b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= first); 49680b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > first); 49690b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= last); 
49700b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > last); 49710b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last_place >= first_place); 49720b57cec5SDimitry Andric th = team->t.t_threads[f]; 49730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th); 49740b57cec5SDimitry Andric th->th.th_first_place = first; 49750b57cec5SDimitry Andric th->th.th_new_place = place; 49760b57cec5SDimitry Andric th->th.th_last_place = last; 49770b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 49780b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 49790b57cec5SDimitry Andric team->t.t_display_affinity = 1; 49800b57cec5SDimitry Andric } 49810b57cec5SDimitry Andric KA_TRACE(100, 49820b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49830b57cec5SDimitry Andric "partition = [%d,%d], spacing = %.4f\n", 49840b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 49850b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 49860b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place, spacing)); 49870b57cec5SDimitry Andric } 49880b57cec5SDimitry Andric } 49890b57cec5SDimitry Andric } 49900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 49910b57cec5SDimitry Andric } else { 49920b57cec5SDimitry Andric int S, rem, gap, s_count; 49930b57cec5SDimitry Andric S = n_th / n_places; 49940b57cec5SDimitry Andric s_count = 0; 49950b57cec5SDimitry Andric rem = n_th - (S * n_places); 49960b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 49970b57cec5SDimitry Andric int place = masters_place; 49980b57cec5SDimitry Andric int gap_ct = gap; 49990b57cec5SDimitry Andric thidx = n_th; 50000b57cec5SDimitry Andric if (update_master_only == 1) 50010b57cec5SDimitry Andric thidx = 1; 50020b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 50030b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 50040b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 50050b57cec5SDimitry Andric 50060b57cec5SDimitry Andric th->th.th_first_place = place; 50070b57cec5SDimitry Andric th->th.th_last_place = place; 50080b57cec5SDimitry Andric th->th.th_new_place = place; 50090b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 50100b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 50110b57cec5SDimitry Andric team->t.t_display_affinity = 1; 50120b57cec5SDimitry Andric } 50130b57cec5SDimitry Andric s_count++; 50140b57cec5SDimitry Andric 50150b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 50160b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 50170b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 50180b57cec5SDimitry Andric // we added an extra thread to this place; move on to next place 50190b57cec5SDimitry Andric if (place == last_place) { 50200b57cec5SDimitry Andric place = first_place; 50210b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 50220b57cec5SDimitry Andric place = 0; 50230b57cec5SDimitry Andric } else { 50240b57cec5SDimitry Andric place++; 50250b57cec5SDimitry Andric } 50260b57cec5SDimitry Andric s_count = 0; 50270b57cec5SDimitry Andric gap_ct = 1; 50280b57cec5SDimitry Andric rem--; 50290b57cec5SDimitry Andric } else if (s_count == S) { // place is full; don't add extra thread 50300b57cec5SDimitry Andric if (place == last_place) { 50310b57cec5SDimitry Andric place = first_place; 50320b57cec5SDimitry Andric } else if 
(place == (int)(__kmp_affinity_num_masks - 1)) { 50330b57cec5SDimitry Andric place = 0; 50340b57cec5SDimitry Andric } else { 50350b57cec5SDimitry Andric place++; 50360b57cec5SDimitry Andric } 50370b57cec5SDimitry Andric gap_ct++; 50380b57cec5SDimitry Andric s_count = 0; 50390b57cec5SDimitry Andric } 50400b57cec5SDimitry Andric 50410b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 50420b57cec5SDimitry Andric "partition = [%d,%d]\n", 50430b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 50440b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 50450b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place)); 50460b57cec5SDimitry Andric } 50470b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 50480b57cec5SDimitry Andric } 50490b57cec5SDimitry Andric } break; 50500b57cec5SDimitry Andric 50510b57cec5SDimitry Andric default: 50520b57cec5SDimitry Andric break; 50530b57cec5SDimitry Andric } 50540b57cec5SDimitry Andric 50550b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 50560b57cec5SDimitry Andric } 50570b57cec5SDimitry Andric 50580b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 50590b57cec5SDimitry Andric 50600b57cec5SDimitry Andric /* allocate a new team data structure to use. 
take one off of the free pool if 50610b57cec5SDimitry Andric available */ 50620b57cec5SDimitry Andric kmp_team_t * 50630b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 50640b57cec5SDimitry Andric #if OMPT_SUPPORT 50650b57cec5SDimitry Andric ompt_data_t ompt_parallel_data, 50660b57cec5SDimitry Andric #endif 50670b57cec5SDimitry Andric kmp_proc_bind_t new_proc_bind, 50680b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 50690b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 50700b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 50710b57cec5SDimitry Andric int f; 50720b57cec5SDimitry Andric kmp_team_t *team; 50730b57cec5SDimitry Andric int use_hot_team = !root->r.r_active; 50740b57cec5SDimitry Andric int level = 0; 5075349cc55cSDimitry Andric int do_place_partition = 1; 50760b57cec5SDimitry Andric 50770b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: called\n")); 50780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 50790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 50800b57cec5SDimitry Andric KMP_MB(); 50810b57cec5SDimitry Andric 50820b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 50830b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams; 50840b57cec5SDimitry Andric if (master) { 50850b57cec5SDimitry Andric team = master->th.th_team; 50860b57cec5SDimitry Andric level = team->t.t_active_level; 50870b57cec5SDimitry Andric if (master->th.th_teams_microtask) { // in teams construct? 
50880b57cec5SDimitry Andric if (master->th.th_teams_size.nteams > 1 && 50890b57cec5SDimitry Andric ( // #teams > 1 50900b57cec5SDimitry Andric team->t.t_pkfn == 50910b57cec5SDimitry Andric (microtask_t)__kmp_teams_master || // inner fork of the teams 50920b57cec5SDimitry Andric master->th.th_teams_level < 50930b57cec5SDimitry Andric team->t.t_level)) { // or nested parallel inside the teams 50940b57cec5SDimitry Andric ++level; // not increment if #teams==1, or for outer fork of the teams; 50950b57cec5SDimitry Andric // increment otherwise 50960b57cec5SDimitry Andric } 5097349cc55cSDimitry Andric // Do not perform the place partition if inner fork of the teams 5098349cc55cSDimitry Andric // Wait until nested parallel region encountered inside teams construct 5099349cc55cSDimitry Andric if ((master->th.th_teams_size.nteams == 1 && 5100349cc55cSDimitry Andric master->th.th_teams_level >= team->t.t_level) || 5101349cc55cSDimitry Andric (team->t.t_pkfn == (microtask_t)__kmp_teams_master)) 5102349cc55cSDimitry Andric do_place_partition = 0; 51030b57cec5SDimitry Andric } 51040b57cec5SDimitry Andric hot_teams = master->th.th_hot_teams; 51050b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level && hot_teams && 5106e8d8bef9SDimitry Andric hot_teams[level].hot_team) { 5107e8d8bef9SDimitry Andric // hot team has already been allocated for given level 51080b57cec5SDimitry Andric use_hot_team = 1; 51090b57cec5SDimitry Andric } else { 51100b57cec5SDimitry Andric use_hot_team = 0; 51110b57cec5SDimitry Andric } 5112e8d8bef9SDimitry Andric } else { 5113e8d8bef9SDimitry Andric // check we won't access uninitialized hot_teams, just in case 5114e8d8bef9SDimitry Andric KMP_DEBUG_ASSERT(new_nproc == 1); 51150b57cec5SDimitry Andric } 51160b57cec5SDimitry Andric #endif 51170b57cec5SDimitry Andric // Optimization to use a "hot" team 51180b57cec5SDimitry Andric if (use_hot_team && new_nproc > 1) { 51190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc <= max_nproc); 
51200b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51210b57cec5SDimitry Andric team = hot_teams[level].hot_team; 51220b57cec5SDimitry Andric #else 51230b57cec5SDimitry Andric team = root->r.r_hot_team; 51240b57cec5SDimitry Andric #endif 51250b57cec5SDimitry Andric #if KMP_DEBUG 51260b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 51270b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 51280b57cec5SDimitry Andric "task_team[1] = %p before reinit\n", 51290b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 51300b57cec5SDimitry Andric } 51310b57cec5SDimitry Andric #endif 51320b57cec5SDimitry Andric 5133349cc55cSDimitry Andric if (team->t.t_nproc != new_nproc && 5134349cc55cSDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5135349cc55cSDimitry Andric // Distributed barrier may need a resize 5136349cc55cSDimitry Andric int old_nthr = team->t.t_nproc; 5137349cc55cSDimitry Andric __kmp_resize_dist_barrier(team, old_nthr, new_nproc); 5138349cc55cSDimitry Andric } 5139349cc55cSDimitry Andric 5140349cc55cSDimitry Andric // If not doing the place partition, then reset the team's proc bind 5141349cc55cSDimitry Andric // to indicate that partitioning of all threads still needs to take place 5142349cc55cSDimitry Andric if (do_place_partition == 0) 5143349cc55cSDimitry Andric team->t.t_proc_bind = proc_bind_default; 51440b57cec5SDimitry Andric // Has the number of threads changed? 51450b57cec5SDimitry Andric /* Let's assume the most common case is that the number of threads is 51460b57cec5SDimitry Andric unchanged, and put that case first. 
*/ 51470b57cec5SDimitry Andric if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 51480b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 51490b57cec5SDimitry Andric // This case can mean that omp_set_num_threads() was called and the hot 51500b57cec5SDimitry Andric // team size was already reduced, so we check the special flag 51510b57cec5SDimitry Andric if (team->t.t_size_changed == -1) { 51520b57cec5SDimitry Andric team->t.t_size_changed = 1; 51530b57cec5SDimitry Andric } else { 51540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 51550b57cec5SDimitry Andric } 51560b57cec5SDimitry Andric 51570b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 51580b57cec5SDimitry Andric kmp_r_sched_t new_sched = new_icvs->sched; 5159fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 51600b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 51610b57cec5SDimitry Andric 51620b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, 51630b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 51640b57cec5SDimitry Andric 51650b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 51660b57cec5SDimitry Andric team->t.t_threads[0], team)); 51670b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 51680b57cec5SDimitry Andric 51690b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 51700b57cec5SDimitry Andric if ((team->t.t_size_changed == 0) && 51710b57cec5SDimitry Andric (team->t.t_proc_bind == new_proc_bind)) { 51720b57cec5SDimitry Andric if (new_proc_bind == proc_bind_spread) { 5173349cc55cSDimitry Andric if (do_place_partition) { 5174349cc55cSDimitry Andric // add flag to update only master for spread 5175349cc55cSDimitry Andric __kmp_partition_places(team, 1); 5176349cc55cSDimitry Andric } 51770b57cec5SDimitry Andric } 
51780b57cec5SDimitry Andric KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 51790b57cec5SDimitry Andric "proc_bind = %d, partition = [%d,%d]\n", 51800b57cec5SDimitry Andric team->t.t_id, new_proc_bind, team->t.t_first_place, 51810b57cec5SDimitry Andric team->t.t_last_place)); 51820b57cec5SDimitry Andric } else { 5183349cc55cSDimitry Andric if (do_place_partition) { 51840b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 51850b57cec5SDimitry Andric __kmp_partition_places(team); 51860b57cec5SDimitry Andric } 5187349cc55cSDimitry Andric } 51880b57cec5SDimitry Andric #else 51890b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 51900b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 51910b57cec5SDimitry Andric } else if (team->t.t_nproc > new_nproc) { 51920b57cec5SDimitry Andric KA_TRACE(20, 51930b57cec5SDimitry Andric ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 51940b57cec5SDimitry Andric new_nproc)); 51950b57cec5SDimitry Andric 51960b57cec5SDimitry Andric team->t.t_size_changed = 1; 5197349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5198349cc55cSDimitry Andric // Barrier size already reduced earlier in this function 5199349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5200349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5201349cc55cSDimitry Andric } 52020b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52030b57cec5SDimitry Andric if (__kmp_hot_teams_mode == 0) { 52040b57cec5SDimitry Andric // AC: saved number of threads should correspond to team's value in this 52050b57cec5SDimitry Andric // mode, can be bigger in mode 1, when hot team has threads in reserve 52060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 52070b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; 52080b57cec5SDimitry Andric #endif // 
KMP_NESTED_HOT_TEAMS 52090b57cec5SDimitry Andric /* release the extra threads we don't need any more */ 52100b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; f++) { 52110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52120b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 52130b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should 52140b57cec5SDimitry Andric // unref task team. 52150b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_team = NULL; 52160b57cec5SDimitry Andric } 52170b57cec5SDimitry Andric __kmp_free_thread(team->t.t_threads[f]); 52180b57cec5SDimitry Andric team->t.t_threads[f] = NULL; 52190b57cec5SDimitry Andric } 52200b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52210b57cec5SDimitry Andric } // (__kmp_hot_teams_mode == 0) 52220b57cec5SDimitry Andric else { 52230b57cec5SDimitry Andric // When keeping extra threads in team, switch threads to wait on own 52240b57cec5SDimitry Andric // b_go flag 52250b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; ++f) { 52260b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52270b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 52280b57cec5SDimitry Andric for (int b = 0; b < bs_last_barrier; ++b) { 52290b57cec5SDimitry Andric if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 52300b57cec5SDimitry Andric balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 52310b57cec5SDimitry Andric } 52320b57cec5SDimitry Andric KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 52330b57cec5SDimitry Andric } 52340b57cec5SDimitry Andric } 52350b57cec5SDimitry Andric } 52360b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 52370b57cec5SDimitry Andric team->t.t_nproc = new_nproc; 52380b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 52390b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); 52400b57cec5SDimitry 
Andric __kmp_reinitialize_team(team, new_icvs, 52410b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 52420b57cec5SDimitry Andric 52430b57cec5SDimitry Andric // Update remaining threads 52440b57cec5SDimitry Andric for (f = 0; f < new_nproc; ++f) { 52450b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc = new_nproc; 52460b57cec5SDimitry Andric } 52470b57cec5SDimitry Andric 5248fe6060f1SDimitry Andric // restore the current task state of the primary thread: should be the 52490b57cec5SDimitry Andric // implicit task 52500b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 52510b57cec5SDimitry Andric team->t.t_threads[0], team)); 52520b57cec5SDimitry Andric 52530b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 52540b57cec5SDimitry Andric 52550b57cec5SDimitry Andric #ifdef KMP_DEBUG 52560b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 52570b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 52580b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 52590b57cec5SDimitry Andric team->t.t_nproc); 52600b57cec5SDimitry Andric } 52610b57cec5SDimitry Andric #endif 52620b57cec5SDimitry Andric 5263349cc55cSDimitry Andric if (do_place_partition) { 52640b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 52650b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 52660b57cec5SDimitry Andric __kmp_partition_places(team); 52670b57cec5SDimitry Andric #endif 5268349cc55cSDimitry Andric } 52690b57cec5SDimitry Andric } else { // team->t.t_nproc < new_nproc 5270489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 52710b57cec5SDimitry Andric kmp_affin_mask_t *old_mask; 52720b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 52730b57cec5SDimitry Andric KMP_CPU_ALLOC(old_mask); 52740b57cec5SDimitry Andric } 52750b57cec5SDimitry Andric #endif 52760b57cec5SDimitry Andric 52770b57cec5SDimitry Andric 
KA_TRACE(20, 52780b57cec5SDimitry Andric ("__kmp_allocate_team: increasing hot team thread count to %d\n", 52790b57cec5SDimitry Andric new_nproc)); 5280349cc55cSDimitry Andric int old_nproc = team->t.t_nproc; // save old value and use to update only 52810b57cec5SDimitry Andric team->t.t_size_changed = 1; 52820b57cec5SDimitry Andric 52830b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52840b57cec5SDimitry Andric int avail_threads = hot_teams[level].hot_team_nth; 52850b57cec5SDimitry Andric if (new_nproc < avail_threads) 52860b57cec5SDimitry Andric avail_threads = new_nproc; 52870b57cec5SDimitry Andric kmp_info_t **other_threads = team->t.t_threads; 52880b57cec5SDimitry Andric for (f = team->t.t_nproc; f < avail_threads; ++f) { 52890b57cec5SDimitry Andric // Adjust barrier data of reserved threads (if any) of the team 52900b57cec5SDimitry Andric // Other data will be set in __kmp_initialize_info() below. 52910b57cec5SDimitry Andric int b; 52920b57cec5SDimitry Andric kmp_balign_t *balign = other_threads[f]->th.th_bar; 52930b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 52940b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 52950b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 52960b57cec5SDimitry Andric #if USE_DEBUGGER 52970b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 52980b57cec5SDimitry Andric #endif 52990b57cec5SDimitry Andric } 53000b57cec5SDimitry Andric } 53010b57cec5SDimitry Andric if (hot_teams[level].hot_team_nth >= new_nproc) { 53020b57cec5SDimitry Andric // we have all needed threads in reserve, no need to allocate any 53030b57cec5SDimitry Andric // this only possible in mode 1, cannot have reserved threads in mode 0 53040b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 53050b57cec5SDimitry Andric team->t.t_nproc = new_nproc; // just get reserved threads involved 53060b57cec5SDimitry Andric } else { 
5307349cc55cSDimitry Andric // We may have some threads in reserve, but not enough; 5308349cc55cSDimitry Andric // get reserved threads involved if any. 5309349cc55cSDimitry Andric team->t.t_nproc = hot_teams[level].hot_team_nth; 53100b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 53110b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 53120b57cec5SDimitry Andric if (team->t.t_max_nproc < new_nproc) { 53130b57cec5SDimitry Andric /* reallocate larger arrays */ 53140b57cec5SDimitry Andric __kmp_reallocate_team_arrays(team, new_nproc); 53150b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, NULL); 53160b57cec5SDimitry Andric } 53170b57cec5SDimitry Andric 5318489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5319fe6060f1SDimitry Andric /* Temporarily set full mask for primary thread before creation of 5320fe6060f1SDimitry Andric workers. The reason is that workers inherit the affinity from the 5321fe6060f1SDimitry Andric primary thread, so if a lot of workers are created on the single 5322fe6060f1SDimitry Andric core quickly, they don't get a chance to set their own affinity for 5323fe6060f1SDimitry Andric a long time. 
*/ 53240b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(old_mask); 53250b57cec5SDimitry Andric #endif 53260b57cec5SDimitry Andric 53270b57cec5SDimitry Andric /* allocate new threads for the hot team */ 53280b57cec5SDimitry Andric for (f = team->t.t_nproc; f < new_nproc; f++) { 53290b57cec5SDimitry Andric kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 53300b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_worker); 53310b57cec5SDimitry Andric team->t.t_threads[f] = new_worker; 53320b57cec5SDimitry Andric 53330b57cec5SDimitry Andric KA_TRACE(20, 53340b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init T#%d arrived: " 53350b57cec5SDimitry Andric "join=%llu, plain=%llu\n", 53360b57cec5SDimitry Andric team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 53370b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 53380b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 53390b57cec5SDimitry Andric 53400b57cec5SDimitry Andric { // Initialize barrier data for new threads. 
53410b57cec5SDimitry Andric int b; 53420b57cec5SDimitry Andric kmp_balign_t *balign = new_worker->th.th_bar; 53430b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53440b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 53450b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 53460b57cec5SDimitry Andric KMP_BARRIER_PARENT_FLAG); 53470b57cec5SDimitry Andric #if USE_DEBUGGER 53480b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 53490b57cec5SDimitry Andric #endif 53500b57cec5SDimitry Andric } 53510b57cec5SDimitry Andric } 53520b57cec5SDimitry Andric } 53530b57cec5SDimitry Andric 5354489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 53550b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 5356fe6060f1SDimitry Andric /* Restore initial primary thread's affinity mask */ 53570b57cec5SDimitry Andric __kmp_set_system_affinity(old_mask, TRUE); 53580b57cec5SDimitry Andric KMP_CPU_FREE(old_mask); 53590b57cec5SDimitry Andric } 53600b57cec5SDimitry Andric #endif 53610b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 53620b57cec5SDimitry Andric } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 53630b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 5364349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5365349cc55cSDimitry Andric // Barrier size already increased earlier in this function 5366349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5367349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5368349cc55cSDimitry Andric } 53690b57cec5SDimitry Andric /* make sure everyone is syncronized */ 53700b57cec5SDimitry Andric // new threads below 53710b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, 53720b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 53730b57cec5SDimitry Andric 53740b57cec5SDimitry Andric /* reinitialize the threads */ 53750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 53760b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) 53770b57cec5SDimitry Andric __kmp_initialize_info(team->t.t_threads[f], team, f, 53780b57cec5SDimitry Andric __kmp_gtid_from_tid(f, team)); 53790b57cec5SDimitry Andric 53800b57cec5SDimitry Andric if (level) { // set th_task_state for new threads in nested hot team 53810b57cec5SDimitry Andric // __kmp_initialize_info() no longer zeroes th_task_state, so we should 53820b57cec5SDimitry Andric // only need to set the th_task_state for the new threads. th_task_state 5383fe6060f1SDimitry Andric // for primary thread will not be accurate until after this in 5384fe6060f1SDimitry Andric // __kmp_fork_call(), so we look to the primary thread's memo_stack to 5385fe6060f1SDimitry Andric // get the correct value. 
53860b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 53870b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = 53880b57cec5SDimitry Andric team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 53890b57cec5SDimitry Andric } else { // set th_task_state for new threads in non-nested hot team 5390fe6060f1SDimitry Andric // copy primary thread's state 5391fe6060f1SDimitry Andric kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state; 53920b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 53930b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = old_state; 53940b57cec5SDimitry Andric } 53950b57cec5SDimitry Andric 53960b57cec5SDimitry Andric #ifdef KMP_DEBUG 53970b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) { 53980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 53990b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 54000b57cec5SDimitry Andric team->t.t_nproc); 54010b57cec5SDimitry Andric } 54020b57cec5SDimitry Andric #endif 54030b57cec5SDimitry Andric 5404349cc55cSDimitry Andric if (do_place_partition) { 54050b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 54060b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 54070b57cec5SDimitry Andric __kmp_partition_places(team); 54080b57cec5SDimitry Andric #endif 5409349cc55cSDimitry Andric } 54100b57cec5SDimitry Andric } // Check changes in number of threads 54110b57cec5SDimitry Andric 54120b57cec5SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 54130b57cec5SDimitry Andric if (master->th.th_teams_microtask) { 54140b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54150b57cec5SDimitry Andric // propagate teams construct specific info to workers 54160b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54170b57cec5SDimitry Andric thr->th.th_teams_microtask = master->th.th_teams_microtask; 54180b57cec5SDimitry Andric thr->th.th_teams_level = 
master->th.th_teams_level; 54190b57cec5SDimitry Andric thr->th.th_teams_size = master->th.th_teams_size; 54200b57cec5SDimitry Andric } 54210b57cec5SDimitry Andric } 54220b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 54230b57cec5SDimitry Andric if (level) { 54240b57cec5SDimitry Andric // Sync barrier state for nested hot teams, not needed for outermost hot 54250b57cec5SDimitry Andric // team. 54260b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54270b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54280b57cec5SDimitry Andric int b; 54290b57cec5SDimitry Andric kmp_balign_t *balign = thr->th.th_bar; 54300b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 54310b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 54320b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 54330b57cec5SDimitry Andric #if USE_DEBUGGER 54340b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 54350b57cec5SDimitry Andric #endif 54360b57cec5SDimitry Andric } 54370b57cec5SDimitry Andric } 54380b57cec5SDimitry Andric } 54390b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 54400b57cec5SDimitry Andric 54410b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 54420b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 54430b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 54440b57cec5SDimitry Andric // The hot team re-uses the previous task team, 54450b57cec5SDimitry Andric // if untouched during the previous release->gather phase. 
54460b57cec5SDimitry Andric 54470b57cec5SDimitry Andric KF_TRACE(10, (" hot_team = %p\n", team)); 54480b57cec5SDimitry Andric 54490b57cec5SDimitry Andric #if KMP_DEBUG 54500b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 54510b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 54520b57cec5SDimitry Andric "task_team[1] = %p after reinit\n", 54530b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 54540b57cec5SDimitry Andric } 54550b57cec5SDimitry Andric #endif 54560b57cec5SDimitry Andric 54570b57cec5SDimitry Andric #if OMPT_SUPPORT 54580b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 54590b57cec5SDimitry Andric #endif 54600b57cec5SDimitry Andric 54610b57cec5SDimitry Andric KMP_MB(); 54620b57cec5SDimitry Andric 54630b57cec5SDimitry Andric return team; 54640b57cec5SDimitry Andric } 54650b57cec5SDimitry Andric 54660b57cec5SDimitry Andric /* next, let's try to take one from the team pool */ 54670b57cec5SDimitry Andric KMP_MB(); 54680b57cec5SDimitry Andric for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) { 54690b57cec5SDimitry Andric /* TODO: consider resizing undersized teams instead of reaping them, now 54700b57cec5SDimitry Andric that we have a resizing mechanism */ 54710b57cec5SDimitry Andric if (team->t.t_max_nproc >= max_nproc) { 54720b57cec5SDimitry Andric /* take this team from the team pool */ 54730b57cec5SDimitry Andric __kmp_team_pool = team->t.t_next_pool; 54740b57cec5SDimitry Andric 5475349cc55cSDimitry Andric if (max_nproc > 1 && 5476349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5477349cc55cSDimitry Andric if (!team->t.b) { // Allocate barrier structure 5478349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5479349cc55cSDimitry Andric } 5480349cc55cSDimitry Andric } 5481349cc55cSDimitry Andric 54820b57cec5SDimitry Andric /* setup the team for fresh use */ 
54830b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 54840b57cec5SDimitry Andric 54850b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 54860b57cec5SDimitry Andric "task_team[1] %p to NULL\n", 54870b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 54880b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; 54890b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; 54900b57cec5SDimitry Andric 54910b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 54920b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 54930b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 54940b57cec5SDimitry Andric 54950b57cec5SDimitry Andric KA_TRACE( 54960b57cec5SDimitry Andric 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 54970b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 54980b57cec5SDimitry Andric { // Initialize barrier data. 
54990b57cec5SDimitry Andric int b; 55000b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55010b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 55020b57cec5SDimitry Andric #if USE_DEBUGGER 55030b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 55040b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 55050b57cec5SDimitry Andric #endif 55060b57cec5SDimitry Andric } 55070b57cec5SDimitry Andric } 55080b57cec5SDimitry Andric 55090b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 55100b57cec5SDimitry Andric 55110b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 55120b57cec5SDimitry Andric team->t.t_id)); 55130b57cec5SDimitry Andric 55140b57cec5SDimitry Andric #if OMPT_SUPPORT 55150b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55160b57cec5SDimitry Andric #endif 55170b57cec5SDimitry Andric 55180b57cec5SDimitry Andric KMP_MB(); 55190b57cec5SDimitry Andric 55200b57cec5SDimitry Andric return team; 55210b57cec5SDimitry Andric } 55220b57cec5SDimitry Andric 55230b57cec5SDimitry Andric /* reap team if it is too small, then loop back and check the next one */ 55240b57cec5SDimitry Andric // not sure if this is wise, but, will be redone during the hot-teams 55250b57cec5SDimitry Andric // rewrite. 55260b57cec5SDimitry Andric /* TODO: Use technique to find the right size hot-team, don't reap them */ 55270b57cec5SDimitry Andric team = __kmp_reap_team(team); 55280b57cec5SDimitry Andric __kmp_team_pool = team; 55290b57cec5SDimitry Andric } 55300b57cec5SDimitry Andric 55310b57cec5SDimitry Andric /* nothing available in the pool, no matter, make a new team! 
*/ 55320b57cec5SDimitry Andric KMP_MB(); 55330b57cec5SDimitry Andric team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 55340b57cec5SDimitry Andric 55350b57cec5SDimitry Andric /* and set it up */ 55360b57cec5SDimitry Andric team->t.t_max_nproc = max_nproc; 5537349cc55cSDimitry Andric if (max_nproc > 1 && 5538349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5539349cc55cSDimitry Andric // Allocate barrier structure 5540349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5541349cc55cSDimitry Andric } 5542349cc55cSDimitry Andric 55430b57cec5SDimitry Andric /* NOTE well, for some reason allocating one big buffer and dividing it up 55440b57cec5SDimitry Andric seems to really hurt performance a lot on the P4, so, let's not use this */ 55450b57cec5SDimitry Andric __kmp_allocate_team_arrays(team, max_nproc); 55460b57cec5SDimitry Andric 55470b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 55480b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 55490b57cec5SDimitry Andric 55500b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 55510b57cec5SDimitry Andric "%p to NULL\n", 55520b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 55530b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 55540b57cec5SDimitry Andric // memory, no need to duplicate 55550b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 55560b57cec5SDimitry Andric // memory, no need to duplicate 55570b57cec5SDimitry Andric 55580b57cec5SDimitry Andric if (__kmp_storage_map) { 55590b57cec5SDimitry Andric __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 55600b57cec5SDimitry Andric } 55610b57cec5SDimitry Andric 55620b57cec5SDimitry Andric /* allocate space for arguments */ 
55630b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, FALSE); 55640b57cec5SDimitry Andric team->t.t_argc = argc; 55650b57cec5SDimitry Andric 55660b57cec5SDimitry Andric KA_TRACE(20, 55670b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 55680b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 55690b57cec5SDimitry Andric { // Initialize barrier data. 55700b57cec5SDimitry Andric int b; 55710b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55720b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 55730b57cec5SDimitry Andric #if USE_DEBUGGER 55740b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 55750b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 55760b57cec5SDimitry Andric #endif 55770b57cec5SDimitry Andric } 55780b57cec5SDimitry Andric } 55790b57cec5SDimitry Andric 55800b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 55810b57cec5SDimitry Andric 55820b57cec5SDimitry Andric #if OMPT_SUPPORT 55830b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55840b57cec5SDimitry Andric team->t.ompt_serialized_team_info = NULL; 55850b57cec5SDimitry Andric #endif 55860b57cec5SDimitry Andric 55870b57cec5SDimitry Andric KMP_MB(); 55880b57cec5SDimitry Andric 55890b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 55900b57cec5SDimitry Andric team->t.t_id)); 55910b57cec5SDimitry Andric 55920b57cec5SDimitry Andric return team; 55930b57cec5SDimitry Andric } 55940b57cec5SDimitry Andric 55950b57cec5SDimitry Andric /* TODO implement hot-teams at all levels */ 55960b57cec5SDimitry Andric /* TODO implement lazy thread release on demand (disband request) */ 55970b57cec5SDimitry Andric 55980b57cec5SDimitry Andric /* free the team. return it to the team pool. 
release all the threads
 * associated with it */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  // Hot teams are kept alive for reuse; only non-hot teams are disbanded
  // below. By default only the root's hot team qualifies.
  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  if (master) {
    // Compute the nesting level this team lives at, so we can decide whether
    // it is one of the nested hot teams that must be preserved.
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
#if KMP_DEBUG
    // Only needed by the assertion below; KMP_DEBUG_ASSERT compiles to
    // nothing in release builds, so this would be an unused variable there.
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
#endif
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library.
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        // Spin until the worker marks itself KMP_SAFE_TO_REAP, waking it if
        // it is sleeping on its fork/join b_go flag.
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // NOTE(review): the 1 -> 2 transition appears to mark the thread as
        // being moved out of a distributed-barrier team — confirm the
        // th_used_in_team state encoding against the barrier implementation.
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    // Clear thread slots only after the waits above; earlier clearing would
    // break the th_used_in_team polling loop.
    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}

/* reap the team. destroy it, reclaim all its resources and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  // Remember the pool successor before the team's memory is released.
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* TODO clean the threads that are a part of this? */

  /* free stuff */
  __kmp_free_team_arrays(team);
  // Small argument lists live inline inside the team struct; only a
  // separately allocated argv array needs an explicit free.
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}

// Free the thread. Don't reap it, just place it on the pool of available
// threads.
//
// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
// binding for the affinity mechanism to be useful.
//
// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
// However, we want to avoid a potential performance problem by always
// scanning through the list to find the correct point at which to insert
// the thread (potential N**2 behavior).
// To do this we keep track of the last place a thread struct was inserted
// (__kmp_thread_pool_insert_pt). With single-level parallelism, threads
// will always be added to the tail of the list, kept track of by
// __kmp_thread_pool_insert_pt. With nested parallelism, all bets are off
// and we may need to scan through the entire free list.
//
// This change also has a potentially large performance benefit, for some
// applications. Previously, as threads were freed from the hot team, they
// would be placed back on the free list in inverse order. If the hot team
// grew back to its original size, then the freed thread would be placed
// back on the hot team in reverse order. This could cause bad cache
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
// Now, for single-level parallelism, the OMP tid is always == gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  // Pooled threads are always safe to reap.
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  // Detach the thread from its contention-group (CG) chain, decrementing the
  // member count on each node and freeing nodes that become empty.
  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads
   * -> multiple threads can share the data and try to free the task at
   * __kmp_reap_thread at exit. This duplicate use of the task data can happen
   * with higher probability when hot team is disabled but can occur even when
   * the hot team is enabled */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.
  //
  // In the absence of nested parallelism, the for loop will have 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  // Update the count of active (not suspended) pooled threads under the
  // thread's suspend mutex.
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}

/* ------------------------------------------------------------------------ */

// Main loop for a worker thread: repeatedly wait at the fork barrier for
// work, run the assigned microtask, then wait at the join barrier, until
// library shutdown (__kmp_global.g.g_done) is signaled.
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  /* void *stack_data;*/
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  // Announce thread begin to the tool, then mark the thread idle.
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}

/* ------------------------------------------------------------------------ */

// Thread-specific-data destructor: tears down runtime state for the thread
// whose (gtid + 1) was stored in TLS.
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost:
  // the gtid is stored in TLS as gtid+1 (0 is reserved for the nothing-stored
  // case), so undo the offset with a checked narrowing conversion.
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

// Run library shutdown automatically when the dynamic library is unloaded.
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  /* [Windows]
     josh: ideally, we want to completely shutdown the library in this atexit
     handler, but stat code that depends on thread specific data for gtid fails
     because that data becomes unavailable at some point during the shutdown, so
     we call __kmp_internal_end_thread instead. We should eventually remove the
     dependency on __kmp_get_specific_gtid in the stat code and use
     __kmp_internal_end_library to cleanly shutdown the library.

     // TODO: Can some of this comment about GVS be removed?
     I suspect that the offending stat code is executed when the calling thread
     tries to clean up a dead root thread's data structures, resulting in GVS
     code trying to close the GVS structures for that thread, but since the stat
     code uses __kmp_get_specific_gtid to get the gtid with the assumption that
     the calling thread is cleaning up itself instead of another thread, it gets
     confused. This happens because allowing a thread to unregister and cleanup
     another thread is a recent modification for addressing an issue.
     Based on the current design (20050722), a thread may end up
     trying to unregister another thread only if thread death does not trigger
     the calling of __kmp_internal_end_thread. For Linux* OS, there is the
     thread specific data destructor function to detect thread death. For
     Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
     is nothing. Thus, the workaround is applicable only for Windows static
     stat library. */
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}

static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      // NOTE(review): for the distributed barrier, th_used_in_team is CAS'ed
      // from 0 to 3 before resuming the thread — presumably 3 is the "being
      // reaped" sentinel; confirm against the dist-barrier implementation.
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously.  If it was actively
    // spinning in the thread pool, decrement the global count.
    //
    // There is a small timing hole here - if the worker thread was just waking
    // up after sleeping in the pool, had reset it's th_active_in_pool flag but
    // not decremented the global counter __kmp_thread_pool_active_nth yet, then
    // the global counter might not get updated.
    //
    // Currently, this can only happen as the library is unloaded,
    // so there are no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  // Unpublish the thread before freeing it so no other shutdown path can
  // observe a dangling __kmp_threads[gtid].
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred                */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();

} // __kmp_reap_thread

// Free every entry in the ITT region- and barrier-domain hash tables using
// th's thread allocator. Compiled to a no-op unless USE_ITT_NOTIFY is set.
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        // Save the link before freeing the node we stand on.
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}

/* Common shutdown body. Unregisters the library, then — if no root is still
   active — reaps pooled worker threads, teams, and task teams before running
   final cleanup. Both callers (__kmp_internal_end_library/_thread) invoke this
   while holding __kmp_initz_lock and __kmp_forkjoin_lock. */
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies.  */
  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
  // dead roots
#endif

  // Scan for any still-active root; i < capacity afterwards means one exists.
  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates.  */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
    KMP_MB(); /* Flush all pending memory write invalidates.  */

    // Need to check that monitor was initialized before reaping it. If we are
    // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
    // __kmp_monitor will appear to contain valid data, but it is only valid in
    // the parent process, not the child.
    // New behavior (201008): instead of keying off of the flag
    // __kmp_init_parallel, the monitor thread creation is keyed off
    // of the new flag __kmp_init_monitor.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
/* TODO move this to cleanup code */
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        //                    KMP_ASSERT( ! KMP_UBER_GTID( i ) );         // AC:
        //                    there can be uber threads alive here
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads.
    // This is valid for now, but be careful if threads are reaped sooner.
    while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    // See note above: One of the possible fixes for CQ138434 / CQ140126
    //
    // FIXME: push both code fragments down and CSE them?
    // push them into __kmp_cleanup() ?
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  } /* else !__kmp_global.t_active */
  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates. */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}

/* Library-level shutdown entry point (atexit / destructor path).
   gtid_req < 0 means the caller does not know its gtid; it is re-resolved
   from thread-specific data below. */
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort?
    */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // Parallel region still running: flag abort and bail out rather than
        // tearing the runtime down under active workers.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* worker threads may call this function through the atexit handler, if they
       * call exit() */
      /* For now, skip the usual subsequent processing and just dump the debug buffer.
         TODO: do a thorough shutdown instead */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // added unregister library call here when we switch to shm linux
      // if we don't, it will leave lots of files in /dev/shm
      // cleanup shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();

} // __kmp_internal_end_library

/* Per-thread shutdown entry point (TLS destructor / worker-exit path).
   Unlike __kmp_internal_end_library, this only tears the runtime down when
   no uber (root) thread remains; otherwise it unregisters the caller and
   returns. gtid_req < 0 means "resolve my gtid from thread-specific data". */
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
   * only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: lets not shutdown the dynamic library at the exit of uber thread,
  // because we will better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */

  /* should we finish the run-time? are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      // Another root is still registered; do not tear down the runtime yet.
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
} // __kmp_internal_end_thread

// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
66600b57cec5SDimitry Andric 66610b57cec5SDimitry Andric static inline char *__kmp_reg_status_name() { 66620b57cec5SDimitry Andric /* On RHEL 3u5 if linked statically, getpid() returns different values in 66630b57cec5SDimitry Andric each thread. If registration and unregistration go in different threads 66640b57cec5SDimitry Andric (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env 66650b57cec5SDimitry Andric env var can not be found, because the name will contain different pid. */ 6666e8d8bef9SDimitry Andric // macOS* complains about name being too long with additional getuid() 6667e8d8bef9SDimitry Andric #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB 6668e8d8bef9SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(), 6669e8d8bef9SDimitry Andric (int)getuid()); 6670e8d8bef9SDimitry Andric #else 66710b57cec5SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); 6672e8d8bef9SDimitry Andric #endif 66730b57cec5SDimitry Andric } // __kmp_reg_status_get 66740b57cec5SDimitry Andric 66750b57cec5SDimitry Andric void __kmp_register_library_startup(void) { 66760b57cec5SDimitry Andric 66770b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); // Name of the environment variable. 
66780b57cec5SDimitry Andric int done = 0; 66790b57cec5SDimitry Andric union { 66800b57cec5SDimitry Andric double dtime; 66810b57cec5SDimitry Andric long ltime; 66820b57cec5SDimitry Andric } time; 66830b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 66840b57cec5SDimitry Andric __kmp_initialize_system_tick(); 66850b57cec5SDimitry Andric #endif 66860b57cec5SDimitry Andric __kmp_read_system_time(&time.dtime); 66870b57cec5SDimitry Andric __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); 66880b57cec5SDimitry Andric __kmp_registration_str = 66890b57cec5SDimitry Andric __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, 66900b57cec5SDimitry Andric __kmp_registration_flag, KMP_LIBRARY_FILE); 66910b57cec5SDimitry Andric 66920b57cec5SDimitry Andric KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, 66930b57cec5SDimitry Andric __kmp_registration_str)); 66940b57cec5SDimitry Andric 66950b57cec5SDimitry Andric while (!done) { 66960b57cec5SDimitry Andric 66970b57cec5SDimitry Andric char *value = NULL; // Actual value of the environment variable. 66980b57cec5SDimitry Andric 6699349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6700e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6701e8d8bef9SDimitry Andric int shm_preexist = 0; 6702e8d8bef9SDimitry Andric char *data1; 6703e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666); 6704e8d8bef9SDimitry Andric if ((fd1 == -1) && (errno == EEXIST)) { 6705e8d8bef9SDimitry Andric // file didn't open because it already exists. 
6706e8d8bef9SDimitry Andric // try opening existing file 6707e8d8bef9SDimitry Andric fd1 = shm_open(shm_name, O_RDWR, 0666); 6708e8d8bef9SDimitry Andric if (fd1 == -1) { // file didn't open 6709e8d8bef9SDimitry Andric // error out here 6710e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0), 6711e8d8bef9SDimitry Andric __kmp_msg_null); 6712e8d8bef9SDimitry Andric } else { 6713e8d8bef9SDimitry Andric // able to open existing file 6714e8d8bef9SDimitry Andric shm_preexist = 1; 6715e8d8bef9SDimitry Andric } 6716e8d8bef9SDimitry Andric } else if (fd1 == -1) { // SHM didn't open; it was due to error other than 6717e8d8bef9SDimitry Andric // already exists. 6718e8d8bef9SDimitry Andric // error out here. 6719e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno), 6720e8d8bef9SDimitry Andric __kmp_msg_null); 6721e8d8bef9SDimitry Andric } 6722e8d8bef9SDimitry Andric if (shm_preexist == 0) { 6723e8d8bef9SDimitry Andric // we created SHM now set size 6724e8d8bef9SDimitry Andric if (ftruncate(fd1, SHM_SIZE) == -1) { 6725e8d8bef9SDimitry Andric // error occured setting size; 6726e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"), 6727e8d8bef9SDimitry Andric KMP_ERR(errno), __kmp_msg_null); 6728e8d8bef9SDimitry Andric } 6729e8d8bef9SDimitry Andric } 6730e8d8bef9SDimitry Andric data1 = 6731e8d8bef9SDimitry Andric (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0); 6732e8d8bef9SDimitry Andric if (data1 == MAP_FAILED) { 6733e8d8bef9SDimitry Andric // failed to map shared memory 6734e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno), 6735e8d8bef9SDimitry Andric __kmp_msg_null); 6736e8d8bef9SDimitry Andric } 6737e8d8bef9SDimitry Andric if (shm_preexist == 0) { // set data to SHM, set value 6738e8d8bef9SDimitry Andric KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str); 6739e8d8bef9SDimitry Andric } 
6740e8d8bef9SDimitry Andric // Read value from either what we just wrote or existing file. 6741e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6742e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6743e8d8bef9SDimitry Andric close(fd1); 6744e8d8bef9SDimitry Andric #else // Windows and unix with static library 67450b57cec5SDimitry Andric // Set environment variable, but do not overwrite if it is exist. 67460b57cec5SDimitry Andric __kmp_env_set(name, __kmp_registration_str, 0); 6747e8d8bef9SDimitry Andric // read value to see if it got set 67480b57cec5SDimitry Andric value = __kmp_env_get(name); 6749e8d8bef9SDimitry Andric #endif 6750e8d8bef9SDimitry Andric 67510b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 67520b57cec5SDimitry Andric done = 1; // Ok, environment variable set successfully, exit the loop. 67530b57cec5SDimitry Andric } else { 67540b57cec5SDimitry Andric // Oops. Write failed. Another copy of OpenMP RTL is in memory. 67550b57cec5SDimitry Andric // Check whether it alive or dead. 67560b57cec5SDimitry Andric int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 
67570b57cec5SDimitry Andric char *tail = value; 67580b57cec5SDimitry Andric char *flag_addr_str = NULL; 67590b57cec5SDimitry Andric char *flag_val_str = NULL; 67600b57cec5SDimitry Andric char const *file_name = NULL; 67610b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_addr_str, &tail); 67620b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_val_str, &tail); 67630b57cec5SDimitry Andric file_name = tail; 67640b57cec5SDimitry Andric if (tail != NULL) { 6765fe6060f1SDimitry Andric unsigned long *flag_addr = 0; 6766fe6060f1SDimitry Andric unsigned long flag_val = 0; 67670b57cec5SDimitry Andric KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr)); 67680b57cec5SDimitry Andric KMP_SSCANF(flag_val_str, "%lx", &flag_val); 67690b57cec5SDimitry Andric if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 67700b57cec5SDimitry Andric // First, check whether environment-encoded address is mapped into 67710b57cec5SDimitry Andric // addr space. 67720b57cec5SDimitry Andric // If so, dereference it to see if it still has the right value. 67730b57cec5SDimitry Andric if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 67740b57cec5SDimitry Andric neighbor = 1; 67750b57cec5SDimitry Andric } else { 67760b57cec5SDimitry Andric // If not, then we know the other copy of the library is no longer 67770b57cec5SDimitry Andric // running. 67780b57cec5SDimitry Andric neighbor = 2; 67790b57cec5SDimitry Andric } 67800b57cec5SDimitry Andric } 67810b57cec5SDimitry Andric } 67820b57cec5SDimitry Andric switch (neighbor) { 67830b57cec5SDimitry Andric case 0: // Cannot parse environment variable -- neighbor status unknown. 67840b57cec5SDimitry Andric // Assume it is the incompatible format of future version of the 67850b57cec5SDimitry Andric // library. Assume the other library is alive. 67860b57cec5SDimitry Andric // WARN( ... ); // TODO: Issue a warning. 
67870b57cec5SDimitry Andric file_name = "unknown library"; 67880b57cec5SDimitry Andric KMP_FALLTHROUGH(); 67890b57cec5SDimitry Andric // Attention! Falling to the next case. That's intentional. 67900b57cec5SDimitry Andric case 1: { // Neighbor is alive. 67910b57cec5SDimitry Andric // Check it is allowed. 67920b57cec5SDimitry Andric char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 67930b57cec5SDimitry Andric if (!__kmp_str_match_true(duplicate_ok)) { 67940b57cec5SDimitry Andric // That's not allowed. Issue fatal error. 67950b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 67960b57cec5SDimitry Andric KMP_HNT(DuplicateLibrary), __kmp_msg_null); 67970b57cec5SDimitry Andric } 67980b57cec5SDimitry Andric KMP_INTERNAL_FREE(duplicate_ok); 67990b57cec5SDimitry Andric __kmp_duplicate_library_ok = 1; 68000b57cec5SDimitry Andric done = 1; // Exit the loop. 68010b57cec5SDimitry Andric } break; 68020b57cec5SDimitry Andric case 2: { // Neighbor is dead. 6803e8d8bef9SDimitry Andric 6804349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6805e8d8bef9SDimitry Andric // close shared memory. 6806e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6807e8d8bef9SDimitry Andric #else 68080b57cec5SDimitry Andric // Clear the variable and try to register library again. 
68090b57cec5SDimitry Andric __kmp_env_unset(name); 6810e8d8bef9SDimitry Andric #endif 68110b57cec5SDimitry Andric } break; 6812fe6060f1SDimitry Andric default: { 6813fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(0); 6814fe6060f1SDimitry Andric } break; 68150b57cec5SDimitry Andric } 68160b57cec5SDimitry Andric } 68170b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)value); 6818349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6819e8d8bef9SDimitry Andric KMP_INTERNAL_FREE((void *)shm_name); 6820e8d8bef9SDimitry Andric #endif 6821e8d8bef9SDimitry Andric } // while 68220b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)name); 68230b57cec5SDimitry Andric 68240b57cec5SDimitry Andric } // func __kmp_register_library_startup 68250b57cec5SDimitry Andric 68260b57cec5SDimitry Andric void __kmp_unregister_library(void) { 68270b57cec5SDimitry Andric 68280b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); 6829e8d8bef9SDimitry Andric char *value = NULL; 6830e8d8bef9SDimitry Andric 6831349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6832e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6833e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_RDONLY, 0666); 6834e8d8bef9SDimitry Andric if (fd1 == -1) { 6835e8d8bef9SDimitry Andric // file did not open. return. 
6836e8d8bef9SDimitry Andric return; 6837e8d8bef9SDimitry Andric } 6838e8d8bef9SDimitry Andric char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0); 6839e8d8bef9SDimitry Andric if (data1 != MAP_FAILED) { 6840e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6841e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6842e8d8bef9SDimitry Andric } 6843e8d8bef9SDimitry Andric close(fd1); 6844e8d8bef9SDimitry Andric #else 6845e8d8bef9SDimitry Andric value = __kmp_env_get(name); 6846e8d8bef9SDimitry Andric #endif 68470b57cec5SDimitry Andric 68480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 68490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 68500b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 68510b57cec5SDimitry Andric // Ok, this is our variable. Delete it. 6852349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6853e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6854e8d8bef9SDimitry Andric #else 68550b57cec5SDimitry Andric __kmp_env_unset(name); 6856e8d8bef9SDimitry Andric #endif 68570b57cec5SDimitry Andric } 68580b57cec5SDimitry Andric 6859349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6860e8d8bef9SDimitry Andric KMP_INTERNAL_FREE(shm_name); 6861e8d8bef9SDimitry Andric #endif 6862e8d8bef9SDimitry Andric 68630b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_registration_str); 68640b57cec5SDimitry Andric KMP_INTERNAL_FREE(value); 68650b57cec5SDimitry Andric KMP_INTERNAL_FREE(name); 68660b57cec5SDimitry Andric 68670b57cec5SDimitry Andric __kmp_registration_flag = 0; 68680b57cec5SDimitry Andric __kmp_registration_str = NULL; 68690b57cec5SDimitry Andric 68700b57cec5SDimitry Andric } // __kmp_unregister_library 68710b57cec5SDimitry Andric 68720b57cec5SDimitry Andric // End of Library registration stuff. 
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

// Classify the Intel MIC coprocessor generation from CPUID leaf 1 and record
// it in the global __kmp_mic_type (mic2/mic3/non_mic). Used later to pick
// MIC-tuned barrier parameters.
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
// Enable user-level umwait if CPUID leaf 7 reports WAITPKG support (ECX bit 5)
// AND the user asked for it via __kmp_user_level_mwait.
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
// Weak fallback: always reports "not found" (0) so the real libc getauxval,
// when present at link/run time, takes precedence.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

// Decide whether user-level mwait may be used on Intel MIC (mic3 only): either
// the auxv entry advertises it, or the user forced it via KMP_USER_LEVEL_MWAIT
// (in which case a warning is issued).
static void __kmp_user_level_mwait_init() {
  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
  // KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */

// One-time serial bootstrap of the runtime: sanity-checks type sizes, runs
// tool (OMPT/OMPD) pre-init, initializes allocator/locks, registers against
// duplicate runtime copies, seeds configuration defaults, reads the
// environment, allocates the threads/root arrays, and registers the initial
// (uber) root thread. Statement order here is load-bearing: locks before
// runtime init, registration before env processing, env before capacity
// sizing, capacities before __kmp_register_root. Callers must hold
// __kmp_initz_lock (see __kmp_serial_initialize below) — TODO confirm for
// other call sites outside this view.
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  // The runtime's ABI assumes exact type widths; fail fast in debug builds.
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

#if OMPT_SUPPORT
  ompt_pre_init();
#endif
#if OMPD_SUPPORT
  __kmp_env_dump();
  ompd_init();
#endif

  __kmp_validate_locks();

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable and check to see
     whether another copy of the library is already registered. */

  __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  // Per-size atomic-operation locks (suffix encodes operand width/type).
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */

  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

// Some global variable initialization moved here from kmp_env_initialize()
#ifdef KMP_DEBUG
  kmp_diag = 0;
#endif
  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth()
  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
  // part
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;
  // AC: do not use analytical here, because it is non-monotonous
  //__kmp_guided = kmp_sch_guided_iterative_chunked;
  //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
  // need to repeat assignment
  // Barrier initialization. Moved here from __kmp_env_initialize() Barrier
  // branch bit control and barrier method control parts
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  // Seed every barrier kind with the default branch factors and patterns; the
  // reduction barrier optionally gets its own tuned values.
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
      // lin_64 ): hyper,1
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  // Read KMP_*/OMP_* environment variables; may override the defaults set
  // above, so it must run after them.
  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif
// Print all messages in message catalog for testing purposes.
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable sized records */
  /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
   * expandable */
  /* Since allocation is cache-aligned, just add extra padding at the end */
  // __kmp_threads and __kmp_root share one allocation: threads array first,
  // root array immediately after it.
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
  __kmp_all_nth = 0;
  __kmp_nth = 0;

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !KMP_DYNAMIC_LIB
  {
    /* Invoke the exit handler when the program finishes, only for static
       library. For dynamic library, we already have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. this way they
     can return false, not call our handler, avoid terminating the library, and
     continue execution where they left off. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

#if OMPT_SUPPORT
  ompt_post_init();
#endif

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}

// Thread-safe public entry point: double-checked under __kmp_initz_lock so
// serial initialization runs exactly once.
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 72380b57cec5SDimitry Andric } 72390b57cec5SDimitry Andric 72400b57cec5SDimitry Andric static void __kmp_do_middle_initialize(void) { 72410b57cec5SDimitry Andric int i, j; 72420b57cec5SDimitry Andric int prev_dflt_team_nth; 72430b57cec5SDimitry Andric 72440b57cec5SDimitry Andric if (!__kmp_init_serial) { 72450b57cec5SDimitry Andric __kmp_do_serial_initialize(); 72460b57cec5SDimitry Andric } 72470b57cec5SDimitry Andric 72480b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 72490b57cec5SDimitry Andric 72500b57cec5SDimitry Andric // Save the previous value for the __kmp_dflt_team_nth so that 72510b57cec5SDimitry Andric // we can avoid some reinitialization if it hasn't changed. 72520b57cec5SDimitry Andric prev_dflt_team_nth = __kmp_dflt_team_nth; 72530b57cec5SDimitry Andric 72540b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 72550b57cec5SDimitry Andric // __kmp_affinity_initialize() will try to set __kmp_ncores to the 72560b57cec5SDimitry Andric // number of cores on the machine. 
  __kmp_affinity_initialize();

#endif /* KMP_AFFINITY_SUPPORTED */

  // __kmp_xproc (OS processor count) must be known by now; fall back to it
  // when affinity did not establish an available-processor count.
  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  // Clamp the default team size into [KMP_MIN_NTH, __kmp_sys_max_nth].
  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  // There's no harm in continuing if the following check fails,
  // but it indicates an error in the previous logic.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num threads icv for each
    // root thread that is currently registered with the RTL (which has not
    // already explicitly set its nthreads-var with a call to
    // omp_set_num_threads()).
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      // Oversubscribed: spin-waiting would steal cycles from useful work.
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}

// Public entry point for middle initialization; same double-checked locking
// scheme as __kmp_serial_initialize.
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) { // re-check under the lock
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// Final initialization tier, performed lazily before the first parallel
// region (head; body continues below).  Note: releases __kmp_initz_lock at
// the end of the function body, past this chunk boundary.
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* jc: The lock __kmp_initz_lock is already held, so calling
     __kmp_serial_initialize would cause a deadlock. So we call
     __kmp_do_serial_initialize directly.
  */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  // Worker threads will set theirs to these values at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KMP_MB();
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// Lazily brings up the hidden-helper team.  Requires full parallel
// initialization first; protected by the same bootstrap lock.
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Set the global variable indicating that we're initializing hidden helper
  // team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* ------------------------------------------------------------------------ */

// Per-thread setup performed just before a thread invokes the implicit task
// of a parallel region: resets construct/dispatch bookkeeping and, when
// consistency checking is enabled, pushes the parallel frame.
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet.
  */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  // Prefetch the fork/join barrier arrived flag; it will be touched soon.
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
  // this_thr->th.th_info.ds.ds_tid ] );

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

// Mirror of __kmp_run_before_invoked_task: pops the consistency-check frame
// and finishes the implicit task after the microtask returns.
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

// Invokes the team's microtask (t_pkfn) on this thread, bracketed by
// before/after bookkeeping, ITT stack notifications, OMPT implicit-task
// callbacks, and stats timers.  Returns the microtask's return code.
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      // Fall back to the parent team's stack id.
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    // Let the microtask publish its exit frame into the implicit task data.
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy; // tools disabled: discard the frame pointer
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid,
                              (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}

void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  // Request the per-team size for the nested parallel launched below.
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots; // push onto this thread's CG-root stack
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // AC: last parameter "1" eliminates join barrier which won't work because
  // worker threads are in a fork barrier waiting for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

// Teams-construct analogue of __kmp_invoke_task_func: runs
// __kmp_teams_master as the initial task of the league, with OMPT
// implicit-task notification.  Always returns 1.
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

/* this sets the requested number of threads for the next parallel region
   encountered by this team. since this should be enclosed in the forkjoin
   critical section it should avoid race conditions with asymmetrical nested
   parallelism */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  // Non-positive requests are ignored; th_set_nproc keeps its prior value.
  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

// Computes and stores the per-team thread limit (th_teams_size.nth) for a
// teams construct (head; body continues below).
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  // Middle initialization above guarantees these globals are populated.
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    // No thread_limit clause: derive a default per-team size.
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size to exceed thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1; // never allow a zero-sized team
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads
    // Store new thread limit; old limit is saved in th_cg_roots list
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires requested values to be positive,
    // but people can send us any value, so we'd better check
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams; // environment-requested team count
    } else {
      num_teams = 1; // default number of teams is 1.
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
// OpenMP 5.1 variant taking lower/upper bounds for the number of teams
// (head only; the function continues beyond this chunk).
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1.

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
7817fe6060f1SDimitry Andric } 7818fe6060f1SDimitry Andric } else { 7819fe6060f1SDimitry Andric num_teams = (num_threads > __kmp_teams_max_nth) 7820fe6060f1SDimitry Andric ? num_teams 7821fe6060f1SDimitry Andric : __kmp_teams_max_nth / num_threads; 7822fe6060f1SDimitry Andric if (num_teams < num_teams_lb) { 7823fe6060f1SDimitry Andric num_teams = num_teams_lb; 7824fe6060f1SDimitry Andric } else if (num_teams > num_teams_ub) { 7825fe6060f1SDimitry Andric num_teams = num_teams_ub; 78260b57cec5SDimitry Andric } 78270b57cec5SDimitry Andric } 7828fe6060f1SDimitry Andric } 7829fe6060f1SDimitry Andric // Set number of teams (number of threads in the outer "parallel" of the 7830fe6060f1SDimitry Andric // teams) 7831fe6060f1SDimitry Andric thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 7832fe6060f1SDimitry Andric 7833fe6060f1SDimitry Andric __kmp_push_thread_limit(thr, num_teams, num_threads); 78340b57cec5SDimitry Andric } 78350b57cec5SDimitry Andric 78360b57cec5SDimitry Andric // Set the proc_bind var to use in the following parallel region. 78370b57cec5SDimitry Andric void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { 78380b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 78390b57cec5SDimitry Andric thr->th.th_set_proc_bind = proc_bind; 78400b57cec5SDimitry Andric } 78410b57cec5SDimitry Andric 78420b57cec5SDimitry Andric /* Launch the worker threads into the microtask. 
*/ 78430b57cec5SDimitry Andric 78440b57cec5SDimitry Andric void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) { 78450b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 78460b57cec5SDimitry Andric 78470b57cec5SDimitry Andric #ifdef KMP_DEBUG 78480b57cec5SDimitry Andric int f; 78490b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 78500b57cec5SDimitry Andric 78510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 78520b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 78530b57cec5SDimitry Andric KMP_ASSERT(KMP_MASTER_GTID(gtid)); 78540b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 78550b57cec5SDimitry Andric 78560b57cec5SDimitry Andric team->t.t_construct = 0; /* no single directives seen yet */ 78570b57cec5SDimitry Andric team->t.t_ordered.dt.t_value = 78580b57cec5SDimitry Andric 0; /* thread 0 enters the ordered section first */ 78590b57cec5SDimitry Andric 78600b57cec5SDimitry Andric /* Reset the identifiers on the dispatch buffer */ 78610b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 78620b57cec5SDimitry Andric if (team->t.t_max_nproc > 1) { 78630b57cec5SDimitry Andric int i; 78640b57cec5SDimitry Andric for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { 78650b57cec5SDimitry Andric team->t.t_disp_buffer[i].buffer_index = i; 78660b57cec5SDimitry Andric team->t.t_disp_buffer[i].doacross_buf_idx = i; 78670b57cec5SDimitry Andric } 78680b57cec5SDimitry Andric } else { 78690b57cec5SDimitry Andric team->t.t_disp_buffer[0].buffer_index = 0; 78700b57cec5SDimitry Andric team->t.t_disp_buffer[0].doacross_buf_idx = 0; 78710b57cec5SDimitry Andric } 78720b57cec5SDimitry Andric 78730b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 78740b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_team == team); 78750b57cec5SDimitry Andric 78760b57cec5SDimitry Andric #ifdef KMP_DEBUG 78770b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 78780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 78790b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc); 78800b57cec5SDimitry Andric } 78810b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 78820b57cec5SDimitry Andric 78830b57cec5SDimitry Andric /* release the worker threads so they may begin working */ 78840b57cec5SDimitry Andric __kmp_fork_barrier(gtid, 0); 78850b57cec5SDimitry Andric } 78860b57cec5SDimitry Andric 78870b57cec5SDimitry Andric void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { 78880b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 78890b57cec5SDimitry Andric 78900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 78910b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 78920b57cec5SDimitry Andric KMP_ASSERT(KMP_MASTER_GTID(gtid)); 78930b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 78940b57cec5SDimitry Andric 78950b57cec5SDimitry Andric /* Join barrier after fork */ 78960b57cec5SDimitry Andric 78970b57cec5SDimitry Andric #ifdef KMP_DEBUG 78980b57cec5SDimitry Andric if (__kmp_threads[gtid] && 78990b57cec5SDimitry Andric __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) { 79000b57cec5SDimitry Andric __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, 79010b57cec5SDimitry Andric __kmp_threads[gtid]); 79020b57cec5SDimitry Andric __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " 79030b57cec5SDimitry Andric "team->t.t_nproc=%d\n", 79040b57cec5SDimitry Andric gtid, __kmp_threads[gtid]->th.th_team_nproc, team, 79050b57cec5SDimitry Andric team->t.t_nproc); 79060b57cec5SDimitry Andric __kmp_print_structure(); 79070b57cec5SDimitry Andric } 79080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_threads[gtid] && 79090b57cec5SDimitry Andric __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc); 79100b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 79110b57cec5SDimitry Andric 79120b57cec5SDimitry Andric __kmp_join_barrier(gtid); /* wait for everyone */ 79130b57cec5SDimitry Andric #if OMPT_SUPPORT 79140b57cec5SDimitry Andric if (ompt_enabled.enabled && 79150b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { 79160b57cec5SDimitry Andric int ds_tid = this_thr->th.th_info.ds.ds_tid; 79170b57cec5SDimitry Andric ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr); 79180b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_overhead; 79190b57cec5SDimitry Andric #if OMPT_OPTIONAL 79200b57cec5SDimitry Andric void *codeptr = NULL; 79210b57cec5SDimitry Andric if (KMP_MASTER_TID(ds_tid) && 79220b57cec5SDimitry Andric (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || 79230b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region))) 79240b57cec5SDimitry Andric codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address; 
79250b57cec5SDimitry Andric 79260b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region_wait) { 79270b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 79280b57cec5SDimitry Andric ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data, 79290b57cec5SDimitry Andric codeptr); 79300b57cec5SDimitry Andric } 79310b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region) { 79320b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 79330b57cec5SDimitry Andric ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data, 79340b57cec5SDimitry Andric codeptr); 79350b57cec5SDimitry Andric } 79360b57cec5SDimitry Andric #endif 79370b57cec5SDimitry Andric if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { 79380b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 7939fe6060f1SDimitry Andric ompt_scope_end, NULL, task_data, 0, ds_tid, 7940fe6060f1SDimitry Andric ompt_task_implicit); // TODO: Can this be ompt_task_initial? 79410b57cec5SDimitry Andric } 79420b57cec5SDimitry Andric } 79430b57cec5SDimitry Andric #endif 79440b57cec5SDimitry Andric 79450b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 79460b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_team == team); 79470b57cec5SDimitry Andric } 79480b57cec5SDimitry Andric 79490b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 79500b57cec5SDimitry Andric 79510b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 79520b57cec5SDimitry Andric 79530b57cec5SDimitry Andric // Return the worker threads actively spinning in the hot team, if we 79540b57cec5SDimitry Andric // are at the outermost level of parallelism. Otherwise, return 0. 
79550b57cec5SDimitry Andric static int __kmp_active_hot_team_nproc(kmp_root_t *root) { 79560b57cec5SDimitry Andric int i; 79570b57cec5SDimitry Andric int retval; 79580b57cec5SDimitry Andric kmp_team_t *hot_team; 79590b57cec5SDimitry Andric 79600b57cec5SDimitry Andric if (root->r.r_active) { 79610b57cec5SDimitry Andric return 0; 79620b57cec5SDimitry Andric } 79630b57cec5SDimitry Andric hot_team = root->r.r_hot_team; 79640b57cec5SDimitry Andric if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { 7965fe6060f1SDimitry Andric return hot_team->t.t_nproc - 1; // Don't count primary thread 79660b57cec5SDimitry Andric } 79670b57cec5SDimitry Andric 7968fe6060f1SDimitry Andric // Skip the primary thread - it is accounted for elsewhere. 79690b57cec5SDimitry Andric retval = 0; 79700b57cec5SDimitry Andric for (i = 1; i < hot_team->t.t_nproc; i++) { 79710b57cec5SDimitry Andric if (hot_team->t.t_threads[i]->th.th_active) { 79720b57cec5SDimitry Andric retval++; 79730b57cec5SDimitry Andric } 79740b57cec5SDimitry Andric } 79750b57cec5SDimitry Andric return retval; 79760b57cec5SDimitry Andric } 79770b57cec5SDimitry Andric 79780b57cec5SDimitry Andric // Perform an automatic adjustment to the number of 79790b57cec5SDimitry Andric // threads used by the next parallel region. 
79800b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) { 79810b57cec5SDimitry Andric int retval; 79820b57cec5SDimitry Andric int pool_active; 79830b57cec5SDimitry Andric int hot_team_active; 79840b57cec5SDimitry Andric int team_curr_active; 79850b57cec5SDimitry Andric int system_active; 79860b57cec5SDimitry Andric 79870b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, 79880b57cec5SDimitry Andric set_nproc)); 79890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root); 79900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0] 79910b57cec5SDimitry Andric ->th.th_current_task->td_icvs.dynamic == TRUE); 79920b57cec5SDimitry Andric KMP_DEBUG_ASSERT(set_nproc > 1); 79930b57cec5SDimitry Andric 79940b57cec5SDimitry Andric if (set_nproc == 1) { 79950b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n")); 79960b57cec5SDimitry Andric return 1; 79970b57cec5SDimitry Andric } 79980b57cec5SDimitry Andric 79990b57cec5SDimitry Andric // Threads that are active in the thread pool, active in the hot team for this 80000b57cec5SDimitry Andric // particular root (if we are at the outer par level), and the currently 8001fe6060f1SDimitry Andric // executing thread (to become the primary thread) are available to add to the 8002fe6060f1SDimitry Andric // new team, but are currently contributing to the system load, and must be 80030b57cec5SDimitry Andric // accounted for. 80040b57cec5SDimitry Andric pool_active = __kmp_thread_pool_active_nth; 80050b57cec5SDimitry Andric hot_team_active = __kmp_active_hot_team_nproc(root); 80060b57cec5SDimitry Andric team_curr_active = pool_active + hot_team_active + 1; 80070b57cec5SDimitry Andric 80080b57cec5SDimitry Andric // Check the system load. 
80090b57cec5SDimitry Andric system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active); 80100b57cec5SDimitry Andric KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d " 80110b57cec5SDimitry Andric "hot team active = %d\n", 80120b57cec5SDimitry Andric system_active, pool_active, hot_team_active)); 80130b57cec5SDimitry Andric 80140b57cec5SDimitry Andric if (system_active < 0) { 80150b57cec5SDimitry Andric // There was an error reading the necessary info from /proc, so use the 80160b57cec5SDimitry Andric // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode 80170b57cec5SDimitry Andric // = dynamic_thread_limit, we shouldn't wind up getting back here. 80180b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 80190b57cec5SDimitry Andric KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit"); 80200b57cec5SDimitry Andric 80210b57cec5SDimitry Andric // Make this call behave like the thread limit algorithm. 80220b57cec5SDimitry Andric retval = __kmp_avail_proc - __kmp_nth + 80230b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 80240b57cec5SDimitry Andric if (retval > set_nproc) { 80250b57cec5SDimitry Andric retval = set_nproc; 80260b57cec5SDimitry Andric } 80270b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 80280b57cec5SDimitry Andric retval = KMP_MIN_NTH; 80290b57cec5SDimitry Andric } 80300b57cec5SDimitry Andric 80310b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", 80320b57cec5SDimitry Andric retval)); 80330b57cec5SDimitry Andric return retval; 80340b57cec5SDimitry Andric } 80350b57cec5SDimitry Andric 80360b57cec5SDimitry Andric // There is a slight delay in the load balance algorithm in detecting new 80370b57cec5SDimitry Andric // running procs. 
The real system load at this instant should be at least as 80380b57cec5SDimitry Andric // large as the #active omp thread that are available to add to the team. 80390b57cec5SDimitry Andric if (system_active < team_curr_active) { 80400b57cec5SDimitry Andric system_active = team_curr_active; 80410b57cec5SDimitry Andric } 80420b57cec5SDimitry Andric retval = __kmp_avail_proc - system_active + team_curr_active; 80430b57cec5SDimitry Andric if (retval > set_nproc) { 80440b57cec5SDimitry Andric retval = set_nproc; 80450b57cec5SDimitry Andric } 80460b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 80470b57cec5SDimitry Andric retval = KMP_MIN_NTH; 80480b57cec5SDimitry Andric } 80490b57cec5SDimitry Andric 80500b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval)); 80510b57cec5SDimitry Andric return retval; 80520b57cec5SDimitry Andric } // __kmp_load_balance_nproc() 80530b57cec5SDimitry Andric 80540b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 80550b57cec5SDimitry Andric 80560b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 80570b57cec5SDimitry Andric 80580b57cec5SDimitry Andric /* NOTE: this is called with the __kmp_init_lock held */ 80590b57cec5SDimitry Andric void __kmp_cleanup(void) { 80600b57cec5SDimitry Andric int f; 80610b57cec5SDimitry Andric 80620b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: enter\n")); 80630b57cec5SDimitry Andric 80640b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) { 80650b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 80660b57cec5SDimitry Andric __kmp_remove_signals(); 80670b57cec5SDimitry Andric #endif 80680b57cec5SDimitry Andric TCW_4(__kmp_init_parallel, FALSE); 80690b57cec5SDimitry Andric } 80700b57cec5SDimitry Andric 80710b57cec5SDimitry Andric if (TCR_4(__kmp_init_middle)) { 80720b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 80730b57cec5SDimitry Andric __kmp_affinity_uninitialize(); 80740b57cec5SDimitry Andric #endif /* 
KMP_AFFINITY_SUPPORTED */ 80750b57cec5SDimitry Andric __kmp_cleanup_hierarchy(); 80760b57cec5SDimitry Andric TCW_4(__kmp_init_middle, FALSE); 80770b57cec5SDimitry Andric } 80780b57cec5SDimitry Andric 80790b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); 80800b57cec5SDimitry Andric 80810b57cec5SDimitry Andric if (__kmp_init_serial) { 80820b57cec5SDimitry Andric __kmp_runtime_destroy(); 80830b57cec5SDimitry Andric __kmp_init_serial = FALSE; 80840b57cec5SDimitry Andric } 80850b57cec5SDimitry Andric 80860b57cec5SDimitry Andric __kmp_cleanup_threadprivate_caches(); 80870b57cec5SDimitry Andric 80880b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 80890b57cec5SDimitry Andric if (__kmp_root[f] != NULL) { 80900b57cec5SDimitry Andric __kmp_free(__kmp_root[f]); 80910b57cec5SDimitry Andric __kmp_root[f] = NULL; 80920b57cec5SDimitry Andric } 80930b57cec5SDimitry Andric } 80940b57cec5SDimitry Andric __kmp_free(__kmp_threads); 80950b57cec5SDimitry Andric // __kmp_threads and __kmp_root were allocated at once, as single block, so 80960b57cec5SDimitry Andric // there is no need in freeing __kmp_root. 
80970b57cec5SDimitry Andric __kmp_threads = NULL; 80980b57cec5SDimitry Andric __kmp_root = NULL; 80990b57cec5SDimitry Andric __kmp_threads_capacity = 0; 81000b57cec5SDimitry Andric 81010b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 81020b57cec5SDimitry Andric __kmp_cleanup_indirect_user_locks(); 81030b57cec5SDimitry Andric #else 81040b57cec5SDimitry Andric __kmp_cleanup_user_locks(); 81050b57cec5SDimitry Andric #endif 8106fe6060f1SDimitry Andric #if OMPD_SUPPORT 8107fe6060f1SDimitry Andric if (ompd_state) { 8108fe6060f1SDimitry Andric __kmp_free(ompd_env_block); 8109fe6060f1SDimitry Andric ompd_env_block = NULL; 8110fe6060f1SDimitry Andric ompd_env_block_size = 0; 8111fe6060f1SDimitry Andric } 8112fe6060f1SDimitry Andric #endif 81130b57cec5SDimitry Andric 81140b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 81150b57cec5SDimitry Andric KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file)); 81160b57cec5SDimitry Andric __kmp_cpuinfo_file = NULL; 81170b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 81180b57cec5SDimitry Andric 81190b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 81200b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 81210b57cec5SDimitry Andric __kmp_print_speculative_stats(); 81220b57cec5SDimitry Andric #endif 81230b57cec5SDimitry Andric #endif 81240b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_nth.nth); 81250b57cec5SDimitry Andric __kmp_nested_nth.nth = NULL; 81260b57cec5SDimitry Andric __kmp_nested_nth.size = 0; 81270b57cec5SDimitry Andric __kmp_nested_nth.used = 0; 81280b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); 81290b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types = NULL; 81300b57cec5SDimitry Andric __kmp_nested_proc_bind.size = 0; 81310b57cec5SDimitry Andric __kmp_nested_proc_bind.used = 0; 81320b57cec5SDimitry Andric if (__kmp_affinity_format) { 81330b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_affinity_format); 81340b57cec5SDimitry Andric __kmp_affinity_format = NULL; 
81350b57cec5SDimitry Andric } 81360b57cec5SDimitry Andric 81370b57cec5SDimitry Andric __kmp_i18n_catclose(); 81380b57cec5SDimitry Andric 81390b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 81400b57cec5SDimitry Andric __kmp_hier_scheds.deallocate(); 81410b57cec5SDimitry Andric #endif 81420b57cec5SDimitry Andric 81430b57cec5SDimitry Andric #if KMP_STATS_ENABLED 81440b57cec5SDimitry Andric __kmp_stats_fini(); 81450b57cec5SDimitry Andric #endif 81460b57cec5SDimitry Andric 81470b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: exit\n")); 81480b57cec5SDimitry Andric } 81490b57cec5SDimitry Andric 81500b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 81510b57cec5SDimitry Andric 81520b57cec5SDimitry Andric int __kmp_ignore_mppbeg(void) { 81530b57cec5SDimitry Andric char *env; 81540b57cec5SDimitry Andric 81550b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 81560b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 81570b57cec5SDimitry Andric return FALSE; 81580b57cec5SDimitry Andric } 81590b57cec5SDimitry Andric // By default __kmpc_begin() is no-op. 81600b57cec5SDimitry Andric return TRUE; 81610b57cec5SDimitry Andric } 81620b57cec5SDimitry Andric 81630b57cec5SDimitry Andric int __kmp_ignore_mppend(void) { 81640b57cec5SDimitry Andric char *env; 81650b57cec5SDimitry Andric 81660b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 81670b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 81680b57cec5SDimitry Andric return FALSE; 81690b57cec5SDimitry Andric } 81700b57cec5SDimitry Andric // By default __kmpc_end() is no-op. 
81710b57cec5SDimitry Andric return TRUE; 81720b57cec5SDimitry Andric } 81730b57cec5SDimitry Andric 81740b57cec5SDimitry Andric void __kmp_internal_begin(void) { 81750b57cec5SDimitry Andric int gtid; 81760b57cec5SDimitry Andric kmp_root_t *root; 81770b57cec5SDimitry Andric 81780b57cec5SDimitry Andric /* this is a very important step as it will register new sibling threads 81790b57cec5SDimitry Andric and assign these new uber threads a new gtid */ 81800b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 81810b57cec5SDimitry Andric root = __kmp_threads[gtid]->th.th_root; 81820b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 81830b57cec5SDimitry Andric 81840b57cec5SDimitry Andric if (root->r.r_begin) 81850b57cec5SDimitry Andric return; 81860b57cec5SDimitry Andric __kmp_acquire_lock(&root->r.r_begin_lock, gtid); 81870b57cec5SDimitry Andric if (root->r.r_begin) { 81880b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 81890b57cec5SDimitry Andric return; 81900b57cec5SDimitry Andric } 81910b57cec5SDimitry Andric 81920b57cec5SDimitry Andric root->r.r_begin = TRUE; 81930b57cec5SDimitry Andric 81940b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 81950b57cec5SDimitry Andric } 81960b57cec5SDimitry Andric 81970b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 81980b57cec5SDimitry Andric 81990b57cec5SDimitry Andric void __kmp_user_set_library(enum library_type arg) { 82000b57cec5SDimitry Andric int gtid; 82010b57cec5SDimitry Andric kmp_root_t *root; 82020b57cec5SDimitry Andric kmp_info_t *thread; 82030b57cec5SDimitry Andric 82040b57cec5SDimitry Andric /* first, make sure we are initialized so we can get our gtid */ 82050b57cec5SDimitry Andric 82060b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 82070b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 82080b57cec5SDimitry Andric 82090b57cec5SDimitry Andric root = thread->th.th_root; 82100b57cec5SDimitry Andric 
82110b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, 82120b57cec5SDimitry Andric library_serial)); 82130b57cec5SDimitry Andric if (root->r.r_in_parallel) { /* Must be called in serial section of top-level 82140b57cec5SDimitry Andric thread */ 82150b57cec5SDimitry Andric KMP_WARNING(SetLibraryIncorrectCall); 82160b57cec5SDimitry Andric return; 82170b57cec5SDimitry Andric } 82180b57cec5SDimitry Andric 82190b57cec5SDimitry Andric switch (arg) { 82200b57cec5SDimitry Andric case library_serial: 82210b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82220b57cec5SDimitry Andric set__nproc(thread, 1); 82230b57cec5SDimitry Andric break; 82240b57cec5SDimitry Andric case library_turnaround: 82250b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82260b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 82270b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 82280b57cec5SDimitry Andric break; 82290b57cec5SDimitry Andric case library_throughput: 82300b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82310b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? 
__kmp_dflt_team_nth 82320b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 82330b57cec5SDimitry Andric break; 82340b57cec5SDimitry Andric default: 82350b57cec5SDimitry Andric KMP_FATAL(UnknownLibraryType, arg); 82360b57cec5SDimitry Andric } 82370b57cec5SDimitry Andric 82380b57cec5SDimitry Andric __kmp_aux_set_library(arg); 82390b57cec5SDimitry Andric } 82400b57cec5SDimitry Andric 82410b57cec5SDimitry Andric void __kmp_aux_set_stacksize(size_t arg) { 82420b57cec5SDimitry Andric if (!__kmp_init_serial) 82430b57cec5SDimitry Andric __kmp_serial_initialize(); 82440b57cec5SDimitry Andric 82450b57cec5SDimitry Andric #if KMP_OS_DARWIN 82460b57cec5SDimitry Andric if (arg & (0x1000 - 1)) { 82470b57cec5SDimitry Andric arg &= ~(0x1000 - 1); 82480b57cec5SDimitry Andric if (arg + 0x1000) /* check for overflow if we round up */ 82490b57cec5SDimitry Andric arg += 0x1000; 82500b57cec5SDimitry Andric } 82510b57cec5SDimitry Andric #endif 82520b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 82530b57cec5SDimitry Andric 82540b57cec5SDimitry Andric /* only change the default stacksize before the first parallel region */ 82550b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) { 82560b57cec5SDimitry Andric size_t value = arg; /* argument is in bytes */ 82570b57cec5SDimitry Andric 82580b57cec5SDimitry Andric if (value < __kmp_sys_min_stksize) 82590b57cec5SDimitry Andric value = __kmp_sys_min_stksize; 82600b57cec5SDimitry Andric else if (value > KMP_MAX_STKSIZE) 82610b57cec5SDimitry Andric value = KMP_MAX_STKSIZE; 82620b57cec5SDimitry Andric 82630b57cec5SDimitry Andric __kmp_stksize = value; 82640b57cec5SDimitry Andric 82650b57cec5SDimitry Andric __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? 
  */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
// Records the requested library mode and adjusts the related global knobs:
// turnaround tightens yielding (only when oversubscribed, unless the user
// explicitly set KMP_USE_YIELD), throughput lowers the default blocktime
// from "infinite" to 200ms. Unknown values are a fatal error.
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200; // milliseconds; only if still at the max
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct
// Walks up from the current team toward the team created by the enclosing
// teams construct (level tlevel+1), skipping serialized nesting levels.
// On return, teams_serialized holds the remaining serialization count at
// that level (out-parameter; 0 when not in a teams construct at all).
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      // Consume serialized levels of the current team while descending toward
      // the target level; the loop body is intentionally empty.
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      // Serialization exhausted at this team: step to the parent team.
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      // Still above the target level: move one level up the team tree.
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

// Returns the team number of the calling thread within the teams construct,
// or 0 when outside a teams construct or when the region is serialized.
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

// Returns the number of teams in the enclosing teams construct, or 1 when
// outside a teams construct or when the region is serialized.
int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * . => right justified when size is specified
 * by default output is left justified
 * size is the *minimum* field length
 * All other characters are printed as is
 *
 * Available field types:
 * L {thread_level} - omp_get_level()
 * n {thread_num} - omp_get_thread_num()
 * h {host} - name of host machine
 * P {process_id} - process id (integer)
 * T {thread_identifier} - native thread identifier (integer)
 * N {num_threads} - omp_get_num_threads()
 * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
 * a {thread_affinity} - comma separated list of integers or integer ranges
 * (values of affinity mask)
 *
 * Implementation-specific field types can be added
 * If a type is unknown, print "undefined"
 */

// Structure holding the short name, long name, and corresponding data type
// for snprintf. A table of these will represent the entire valid keyword
// field types.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> thread level
  const char *long_name; // from spec thread_level -> thread level
  char field_format; // data type for snprintf (typically 'd' or 's'
  // for integer or string)
} kmp_affinity_format_field_t;

// Table of all recognized affinity-format fields. Note the long names here
// are the implementation's canonical names; they differ from some of the
// spec aliases listed in the comment block above.
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};

// Return the number of characters it takes to hold field
// Parses one %-field starting at *ptr (which must point at '%'), formats the
// corresponding value into field_buffer, and advances *ptr past the consumed
// field. Builds a printf-style format string locally from the 0/./width
// modifiers, then dispatches on the canonicalized single-character field
// name. Unknown fields print "undefined" per the spec comment above.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-'; // printf '-' flag => left justify
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths.
    // This also prevents overflowing format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short)
  // Canonicalize the name into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      // Complete the format string ("%[-][0][width]<d|s>") for this field.
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      // Long name without a closing brace: treat the field as unknown.
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested
  // value using snprintf within __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to spec, If an implementation does not have info for field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

/*
 * Return number of characters needed to hold the affinity string
 * (not including null byte character)
 * The resultant string is printed to buffer, which the caller can then
 * handle afterwards
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field; // scratch buffer reused for each %-field

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, then we use
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}

/* ------------------------------------------------------------------------ */

// Sets the blocktime for the calling thread's team and serial team, clamping
// the requested value into [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME], and marks
// blocktime as explicitly set.
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

// Re-runs environment initialization with the given settings string and
// optionally prints the environment.
// NOTE(review): 'len' is unused here; 'str' appears to be assumed
// NUL-terminated — confirm against callers.
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

// Chooses the reduction implementation (critical section, atomic, tree, or
// empty for a serialized team) based on what the compiler generated
// (reduce_data/reduce_func, KMP_IDENT_ATOMIC_REDUCE flag), team size, and
// per-arch/per-OS tuning; KMP_FORCE_REDUCTION can override the choice for
// non-serialized teams.
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in current
  // PAROPT )
  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
  // can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to OpenMP RTL to make a decision on which method to select
  // among generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    // Small teams prefer atomics; larger teams prefer the tree method when
    // the compiler generated one. The cutoff is larger on MIC hardware.
    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        // Forced method not generated by the compiler: warn and fall back.
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// this function is for testing set/get/determine reduce method
88360b57cec5SDimitry Andric kmp_int32 __kmp_get_reduce_method(void) { 88370b57cec5SDimitry Andric return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 88380b57cec5SDimitry Andric } 88390b57cec5SDimitry Andric 88400b57cec5SDimitry Andric // Soft pause sets up threads to ignore blocktime and just go to sleep. 88410b57cec5SDimitry Andric // Spin-wait code checks __kmp_pause_status and reacts accordingly. 88420b57cec5SDimitry Andric void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } 88430b57cec5SDimitry Andric 88440b57cec5SDimitry Andric // Hard pause shuts down the runtime completely. Resume happens naturally when 88450b57cec5SDimitry Andric // OpenMP is used subsequently. 88460b57cec5SDimitry Andric void __kmp_hard_pause() { 88470b57cec5SDimitry Andric __kmp_pause_status = kmp_hard_paused; 88480b57cec5SDimitry Andric __kmp_internal_end_thread(-1); 88490b57cec5SDimitry Andric } 88500b57cec5SDimitry Andric 88510b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 
// Clears a soft pause and wakes every worker thread. For each thread we must
// handle the race with a thread that is *about to* sleep: either resume it if
// already sleeping, or grab its suspend mutex to prove it cannot sleep yet.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    // gtid 0 (the initial thread) is the caller's side; wake workers only.
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// TODO: add warning messages
// Implements omp_pause_resource semantics: validates the requested transition
// against the current pause state and delegates to the soft/hard pause
// helpers. Returns 0 on success, 1 on an invalid request (already paused,
// not paused, or unknown level).
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // error message about runtime not being paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // error message about invalid level
    return 1;
  }
}

void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Andric if (__kmp_init_serial == 0) 89195ffd83dbSDimitry Andric __kmp_do_serial_initialize(); 89205ffd83dbSDimitry Andric __kmp_display_env_impl(!verbose, verbose); 89215ffd83dbSDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 89225ffd83dbSDimitry Andric } 8923e8d8bef9SDimitry Andric 8924349cc55cSDimitry Andric // The team size is changing, so distributed barrier must be modified 8925349cc55cSDimitry Andric void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, 8926349cc55cSDimitry Andric int new_nthreads) { 8927349cc55cSDimitry Andric KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] == 8928349cc55cSDimitry Andric bp_dist_bar); 8929349cc55cSDimitry Andric kmp_info_t **other_threads = team->t.t_threads; 8930349cc55cSDimitry Andric 8931349cc55cSDimitry Andric // We want all the workers to stop waiting on the barrier while we adjust the 8932349cc55cSDimitry Andric // size of the team. 8933349cc55cSDimitry Andric for (int f = 1; f < old_nthreads; ++f) { 8934349cc55cSDimitry Andric KMP_DEBUG_ASSERT(other_threads[f] != NULL); 8935349cc55cSDimitry Andric // Ignore threads that are already inactive or not present in the team 8936349cc55cSDimitry Andric if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) { 8937349cc55cSDimitry Andric // teams construct causes thread_limit to get passed in, and some of 8938349cc55cSDimitry Andric // those could be inactive; just ignore them 8939349cc55cSDimitry Andric continue; 8940349cc55cSDimitry Andric } 8941349cc55cSDimitry Andric // If thread is transitioning still to in_use state, wait for it 8942349cc55cSDimitry Andric if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) { 8943349cc55cSDimitry Andric while (team->t.t_threads[f]->th.th_used_in_team.load() == 3) 8944349cc55cSDimitry Andric KMP_CPU_PAUSE(); 8945349cc55cSDimitry Andric } 8946349cc55cSDimitry Andric // The thread should be in_use now 8947349cc55cSDimitry Andric 
KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1); 8948349cc55cSDimitry Andric // Transition to unused state 8949349cc55cSDimitry Andric team->t.t_threads[f]->th.th_used_in_team.store(2); 8950349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2); 8951349cc55cSDimitry Andric } 8952349cc55cSDimitry Andric // Release all the workers 8953349cc55cSDimitry Andric kmp_uint64 new_value; // new value for go 8954349cc55cSDimitry Andric new_value = team->t.b->go_release(); 8955349cc55cSDimitry Andric 8956349cc55cSDimitry Andric KMP_MFENCE(); 8957349cc55cSDimitry Andric 8958349cc55cSDimitry Andric // Workers should see transition status 2 and move to 0; but may need to be 8959349cc55cSDimitry Andric // woken up first 8960349cc55cSDimitry Andric size_t my_go_index; 8961349cc55cSDimitry Andric int count = old_nthreads - 1; 8962349cc55cSDimitry Andric while (count > 0) { 8963349cc55cSDimitry Andric count = old_nthreads - 1; 8964349cc55cSDimitry Andric for (int f = 1; f < old_nthreads; ++f) { 8965349cc55cSDimitry Andric my_go_index = f / team->t.b->threads_per_go; 8966349cc55cSDimitry Andric if (other_threads[f]->th.th_used_in_team.load() != 0) { 8967349cc55cSDimitry Andric if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers 8968349cc55cSDimitry Andric kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST( 8969349cc55cSDimitry Andric void *, other_threads[f]->th.th_sleep_loc); 8970349cc55cSDimitry Andric __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag); 8971349cc55cSDimitry Andric } 8972349cc55cSDimitry Andric } else { 8973349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0); 8974349cc55cSDimitry Andric count--; 8975349cc55cSDimitry Andric } 8976349cc55cSDimitry Andric } 8977349cc55cSDimitry Andric } 8978349cc55cSDimitry Andric // Now update the barrier size 8979349cc55cSDimitry Andric team->t.b->update_num_threads(new_nthreads); 
8980349cc55cSDimitry Andric team->t.b->go_reset(); 8981349cc55cSDimitry Andric } 8982349cc55cSDimitry Andric 8983349cc55cSDimitry Andric void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) { 8984349cc55cSDimitry Andric // Add the threads back to the team 8985349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team); 8986349cc55cSDimitry Andric // Threads were paused and pointed at th_used_in_team temporarily during a 8987349cc55cSDimitry Andric // resize of the team. We're going to set th_used_in_team to 3 to indicate to 8988349cc55cSDimitry Andric // the thread that it should transition itself back into the team. Then, if 8989349cc55cSDimitry Andric // blocktime isn't infinite, the thread could be sleeping, so we send a resume 8990349cc55cSDimitry Andric // to wake it up. 8991349cc55cSDimitry Andric for (int f = 1; f < new_nthreads; ++f) { 8992349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 8993349cc55cSDimitry Andric KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0, 8994349cc55cSDimitry Andric 3); 8995349cc55cSDimitry Andric if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads 8996349cc55cSDimitry Andric __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid, 8997349cc55cSDimitry Andric (kmp_flag_32<false, false> *)NULL); 8998349cc55cSDimitry Andric } 8999349cc55cSDimitry Andric } 9000349cc55cSDimitry Andric // The threads should be transitioning to the team; when they are done, they 9001349cc55cSDimitry Andric // should have set th_used_in_team to 1. This loop forces master to wait until 9002349cc55cSDimitry Andric // all threads have moved into the team and are waiting in the barrier. 
9003349cc55cSDimitry Andric int count = new_nthreads - 1; 9004349cc55cSDimitry Andric while (count > 0) { 9005349cc55cSDimitry Andric count = new_nthreads - 1; 9006349cc55cSDimitry Andric for (int f = 1; f < new_nthreads; ++f) { 9007349cc55cSDimitry Andric if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) { 9008349cc55cSDimitry Andric count--; 9009349cc55cSDimitry Andric } 9010349cc55cSDimitry Andric } 9011349cc55cSDimitry Andric } 9012349cc55cSDimitry Andric } 9013349cc55cSDimitry Andric 9014e8d8bef9SDimitry Andric // Globals and functions for hidden helper task 9015e8d8bef9SDimitry Andric kmp_info_t **__kmp_hidden_helper_threads; 9016e8d8bef9SDimitry Andric kmp_info_t *__kmp_hidden_helper_main_thread; 9017e8d8bef9SDimitry Andric std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks; 9018e8d8bef9SDimitry Andric #if KMP_OS_LINUX 9019fe6060f1SDimitry Andric kmp_int32 __kmp_hidden_helper_threads_num = 8; 9020e8d8bef9SDimitry Andric kmp_int32 __kmp_enable_hidden_helper = TRUE; 9021e8d8bef9SDimitry Andric #else 9022fe6060f1SDimitry Andric kmp_int32 __kmp_hidden_helper_threads_num = 0; 9023e8d8bef9SDimitry Andric kmp_int32 __kmp_enable_hidden_helper = FALSE; 9024e8d8bef9SDimitry Andric #endif 9025e8d8bef9SDimitry Andric 9026e8d8bef9SDimitry Andric namespace { 9027e8d8bef9SDimitry Andric std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num; 9028e8d8bef9SDimitry Andric 9029e8d8bef9SDimitry Andric void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) { 9030e8d8bef9SDimitry Andric // This is an explicit synchronization on all hidden helper threads in case 9031e8d8bef9SDimitry Andric // that when a regular thread pushes a hidden helper task to one hidden 9032e8d8bef9SDimitry Andric // helper thread, the thread has not been awaken once since they're released 9033e8d8bef9SDimitry Andric // by the main thread after creating the team. 
9034e8d8bef9SDimitry Andric KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num); 9035e8d8bef9SDimitry Andric while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) != 9036e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_num) 9037e8d8bef9SDimitry Andric ; 9038e8d8bef9SDimitry Andric 9039e8d8bef9SDimitry Andric // If main thread, then wait for signal 9040e8d8bef9SDimitry Andric if (__kmpc_master(nullptr, *gtid)) { 9041e8d8bef9SDimitry Andric // First, unset the initial state and release the initial thread 9042e8d8bef9SDimitry Andric TCW_4(__kmp_init_hidden_helper_threads, FALSE); 9043e8d8bef9SDimitry Andric __kmp_hidden_helper_initz_release(); 9044e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread_wait(); 9045e8d8bef9SDimitry Andric // Now wake up all worker threads 9046e8d8bef9SDimitry Andric for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) { 9047e8d8bef9SDimitry Andric __kmp_hidden_helper_worker_thread_signal(); 9048e8d8bef9SDimitry Andric } 9049e8d8bef9SDimitry Andric } 9050e8d8bef9SDimitry Andric } 9051e8d8bef9SDimitry Andric } // namespace 9052e8d8bef9SDimitry Andric 9053e8d8bef9SDimitry Andric void __kmp_hidden_helper_threads_initz_routine() { 9054e8d8bef9SDimitry Andric // Create a new root for hidden helper team/threads 9055e8d8bef9SDimitry Andric const int gtid = __kmp_register_root(TRUE); 9056e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread = __kmp_threads[gtid]; 9057e8d8bef9SDimitry Andric __kmp_hidden_helper_threads = &__kmp_threads[gtid]; 9058e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread->th.th_set_nproc = 9059e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_num; 9060e8d8bef9SDimitry Andric 9061e8d8bef9SDimitry Andric KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0); 9062e8d8bef9SDimitry Andric 9063e8d8bef9SDimitry Andric __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn); 9064e8d8bef9SDimitry Andric 9065e8d8bef9SDimitry Andric // Set the initialization flag to FALSE 
9066e8d8bef9SDimitry Andric TCW_SYNC_4(__kmp_init_hidden_helper, FALSE); 9067e8d8bef9SDimitry Andric 9068e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_deinitz_release(); 9069e8d8bef9SDimitry Andric } 9070fe6060f1SDimitry Andric 9071fe6060f1SDimitry Andric /* Nesting Mode: 9072fe6060f1SDimitry Andric Set via KMP_NESTING_MODE, which takes an integer. 9073fe6060f1SDimitry Andric Note: we skip duplicate topology levels, and skip levels with only 9074fe6060f1SDimitry Andric one entity. 9075fe6060f1SDimitry Andric KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode. 9076fe6060f1SDimitry Andric KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels 9077fe6060f1SDimitry Andric in the topology, and initializes the number of threads at each of those 9078fe6060f1SDimitry Andric levels to the number of entities at each level, respectively, below the 9079fe6060f1SDimitry Andric entity at the parent level. 9080fe6060f1SDimitry Andric KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels, 9081fe6060f1SDimitry Andric but starts with nesting OFF -- max-active-levels-var is 1 -- and requires 9082fe6060f1SDimitry Andric the user to turn nesting on explicitly. This is an even more experimental 9083fe6060f1SDimitry Andric option to this experimental feature, and may change or go away in the 9084fe6060f1SDimitry Andric future. 
9085fe6060f1SDimitry Andric */ 9086fe6060f1SDimitry Andric 9087fe6060f1SDimitry Andric // Allocate space to store nesting levels 9088fe6060f1SDimitry Andric void __kmp_init_nesting_mode() { 9089fe6060f1SDimitry Andric int levels = KMP_HW_LAST; 9090fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = levels; 9091fe6060f1SDimitry Andric __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int)); 9092fe6060f1SDimitry Andric for (int i = 0; i < levels; ++i) 9093fe6060f1SDimitry Andric __kmp_nesting_nth_level[i] = 0; 9094fe6060f1SDimitry Andric if (__kmp_nested_nth.size < levels) { 9095fe6060f1SDimitry Andric __kmp_nested_nth.nth = 9096fe6060f1SDimitry Andric (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int)); 9097fe6060f1SDimitry Andric __kmp_nested_nth.size = levels; 9098fe6060f1SDimitry Andric } 9099fe6060f1SDimitry Andric } 9100fe6060f1SDimitry Andric 9101fe6060f1SDimitry Andric // Set # threads for top levels of nesting; must be called after topology set 9102fe6060f1SDimitry Andric void __kmp_set_nesting_mode_threads() { 9103fe6060f1SDimitry Andric kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()]; 9104fe6060f1SDimitry Andric 9105fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) 9106fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 9107fe6060f1SDimitry Andric else if (__kmp_nesting_mode > 1) 9108fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9109fe6060f1SDimitry Andric 9110fe6060f1SDimitry Andric if (__kmp_topology) { // use topology info 9111fe6060f1SDimitry Andric int loc, hw_level; 9112fe6060f1SDimitry Andric for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() && 9113fe6060f1SDimitry Andric loc < __kmp_nesting_mode_nlevels; 9114fe6060f1SDimitry Andric loc++, hw_level++) { 9115fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level); 9116fe6060f1SDimitry Andric if (__kmp_nesting_nth_level[loc] == 1) 
9117fe6060f1SDimitry Andric loc--; 9118fe6060f1SDimitry Andric } 9119fe6060f1SDimitry Andric // Make sure all cores are used 9120fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && loc > 1) { 9121fe6060f1SDimitry Andric int core_level = __kmp_topology->get_level(KMP_HW_CORE); 9122fe6060f1SDimitry Andric int num_cores = __kmp_topology->get_count(core_level); 9123fe6060f1SDimitry Andric int upper_levels = 1; 9124fe6060f1SDimitry Andric for (int level = 0; level < loc - 1; ++level) 9125fe6060f1SDimitry Andric upper_levels *= __kmp_nesting_nth_level[level]; 9126fe6060f1SDimitry Andric if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores) 9127fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc - 1] = 9128fe6060f1SDimitry Andric num_cores / __kmp_nesting_nth_level[loc - 2]; 9129fe6060f1SDimitry Andric } 9130fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = loc; 9131fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9132fe6060f1SDimitry Andric } else { // no topology info available; provide a reasonable guesstimation 9133fe6060f1SDimitry Andric if (__kmp_avail_proc >= 4) { 9134fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2; 9135fe6060f1SDimitry Andric __kmp_nesting_nth_level[1] = 2; 9136fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 2; 9137fe6060f1SDimitry Andric } else { 9138fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc; 9139fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 1; 9140fe6060f1SDimitry Andric } 9141fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9142fe6060f1SDimitry Andric } 9143fe6060f1SDimitry Andric for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) { 9144fe6060f1SDimitry Andric __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i]; 9145fe6060f1SDimitry Andric } 9146fe6060f1SDimitry Andric set__nproc(thread, __kmp_nesting_nth_level[0]); 9147fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > 
__kmp_nesting_mode) 9148fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9149fe6060f1SDimitry Andric if (get__max_active_levels(thread) > 1) { 9150fe6060f1SDimitry Andric // if max levels was set, set nesting mode levels to same 9151fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = get__max_active_levels(thread); 9152fe6060f1SDimitry Andric } 9153fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) // turn on nesting for this case only 9154fe6060f1SDimitry Andric set__max_active_levels(thread, __kmp_nesting_mode_nlevels); 9155fe6060f1SDimitry Andric } 9156